def test_launch_analysis(mock_analyze, mock_update, mock_get_engine):
    """Verify that launching analysis for one project triggers exactly one engine call."""
    # Stub the engine lookup so the project resolves to an open wgs_germline analysis.
    engine_record = {
        'best_practice_analysis': 'wgs_germline',
        'status': 'OPEN'
    }
    mock_get_engine.return_value = engine_record

    test_project = NGIProject('S.One_20_01', 'dir_P123', 'P123', '/some/path')
    launch_analysis([test_project])

    mock_analyze.assert_called_once()
def process_demultiplexed_flowcells(
    demux_fcid_dirs,
    restrict_to_projects=None,
    restrict_to_samples=None,
    restart_failed_jobs=False,
    restart_finished_jobs=False,
    restart_running_jobs=False,
    fallback_libprep=None,
    keep_existing_data=False,
    no_qc=False,
    quiet=False,
    manual=False,
    config=None,
    config_file_path=None,
    generate_bqsr_bam=False,
):
    """Sort demultiplexed Illumina flowcells into projects and launch their
    analysis.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
        restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
        restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Restart jobs marked as running in Charon
    :param str fallback_libprep: If libprep cannot be determined, use this
        value if supplied (default None)
    :param bool keep_existing_data: Keep existing analysis data when launching
        new jobs
    :param bool no_qc: Skip QC when launching analysis; passed to
        launch_analysis
    :param bool quiet: Don't send notification emails; added to config
    :param bool manual: This is being run from a user script; added to config
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.
    :param bool generate_bqsr_bam: Generate a BQSR-recalibrated bam; passed to
        launch_analysis
    """
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    # Sort/copy the raw flowcell data into analysis-ready NGIProject objects.
    projects_to_analyze = organize_projects_from_flowcell(
        demux_fcid_dirs=demux_fcid_dirs,
        restrict_to_projects=restrict_to_projects,
        restrict_to_samples=restrict_to_samples,
        fallback_libprep=fallback_libprep,
        quiet=quiet,
        config=config)
    for project in projects_to_analyze:
        # Uppsala projects may lack Charon records; create them on demand.
        if UPPSALA_PROJECT_RE.match(project.project_id):
            LOG.info(
                'Creating Charon records for Uppsala project "{}" if they '
                'are missing'.format(project))
            create_charon_entries_from_project(project, sequencing_facility="NGI-U")
    launch_analysis(projects_to_analyze, restart_failed_jobs, restart_finished_jobs,
                    restart_running_jobs, keep_existing_data=keep_existing_data,
                    no_qc=no_qc, config=config, generate_bqsr_bam=generate_bqsr_bam)
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    config=None, config_file_path=None,
                                    quiet=False, manual=False):
    """Sort demultiplexed Illumina flowcells into projects and launch their
    analysis.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
        restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
        restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Restart jobs marked as running in Charon.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.
    :param bool quiet: Don't send notification emails; added to config.
    :param bool manual: This is being run from a user script; added to config.

    :raises RuntimeError: If no projects could be found in the given flowcells.
    """
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    demux_fcid_dirs_set = set(demux_fcid_dirs)
    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = dict()
    for demux_fcid_dir in demux_fcid_dirs_set:
        demux_fcid_dir = os.path.abspath(demux_fcid_dir)
        # These will be a bunch of Project objects each containing Samples,
        # FCIDs, lists of fastq files
        projects_to_analyze = setup_analysis_directory_structure(
            demux_fcid_dir,
            projects_to_analyze,
            restrict_to_projects,
            restrict_to_samples,
            create_files=True,
            config=config,
            quiet=quiet)
    if not projects_to_analyze:
        if restrict_to_projects:
            error_message = ("No projects found to process: the specified flowcells "
                             "({fcid_dirs}) do not contain the specified project(s) "
                             "({restrict_to_projects}) or there was an error "
                             "gathering required information.").format(
                fcid_dirs=",".join(demux_fcid_dirs_set),
                restrict_to_projects=",".join(restrict_to_projects))
        else:
            error_message = ("No projects found to process in flowcells {} "
                             "or there was an error gathering required "
                             "information.".format(",".join(demux_fcid_dirs_set)))
        raise RuntimeError(error_message)
    else:
        projects_to_analyze = projects_to_analyze.values()
    for project in projects_to_analyze:
        # Uppsala projects may lack Charon records; create them on demand.
        if UPPSALA_PROJECT_RE.match(project.project_id):
            LOG.info('Creating Charon records for Uppsala project "{}" if they '
                     'are missing'.format(project))
            create_charon_entries_from_project(project, sequencing_facility="NGI-U")
    launch_analysis(projects_to_analyze, restart_failed_jobs,
                    restart_finished_jobs, restart_running_jobs, config=config)
def test_launch_analysis(self, process_tracking_mock, charon_session_classes_mock, charon_session_launchers_mock):
    """launch_analysis should record the process and pass the project to sarek.analyze."""
    # The first configured best-practice analysis drives engine selection.
    bp_analysis = next(iter(self.config["analysis"]["best_practice_analysis"]))
    charon_session_classes_mock.return_value.project_get.return_value = {
        "best_practice_analysis": bp_analysis
    }
    charon_session_launchers_mock.return_value.project_get.return_value = {
        "status": "OPEN"
    }
    with mock.patch("ngi_pipeline.engines.sarek.analyze") as analyze_mock:
        project_obj = self.projects[0]
        launch_analysis([project_obj], config=self.config, no_qc=True)
        # Exactly one tracked process and one engine invocation are expected.
        process_tracking_mock.assert_called_once()
        analyze_mock.assert_called_once()
        analysis_arg = analyze_mock.call_args[0][0]
        self.assertEqual(project_obj, analysis_arg.project)
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    fallback_libprep=None,
                                    keep_existing_data=False, no_qc=False,
                                    quiet=False, manual=False, config=None,
                                    config_file_path=None,
                                    generate_bqsr_bam=False):
    """Sort demultiplexed Illumina flowcells into projects and launch their
    analysis.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
        restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
        restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Restart jobs marked as running in Charon
    :param str fallback_libprep: If libprep cannot be determined, use this
        value if supplied (default None)
    :param bool keep_existing_data: Keep existing analysis data when launching
        new jobs
    :param bool no_qc: Skip QC when launching analysis; passed to
        launch_analysis
    :param bool quiet: Don't send notification emails; added to config
    :param bool manual: This is being run from a user script; added to config
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.
    :param bool generate_bqsr_bam: Generate a BQSR-recalibrated bam; passed to
        launch_analysis
    """
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    # Sort/copy the raw flowcell data into analysis-ready NGIProject objects.
    projects_to_analyze = organize_projects_from_flowcell(
        demux_fcid_dirs=demux_fcid_dirs,
        restrict_to_projects=restrict_to_projects,
        restrict_to_samples=restrict_to_samples,
        fallback_libprep=fallback_libprep,
        quiet=quiet,
        config=config)
    for project in projects_to_analyze:
        # Uppsala projects may lack Charon records; create them on demand.
        if UPPSALA_PROJECT_RE.match(project.project_id):
            LOG.info('Creating Charon records for Uppsala project "{}" if they '
                     'are missing'.format(project))
            create_charon_entries_from_project(project, sequencing_facility="NGI-U")
    launch_analysis(projects_to_analyze, restart_failed_jobs, restart_finished_jobs,
                    restart_running_jobs, keep_existing_data=keep_existing_data,
                    no_qc=no_qc, config=config, generate_bqsr_bam=generate_bqsr_bam)
## Analyze Project if 'analyze_project_dirs' in args: for analyze_project_dir in args.analyze_project_dirs: try: project_dir = locate_project(analyze_project_dir) except ValueError as e: LOG.error(e) continue project_obj = \ recreate_project_from_filesystem(project_dir=project_dir, restrict_to_samples=args.restrict_to_samples) launchers.launch_analysis( [project_obj], restart_failed_jobs=args.restart_failed_jobs, restart_finished_jobs=args.restart_finished_jobs, restart_running_jobs=args.restart_running_jobs, no_qc=args.no_qc, quiet=args.quiet, manual=True) ## QC Project elif 'qc_project_dirs' in args: for qc_project_dir in args.qc_project_dirs: project = recreate_project_from_filesystem( project_dir=qc_project_dir, restrict_to_samples=args.restrict_to_samples) if not project.samples: LOG.info('No samples found for project {} (path {})'.format( project.project_id, qc_project_dir)) for sample in project: qc_ngi.launchers.analyze(project, sample, quiet=args.quiet)
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None, restrict_to_samples=None, restart_failed_jobs=False, restart_finished_jobs=False, restart_running_jobs=False, config=None, config_file_path=None, quiet=False, manual=False): """Sort demultiplexed Illumina flowcells into projects and launch their analysis. :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories. :param list restrict_to_projects: A list of projects; analysis will be restricted to these. Optional. :param list restrict_to_samples: A list of samples; analysis will be restricted to these. Optional. :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon. :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon. :param dict config: The parsed NGI configuration file; optional. :param str config_file_path: The path to the NGI configuration file; optional. """ if not restrict_to_projects: restrict_to_projects = [] if not restrict_to_samples: restrict_to_samples = [] demux_fcid_dirs_set = set(demux_fcid_dirs) # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready" projects_to_analyze = dict() for demux_fcid_dir in demux_fcid_dirs_set: demux_fcid_dir = os.path.abspath(demux_fcid_dir) # These will be a bunch of Project objects each containing Samples, FCIDs, lists of fastq files projects_to_analyze = setup_analysis_directory_structure( demux_fcid_dir, projects_to_analyze, restrict_to_projects, restrict_to_samples, create_files=True, config=config, quiet=quiet) if not projects_to_analyze: if restrict_to_projects: error_message = ( "No projects found to process: the specified flowcells " "({fcid_dirs}) do not contain the specified project(s) " "({restrict_to_projects}) or there was an error " "gathering required information.").format( fcid_dirs=",".join(demux_fcid_dirs_set), restrict_to_projects=",".join(restrict_to_projects)) else: error_message = ("No projects found to process in flowcells {} " "or 
there was an error gathering required " "information.".format( ",".join(demux_fcid_dirs_set))) raise RuntimeError(error_message) else: projects_to_analyze = projects_to_analyze.values() for project in projects_to_analyze: if UPPSALA_PROJECT_RE.match(project.project_id): LOG.info( 'Creating Charon records for Uppsala project "{}" if they ' 'are missing'.format(project)) create_charon_entries_from_project(project, sequencing_facility="NGI-U") launch_analysis(projects_to_analyze, restart_failed_jobs, restart_finished_jobs, restart_running_jobs, config=config)
elif "analyze_project_dirs" in args: for analyze_project_dir in args.analyze_project_dirs: try: project_dir = locate_project(analyze_project_dir) except ValueError as e: LOG.error(e) continue project_obj = recreate_project_from_filesystem( project_dir=project_dir, restrict_to_samples=args.restrict_to_samples ) launchers.launch_analysis( [project_obj], restart_failed_jobs=args.restart_failed_jobs, restart_finished_jobs=args.restart_finished_jobs, restart_running_jobs=args.restart_running_jobs, keep_existing_data=args.keep_existing_data, no_qc=args.no_qc, quiet=args.quiet, manual=True, generate_bqsr_bam=args.generate_bqsr_bam, ) elif "delete_proj_analysis" in args: from ngi_pipeline.conductor.launchers import get_engine_for_bp delete_proj_analysis_list = list(set(args.delete_proj_analysis)) for delete_proj_analysis in delete_proj_analysis_list: if args.restrict_to_samples: try: project_dir = locate_project(delete_proj_analysis) except ValueError as e:
args.restrict_to_samples, args.restart_failed_jobs, args.restart_finished_jobs, args.restart_running_jobs, quiet=args.quiet, manual=True) ### TODO change to work with multiple projects elif 'analyze_project_dir' in args: project = recreate_project_from_filesystem(project_dir=args.analyze_project_dir, restrict_to_samples=args.restrict_to_samples) if project and os.path.split(project.base_path)[1] == "DATA": project.base_path = os.path.split(project.base_path)[0] launchers.launch_analysis([project], restart_failed_jobs=args.restart_failed_jobs, restart_finished_jobs=args.restart_finished_jobs, restart_running_jobs=args.restart_running_jobs, quiet=args.quiet, manual=True) elif 'organize_fc_dirs' in args: if args.force_update: args.force_update = validate_force_update() if args.delete_existing: args.delete_existing = validate_delete_existing() if not args.restrict_to_projects: args.restrict_to_projects = [] if not args.restrict_to_samples: args.restrict_to_samples = [] organize_fc_dirs_set = set(args.organize_fc_dirs) projects_to_analyze = dict() ## NOTE this bit of code not currently in use but could use later #if args.already_parsed: # Starting from Project/Sample/Libprep/Seqrun tree format # for organize_fc_dir in organize_fc_dirs_set: # p = recreate_project_from_filesystem(organize_fc_dir, # force_create_project=args.force_create_project)
## Analyze Project elif 'analyze_project_dirs' in args: for analyze_project_dir in args.analyze_project_dirs: try: project_dir = locate_project(analyze_project_dir) except ValueError as e: LOG.error(e) continue project_obj = \ recreate_project_from_filesystem(project_dir=project_dir, restrict_to_samples=args.restrict_to_samples) launchers.launch_analysis([project_obj], restart_failed_jobs=args.restart_failed_jobs, restart_finished_jobs=args.restart_finished_jobs, restart_running_jobs=args.restart_running_jobs, keep_existing_data=args.keep_existing_data, no_qc=args.no_qc, quiet=args.quiet, manual=True, generate_bqsr_bam=args.generate_bqsr_bam) elif 'delete_proj_analysis' in args: from ngi_pipeline.conductor.launchers import get_engine_for_bp delete_proj_analysis_list = list(set(args.delete_proj_analysis)) for delete_proj_analysis in delete_proj_analysis_list: if args.restrict_to_samples: try: project_dir = locate_project(delete_proj_analysis) except ValueError as e: LOG.error(e) continue