Beispiel #1
0
def test_launch_analysis(mock_analyze, mock_update, mock_get_engine):
    """Launching analysis for a single project must call the analyze mock once.

    ``mock_get_engine`` is primed to return a record with a
    ``best_practice_analysis`` of ``wgs_germline`` and an ``OPEN`` status.
    ``mock_update`` is accepted but not inspected by this test.
    """
    # Canned record handed back by the patched lookup
    canned_record = dict(best_practice_analysis='wgs_germline', status='OPEN')
    mock_get_engine.return_value = canned_record

    projects = [NGIProject('S.One_20_01', 'dir_P123', 'P123', '/some/path')]
    launch_analysis(projects)

    mock_analyze.assert_called_once()
Beispiel #2
0
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    fallback_libprep=None,
                                    keep_existing_data=False, no_qc=False,
                                    quiet=False, manual=False, config=None,
                                    config_file_path=None,
                                    generate_bqsr_bam=False):
    """Organize demultiplexed flowcells into projects and launch their analysis.

    The flowcell directories are handed to ``organize_projects_from_flowcell``.
    For each returned project whose id matches ``UPPSALA_PROJECT_RE``,
    ``create_charon_entries_from_project`` is called with the "NGI-U"
    sequencing facility. All returned projects are then passed to
    ``launch_analysis``.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
                                      restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
                                     restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Restart jobs marked as running in Charon.
    :param str fallback_libprep: If libprep cannot be determined, use this
                                 value if supplied (default None).
    :param bool keep_existing_data: Keep existing analysis data when launching
                                    new jobs.
    :param bool no_qc: Forwarded unchanged to ``launch_analysis``.
    :param bool quiet: Don't send notification emails; forwarded to
                       ``organize_projects_from_flowcell``.
    :param bool manual: This is being run from a user script. Not referenced
                        directly in this function body.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file;
                                 optional. Not referenced directly in this
                                 function body.
    :param bool generate_bqsr_bam: Forwarded unchanged to ``launch_analysis``.
    """
    # Any falsy restriction is normalized to an empty list
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    projects = organize_projects_from_flowcell(
        demux_fcid_dirs=demux_fcid_dirs,
        restrict_to_projects=restrict_to_projects,
        restrict_to_samples=restrict_to_samples,
        fallback_libprep=fallback_libprep,
        quiet=quiet,
        config=config)

    for project in projects:
        # Only projects whose id matches the Uppsala pattern get Charon records
        if not UPPSALA_PROJECT_RE.match(project.project_id):
            continue
        notice = ('Creating Charon records for Uppsala project "{}" if they '
                  'are missing'.format(project))
        LOG.info(notice)
        create_charon_entries_from_project(project, sequencing_facility="NGI-U")

    launch_analysis(
        projects,
        restart_failed_jobs,
        restart_finished_jobs,
        restart_running_jobs,
        keep_existing_data=keep_existing_data,
        no_qc=no_qc,
        config=config,
        generate_bqsr_bam=generate_bqsr_bam)
Beispiel #3
0
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None, restart_failed_jobs=False,
                                    restart_finished_jobs=False, restart_running_jobs=False,
                                    config=None, config_file_path=None, quiet=False, manual=False):
    """Arrange demultiplexed flowcells per project and launch their analysis.

    Every distinct entry of ``demux_fcid_dirs`` is made absolute and fed to
    ``setup_analysis_directory_structure``, which accumulates the projects.
    For each project whose id matches ``UPPSALA_PROJECT_RE``,
    ``create_charon_entries_from_project`` is called with the "NGI-U"
    sequencing facility. All projects are then passed to ``launch_analysis``.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
                                      restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
                                     restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Forwarded positionally to ``launch_analysis``.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.
                                 Not referenced directly in this function body.
    :param bool quiet: Forwarded to ``setup_analysis_directory_structure``.
    :param bool manual: Not referenced directly in this function body.

    :raises RuntimeError: If no projects were collected from the flowcells.
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    # Duplicate directory arguments are collapsed before processing
    unique_fcid_dirs = set(demux_fcid_dirs)

    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    project_map = {}
    for fcid_dir in unique_fcid_dirs:
        # The helper receives the mapping built so far and returns the updated one
        project_map = setup_analysis_directory_structure(
            os.path.abspath(fcid_dir),
            project_map,
            restrict_to_projects,
            restrict_to_samples,
            create_files=True,
            config=config,
            quiet=quiet)

    if not project_map:
        joined_dirs = ",".join(unique_fcid_dirs)
        if restrict_to_projects:
            template = ("No projects found to process: the specified flowcells "
                        "({fcid_dirs}) do not contain the specified project(s) "
                        "({restrict_to_projects}) or there was an error "
                        "gathering required information.")
            error_message = template.format(
                fcid_dirs=joined_dirs,
                restrict_to_projects=",".join(restrict_to_projects))
        else:
            template = ("No projects found to process in flowcells {} "
                        "or there was an error gathering required "
                        "information.")
            error_message = template.format(joined_dirs)
        raise RuntimeError(error_message)

    projects = project_map.values()
    for project in projects:
        if not UPPSALA_PROJECT_RE.match(project.project_id):
            continue
        LOG.info('Creating Charon records for Uppsala project "{}" if they '
                 'are missing'.format(project))
        create_charon_entries_from_project(project, sequencing_facility="NGI-U")

    launch_analysis(projects, restart_failed_jobs, restart_finished_jobs,
                    restart_running_jobs, config=config)
Beispiel #4
0
    def test_launch_analysis(self, process_tracking_mock,
                             charon_session_classes_mock,
                             charon_session_launchers_mock):
        """Launch analysis for one project with QC disabled and check the calls.

        Expects ``process_tracking_mock`` and the patched
        ``ngi_pipeline.engines.sarek.analyze`` to each be called exactly once,
        and the first positional argument given to ``analyze`` to carry the
        launched project in its ``project`` attribute.
        """
        # Session mock seen by the "classes" side: reports the first configured
        # best-practice analysis for the project
        classes_session = charon_session_classes_mock.return_value
        configured_analyses = list(
            self.config["analysis"]["best_practice_analysis"].keys())
        classes_session.project_get.return_value = {
            "best_practice_analysis": configured_analyses[0]
        }

        # Session mock seen by the "launchers" side: reports an open project
        launchers_session = charon_session_launchers_mock.return_value
        launchers_session.project_get.return_value = {"status": "OPEN"}

        with mock.patch("ngi_pipeline.engines.sarek.analyze") as analyze_mock:
            target_project = self.projects[0]
            launch_analysis([target_project], config=self.config, no_qc=True)

            process_tracking_mock.assert_called_once()
            analyze_mock.assert_called_once()

            # call_args[0] holds the positional arguments of the recorded call
            first_positional = analyze_mock.call_args[0][0]
            self.assertEqual(target_project, first_positional.project)
Beispiel #5
0
def process_demultiplexed_flowcells(
        demux_fcid_dirs,
        restrict_to_projects=None,
        restrict_to_samples=None,
        restart_failed_jobs=False,
        restart_finished_jobs=False,
        restart_running_jobs=False,
        fallback_libprep=None,
        keep_existing_data=False,
        no_qc=False,
        quiet=False,
        manual=False,
        config=None,
        config_file_path=None,
        generate_bqsr_bam=False):
    """Group demultiplexed Illumina flowcells by project and start analysis.

    Steps, in order: organize the flowcells into projects via
    ``organize_projects_from_flowcell``; call
    ``create_charon_entries_from_project`` (facility "NGI-U") for each project
    whose id matches ``UPPSALA_PROJECT_RE``; pass all projects to
    ``launch_analysis``.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
                                      restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
                                     restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Restart jobs marked as running in Charon.
    :param str fallback_libprep: If libprep cannot be determined, use this
                                 value if supplied (default None).
    :param bool keep_existing_data: Keep existing analysis data when launching
                                    new jobs.
    :param bool no_qc: Passed through to ``launch_analysis`` as-is.
    :param bool quiet: Don't send notification emails; passed through to
                       ``organize_projects_from_flowcell``.
    :param bool manual: This is being run from a user script. Not referenced
                        directly in this function body.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file;
                                 optional. Not referenced directly in this
                                 function body.
    :param bool generate_bqsr_bam: Passed through to ``launch_analysis`` as-is.
    """
    # Replace any falsy restriction with a fresh empty list
    restrict_to_projects = restrict_to_projects if restrict_to_projects else []
    restrict_to_samples = restrict_to_samples if restrict_to_samples else []

    organized_projects = organize_projects_from_flowcell(
        demux_fcid_dirs=demux_fcid_dirs,
        restrict_to_projects=restrict_to_projects,
        restrict_to_samples=restrict_to_samples,
        fallback_libprep=fallback_libprep,
        quiet=quiet,
        config=config,
    )

    for project in organized_projects:
        is_uppsala = UPPSALA_PROJECT_RE.match(project.project_id)
        if is_uppsala:
            log_line = ('Creating Charon records for Uppsala project "{}" if they '
                        'are missing'.format(project))
            LOG.info(log_line)
            create_charon_entries_from_project(
                project, sequencing_facility="NGI-U")

    launch_analysis(
        organized_projects,
        restart_failed_jobs,
        restart_finished_jobs,
        restart_running_jobs,
        keep_existing_data=keep_existing_data,
        no_qc=no_qc,
        config=config,
        generate_bqsr_bam=generate_bqsr_bam,
    )
Beispiel #6
0
    ## Analyze Project
    # For every directory in args.analyze_project_dirs: locate the project,
    # rebuild its project object from the filesystem and launch its analysis.
    # NOTE(review): `args` is presumably the parsed command-line namespace -- confirm.
    if 'analyze_project_dirs' in args:
        for analyze_project_dir in args.analyze_project_dirs:
            try:
                project_dir = locate_project(analyze_project_dir)
            except ValueError as e:
                # Log the lookup failure and carry on with the next directory
                LOG.error(e)
                continue
            # Rebuild the project object, passing along the sample restriction
            project_obj = \
                    recreate_project_from_filesystem(project_dir=project_dir,
                                                     restrict_to_samples=args.restrict_to_samples)
            # One launch per project; `manual` is always True on this path
            launchers.launch_analysis(
                [project_obj],
                restart_failed_jobs=args.restart_failed_jobs,
                restart_finished_jobs=args.restart_finished_jobs,
                restart_running_jobs=args.restart_running_jobs,
                no_qc=args.no_qc,
                quiet=args.quiet,
                manual=True)

    ## QC Project
    # For every directory in args.qc_project_dirs: rebuild the project object
    # directly from the given path (no locate_project step here) and run the
    # QC analysis once per sample.
    elif 'qc_project_dirs' in args:
        for qc_project_dir in args.qc_project_dirs:
            project = recreate_project_from_filesystem(
                project_dir=qc_project_dir,
                restrict_to_samples=args.restrict_to_samples)
            # Only logs the empty case; the loop below is still entered
            if not project.samples:
                LOG.info('No samples found for project {} (path {})'.format(
                    project.project_id, qc_project_dir))
            # Iterating the project object yields the samples handed to analyze
            for sample in project:
                qc_ngi.launchers.analyze(project, sample, quiet=args.quiet)
Beispiel #7
0
def process_demultiplexed_flowcells(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    restart_failed_jobs=False,
                                    restart_finished_jobs=False,
                                    restart_running_jobs=False,
                                    config=None, config_file_path=None,
                                    quiet=False, manual=False):
    """Sort demultiplexed flowcells into projects, then launch their analysis.

    Each distinct directory in ``demux_fcid_dirs`` is converted to an absolute
    path and processed by ``setup_analysis_directory_structure``, which is
    given the projects collected so far and returns the updated collection.
    Projects whose id matches ``UPPSALA_PROJECT_RE`` get
    ``create_charon_entries_from_project`` called with the "NGI-U" sequencing
    facility before all projects are passed to ``launch_analysis``.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
                                      restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
                                     restricted to these. Optional.
    :param bool restart_failed_jobs: Restart jobs marked as "FAILED" in Charon.
    :param bool restart_finished_jobs: Restart jobs marked as "DONE" in Charon.
    :param bool restart_running_jobs: Passed positionally to ``launch_analysis``.
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.
                                 Not referenced directly in this function body.
    :param bool quiet: Passed to ``setup_analysis_directory_structure``.
    :param bool manual: Not referenced directly in this function body.

    :raises RuntimeError: If no projects were collected from the flowcells.
    """
    restrict_to_projects = restrict_to_projects if restrict_to_projects else []
    restrict_to_samples = restrict_to_samples if restrict_to_samples else []

    # Repeated directory arguments are only processed once
    flowcell_dirs = set(demux_fcid_dirs)

    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    collected = dict()
    for flowcell_dir in flowcell_dirs:
        absolute_dir = os.path.abspath(flowcell_dir)
        # These will be a bunch of Project objects each containing Samples, FCIDs, lists of fastq files
        collected = setup_analysis_directory_structure(
            absolute_dir, collected, restrict_to_projects, restrict_to_samples,
            create_files=True, config=config, quiet=quiet)

    # Guard clause: nothing collected means there is nothing to launch
    if not collected:
        flowcells_text = ",".join(flowcell_dirs)
        if restrict_to_projects:
            error_message = (
                "No projects found to process: the specified flowcells "
                "({fcid_dirs}) do not contain the specified project(s) "
                "({restrict_to_projects}) or there was an error "
                "gathering required information."
            ).format(fcid_dirs=flowcells_text,
                     restrict_to_projects=",".join(restrict_to_projects))
        else:
            error_message = (
                "No projects found to process in flowcells {} "
                "or there was an error gathering required "
                "information."
            ).format(flowcells_text)
        raise RuntimeError(error_message)

    projects = collected.values()
    for project in projects:
        if not UPPSALA_PROJECT_RE.match(project.project_id):
            continue
        LOG.info('Creating Charon records for Uppsala project "{}" if they '
                 'are missing'.format(project))
        create_charon_entries_from_project(project, sequencing_facility="NGI-U")

    launch_analysis(projects, restart_failed_jobs, restart_finished_jobs,
                    restart_running_jobs, config=config)
    elif "analyze_project_dirs" in args:
        for analyze_project_dir in args.analyze_project_dirs:
            try:
                project_dir = locate_project(analyze_project_dir)
            except ValueError as e:
                LOG.error(e)
                continue
            project_obj = recreate_project_from_filesystem(
                project_dir=project_dir, restrict_to_samples=args.restrict_to_samples
            )
            launchers.launch_analysis(
                [project_obj],
                restart_failed_jobs=args.restart_failed_jobs,
                restart_finished_jobs=args.restart_finished_jobs,
                restart_running_jobs=args.restart_running_jobs,
                keep_existing_data=args.keep_existing_data,
                no_qc=args.no_qc,
                quiet=args.quiet,
                manual=True,
                generate_bqsr_bam=args.generate_bqsr_bam,
            )

    elif "delete_proj_analysis" in args:
        from ngi_pipeline.conductor.launchers import get_engine_for_bp

        delete_proj_analysis_list = list(set(args.delete_proj_analysis))
        for delete_proj_analysis in delete_proj_analysis_list:
            if args.restrict_to_samples:
                try:
                    project_dir = locate_project(delete_proj_analysis)
                except ValueError as e:
                                                args.restrict_to_samples,
                                                args.restart_failed_jobs,
                                                args.restart_finished_jobs,
                                                args.restart_running_jobs,
                                                quiet=args.quiet,
                                                manual=True)

    ### TODO change to work with multiple projects
    elif 'analyze_project_dir' in args:
        project = recreate_project_from_filesystem(project_dir=args.analyze_project_dir,
                                                   restrict_to_samples=args.restrict_to_samples)
        if project and os.path.split(project.base_path)[1] == "DATA":
            project.base_path = os.path.split(project.base_path)[0]
        launchers.launch_analysis([project],
                                  restart_failed_jobs=args.restart_failed_jobs,
                                  restart_finished_jobs=args.restart_finished_jobs,
                                  restart_running_jobs=args.restart_running_jobs,
                                  quiet=args.quiet,
                                  manual=True)

    elif 'organize_fc_dirs' in args:
        if args.force_update: args.force_update = validate_force_update()
        if args.delete_existing: args.delete_existing = validate_delete_existing()
        if not args.restrict_to_projects: args.restrict_to_projects = []
        if not args.restrict_to_samples: args.restrict_to_samples = []
        organize_fc_dirs_set = set(args.organize_fc_dirs)
        projects_to_analyze = dict()
        ## NOTE this bit of code not currently in use but could use later
        #if args.already_parsed: # Starting from Project/Sample/Libprep/Seqrun tree format
        #    for organize_fc_dir in organize_fc_dirs_set:
        #        p = recreate_project_from_filesystem(organize_fc_dir,
        #                                             force_create_project=args.force_create_project)
Beispiel #10
0
    ## Analyze Project
    elif 'analyze_project_dirs' in args:
        for analyze_project_dir in args.analyze_project_dirs:
            try:
                project_dir = locate_project(analyze_project_dir)
            except ValueError as e:
                LOG.error(e)
                continue
            project_obj = \
                    recreate_project_from_filesystem(project_dir=project_dir,
                                                     restrict_to_samples=args.restrict_to_samples)
            launchers.launch_analysis([project_obj],
                                      restart_failed_jobs=args.restart_failed_jobs,
                                      restart_finished_jobs=args.restart_finished_jobs,
                                      restart_running_jobs=args.restart_running_jobs,
                                      keep_existing_data=args.keep_existing_data,
                                      no_qc=args.no_qc,
                                      quiet=args.quiet,
                                      manual=True,
                                      generate_bqsr_bam=args.generate_bqsr_bam)

    elif 'delete_proj_analysis' in args:
        from ngi_pipeline.conductor.launchers import get_engine_for_bp
        delete_proj_analysis_list = list(set(args.delete_proj_analysis))
        for delete_proj_analysis in delete_proj_analysis_list:
            if args.restrict_to_samples:
                try:
                    project_dir = locate_project(delete_proj_analysis)
                except ValueError as e:
                    LOG.error(e)
                    continue