def analyze(analysis_object, config=None, config_file_path=None):
    """Launch the RNA pipeline for a project with a supported reference genome.

    Walks the project's samples/libpreps/seqruns, asks Charon which of them
    still need analysis, collects their fastq files (excluding index reads),
    then writes and submits a single batch job for the whole project.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param dict config: unused here; kept for interface compatibility
    :param str config_file_path: unused here; kept for interface compatibility

    :raises RuntimeError: if the Charon project is registered with neither
        NGI-S (Stockholm) nor NGI-U (Uppsala)
    """
    charon_session = CharonSession()
    charon_pj = charon_session.project_get(analysis_object.project.project_id)
    reference_genome = charon_pj.get('reference')
    # The sequencing facility decides which RNA-pipeline config file to use.
    if charon_pj.get("sequencing_facility") == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif charon_pj.get("sequencing_facility") == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        error_msg = ("charon project not registered with stockholm or uppsala. "
                     "Which config file should we use for the RNA pipeline ?")
        LOG.error(error_msg)
        # Carry the message in the exception rather than raising a bare class.
        raise RuntimeError(error_msg)
    fastq_files = []
    # Only proceed when Charon lists a usable reference genome.
    if reference_genome and reference_genome != 'other':
        for sample in analysis_object.project:
            try:
                charon_reported_status = charon_session.sample_get(analysis_object.project.project_id,
                                                                   sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                do_analyze = handle_sample_status(analysis_object, sample, charon_reported_status)
                if not do_analyze:
                    continue
            except CharonError as e:
                LOG.error(e)
                # Without a Charon status we cannot decide whether to analyze;
                # skip the sample instead of falling through with an
                # undefined (or stale, from a previous iteration) do_analyze.
                continue

            for libprep in sample:
                charon_lp_status = charon_session.libprep_get(analysis_object.project.project_id, sample.name, libprep.name).get('qc')
                do_analyze = handle_libprep_status(analysis_object, libprep, charon_lp_status)
                if not do_analyze:
                    continue
                for seqrun in libprep:
                    charon_sr_status = charon_session.seqrun_get(analysis_object.project.project_id, sample.name, libprep.name, seqrun.name).get('alignment_status')
                    do_analyze = handle_seqrun_status(analysis_object, seqrun, charon_sr_status)
                    if not do_analyze:
                        continue
                    seqrun.being_analyzed = True
                    # At least one seqrun is analyzed, so the sample is too.
                    sample.being_analyzed = True
                    # filter out index files from analysis
                    for fastq_file in filter(lambda f: not is_index_file(f), seqrun.fastq_files):
                        fastq_path = os.path.join(analysis_object.project.base_path, "DATA", analysis_object.project.project_id, sample.name, libprep.name, seqrun.name, fastq_file)
                        fastq_files.append(fastq_path)

        if not fastq_files:
            LOG.error("No fastq files obtained for the analysis of project {}, please check the Charon status.".format(analysis_object.project.name))
        else:
            if analysis_object.restart_running_jobs:
                stop_ongoing_analysis(analysis_object)
            fastq_dir = preprocess_analysis(analysis_object, fastq_files)
            sbatch_path = write_batch_job(analysis_object, reference_genome, fastq_dir)
            job_id = start_analysis(sbatch_path)
            analysis_path = os.path.join(analysis_object.project.base_path, "ANALYSIS", analysis_object.project.project_id, 'rna_ngi')
            record_project_job(analysis_object.project, job_id, analysis_path)
# Example #2 (score: 0)
def analyze(analysis_object,
            level='sample',
            config=None,
            config_file_path=None):
    """Analyze data at the sample level.

    For each sample in the project, checks Charon for its analysis status,
    then builds and submits one Piper job per workflow subtask for the
    requested level ("sample" or "genotype").

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param str level: workflow level, "sample" or "genotype"
    :param dict config: unused here; kept for interface compatibility
    :param str config_file_path: unused here; kept for interface compatibility

    :raises ValueError: If exec_mode is an unsupported value
    :raises RuntimeError: if pre-existing sample runs abort processing
    """
    charon_session = CharonSession()
    for sample in analysis_object.project:
        try:
            charon_reported_status = charon_session.sample_get(
                analysis_object.project.project_id,
                sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            do_analyze = handle_sample_status(analysis_object, sample,
                                              charon_reported_status)
            if not do_analyze:
                continue
        except CharonError as e:
            LOG.error(e)
            continue
        # The Charon field that tracks this analysis depends on the level.
        if level == "sample":
            status_field = "alignment_status"
        elif level == "genotype":
            status_field = "genotype_status"
        else:
            # LOG.warn is a deprecated alias; use warning() as elsewhere.
            LOG.warning('Unknown workflow level: "{}"'.format(level))
            status_field = "alignment_status"  # Or should we abort?
        try:
            check_for_preexisting_sample_runs(
                analysis_object.project, sample,
                analysis_object.restart_running_jobs,
                analysis_object.restart_finished_jobs, status_field)
        except RuntimeError as e:
            # A pre-existing run we may not overwrite aborts the project.
            raise RuntimeError(
                'Aborting processing of project/sample "{}/{}": '
                '{}'.format(analysis_object.project, sample, e))
        if analysis_object.exec_mode.lower() not in ("sbatch", "local"):
            raise ValueError(
                '"exec_mode" param must be one of "sbatch" or "local" '
                'value was "{}"'.format(analysis_object.exec_mode))
        if analysis_object.exec_mode == "local":
            # Local execution needs the Piper environment modules loaded.
            modules_to_load = analysis_object.config.get("piper", {}).get(
                "load_modules", [])
            load_modules(modules_to_load)
        for workflow_subtask in workflows.get_subtasks_for_level(level=level):
            if level == "genotype":
                genotype_status = None  # Some records in Charon lack this field, I'm guessing
                try:
                    charon_session = CharonSession()
                    genotype_status = charon_session.sample_get(
                        projectid=analysis_object.project.project_id,
                        sampleid=sample.name).get("genotype_status")
                except CharonError as e:
                    LOG.error(
                        'Couldn\'t determine genotyping status for project/'
                        'sample "{}/{}"; skipping analysis.'.format(
                            analysis_object.project, sample))
                    continue
                if find_previous_genotype_analyses(
                        analysis_object.project,
                        sample) or genotype_status == "DONE":
                    if not analysis_object.restart_finished_jobs:
                        LOG.info(
                            'Project/sample "{}/{}" has completed genotype '
                            'analysis previously; skipping (use flag to force '
                            'analysis)'.format(analysis_object.project,
                                               sample))
                        continue
            if analysis_object.restart_running_jobs:
                # Kill currently-running jobs if they exist
                kill_running_sample_analysis(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name)
            # This checks the local jobs database
            if not is_sample_analysis_running_local(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name):
                LOG.info('Launching "{}" analysis for sample "{}" in project '
                         '"{}"'.format(workflow_subtask, sample,
                                       analysis_object.project))
                try:
                    log_file_path = create_log_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    rotate_file(log_file_path)
                    exit_code_path = create_exit_code_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    # Unless existing data is kept, wipe previous results
                    # for the current level before relaunching.
                    if level == "sample":
                        if not analysis_object.keep_existing_data:
                            remove_previous_sample_analyses(
                                analysis_object.project, sample)
                            default_files_to_copy = None
                    elif level == "genotype":
                        if not analysis_object.keep_existing_data:
                            remove_previous_genotype_analyses(
                                analysis_object.project)
                            default_files_to_copy = None

                    # Update the project to keep only valid fastq files for setup.xml creation
                    if level == "genotype":
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  restart_finished_jobs=True,
                                                                  status_field="genotype_status")
                    else:
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  analysis_object.restart_finished_jobs,
                                                                  status_field="alignment_status")
                    setup_xml_cl, setup_xml_path = build_setup_xml(
                        project=updated_project,
                        sample=sample,
                        workflow=workflow_subtask,
                        local_scratch_mode=(
                            analysis_object.exec_mode == "sbatch"),
                        config=analysis_object.config)
                    piper_cl = build_piper_cl(
                        project=analysis_object.project,
                        workflow_name=workflow_subtask,
                        setup_xml_path=setup_xml_path,
                        exit_code_path=exit_code_path,
                        config=analysis_object.config,
                        exec_mode=analysis_object.exec_mode,
                        generate_bqsr_bam=analysis_object.generate_bqsr_bam)
                    if analysis_object.exec_mode == "sbatch":
                        process_id = None
                        slurm_job_id = sbatch_piper_sample(
                            [setup_xml_cl, piper_cl],
                            workflow_subtask,
                            analysis_object.project,
                            sample,
                            restart_finished_jobs=analysis_object.
                            restart_finished_jobs,
                            files_to_copy=default_files_to_copy)
                        for x in xrange(10):
                            # Time delay to let sbatch get its act together
                            # (takes a few seconds to be visible with sacct)
                            try:
                                get_slurm_job_status(slurm_job_id)
                                break
                            except ValueError:
                                time.sleep(2)
                        else:
                            # for/else: all retries exhausted without a break.
                            LOG.error('sbatch file for sample {}/{} did not '
                                      'queue properly! Job ID {} cannot be '
                                      'found.'.format(analysis_object.project,
                                                      sample, slurm_job_id))
                    else:  # "local"
                        raise NotImplementedError(
                            'Local execution not currently implemented. '
                            'I\'m sure Denis can help you with this.')
                        #slurm_job_id = None
                        #launch_piper_job(setup_xml_cl, project)
                        #process_handle = launch_piper_job(piper_cl, project)
                        #process_id = process_handle.pid
                    try:
                        # Record the submitted job in the local jobs database.
                        record_process_sample(
                            project=analysis_object.project,
                            sample=sample,
                            analysis_module_name="piper_ngi",
                            slurm_job_id=slurm_job_id,
                            process_id=process_id,
                            workflow_subtask=workflow_subtask)
                    except RuntimeError as e:
                        LOG.error(e)
                        ## Question: should we just kill the run in this case or let it go?
                        continue
                except (NotImplementedError, RuntimeError, ValueError) as e:
                    error_msg = (
                        'Processing project "{}" / sample "{}" / workflow "{}" '
                        'failed: {}'.format(analysis_object.project, sample,
                                            workflow_subtask, e))
                    LOG.error(error_msg)
# Example #3 (score: 0)
def analyze(analysis_object, config=None, config_file_path=None):
    """Launch the RNA pipeline for a project with a supported reference genome.

    Walks the project's samples/libpreps/seqruns, asks Charon which of them
    still need analysis, collects their fastq files, then writes and submits
    a single batch job for the whole project.

    NOTE(review): unlike the other RNA variant in this file, this one does
    not filter out index files from ``seqrun.fastq_files`` — confirm that
    is intended before merging the two.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param dict config: unused here; kept for interface compatibility
    :param str config_file_path: unused here; kept for interface compatibility

    :raises RuntimeError: if the Charon project is registered with neither
        NGI-S (Stockholm) nor NGI-U (Uppsala)
    """
    charon_session = CharonSession()
    charon_pj = charon_session.project_get(analysis_object.project.project_id)
    reference_genome = charon_pj.get('reference')
    # The sequencing facility decides which RNA-pipeline config file to use.
    if charon_pj.get("sequencing_facility") == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif charon_pj.get("sequencing_facility") == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        error_msg = (
            "charon project not registered with stockholm or uppsala. Which config file should we use for the RNA pipeline ?"
        )
        LOG.error(error_msg)
        # Carry the message in the exception rather than raising a bare class.
        raise RuntimeError(error_msg)
    fastq_files = []
    if reference_genome and reference_genome != 'other':
        for sample in analysis_object.project:
            try:
                charon_reported_status = charon_session.sample_get(
                    analysis_object.project.project_id,
                    sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                do_analyze = handle_sample_status(analysis_object, sample,
                                                  charon_reported_status)
                if not do_analyze:
                    continue
            except CharonError as e:
                LOG.error(e)
                # Without a Charon status we cannot decide whether to analyze;
                # skip the sample instead of falling through with an
                # undefined (or stale, from a previous iteration) do_analyze.
                continue

            for libprep in sample:
                charon_lp_status = charon_session.libprep_get(
                    analysis_object.project.project_id, sample.name,
                    libprep.name).get('qc')
                do_analyze = handle_libprep_status(analysis_object, libprep,
                                                   charon_lp_status)
                if not do_analyze:
                    continue
                for seqrun in libprep:
                    charon_sr_status = charon_session.seqrun_get(
                        analysis_object.project.project_id, sample.name,
                        libprep.name, seqrun.name).get('alignment_status')
                    do_analyze = handle_seqrun_status(
                        analysis_object, seqrun, charon_sr_status)
                    if not do_analyze:
                        continue
                    seqrun.being_analyzed = True
                    # At least one seqrun is analyzed, so the sample is too.
                    sample.being_analyzed = True
                    for fastq_file in seqrun.fastq_files:
                        fastq_path = os.path.join(
                            analysis_object.project.base_path, "DATA",
                            analysis_object.project.project_id,
                            sample.name, libprep.name, seqrun.name,
                            fastq_file)
                        fastq_files.append(fastq_path)

        if not fastq_files:
            LOG.error(
                "No fastq files obtained for the analysis of project {}, please check the Charon status."
                .format(analysis_object.project.name))
        else:
            if analysis_object.restart_running_jobs:
                stop_ongoing_analysis(analysis_object)
            fastq_dir = preprocess_analysis(analysis_object, fastq_files)
            sbatch_path = write_batch_job(analysis_object, reference_genome,
                                          fastq_dir)
            job_id = start_analysis(sbatch_path)
            analysis_path = os.path.join(analysis_object.project.base_path,
                                         "ANALYSIS",
                                         analysis_object.project.project_id,
                                         'rna_ngi')
            record_project_job(analysis_object.project, job_id, analysis_path)
def analyze(analysis_object, level='sample', config=None, config_file_path=None):
    """Analyze data at the sample level.

    For each sample in the project, checks Charon for its analysis status,
    then builds and submits one Piper job per workflow subtask for the
    requested level ("sample" or "genotype").

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param str level: workflow level, "sample" or "genotype"
    :param dict config: unused here; kept for interface compatibility
    :param str config_file_path: unused here; kept for interface compatibility

    :raises ValueError: If exec_mode is an unsupported value
    :raises RuntimeError: if pre-existing sample runs abort processing
    """
    charon_session = CharonSession()
    for sample in analysis_object.project:
        try:
            charon_reported_status = charon_session.sample_get(analysis_object.project.project_id,
                                                               sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            do_analyze=handle_sample_status(analysis_object, sample, charon_reported_status)
            if not do_analyze :
                continue
        except CharonError as e:
            LOG.error(e)
            # Can't determine status for this sample; skip it.
            continue
        # The Charon field that tracks this analysis depends on the level.
        if level == "sample":
            status_field = "alignment_status"
        elif level == "genotype":
            status_field = "genotype_status"
        else:
            LOG.warning('Unknown workflow level: "{}"'.format(level))
            status_field = "alignment_status" # Or should we abort?
        try:
            check_for_preexisting_sample_runs(analysis_object.project, sample, analysis_object.restart_running_jobs,
                                              analysis_object.restart_finished_jobs, status_field)
        except RuntimeError as e:
            # A pre-existing run we may not overwrite aborts the whole project.
            raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                               '{}'.format(analysis_object.project, sample, e))
        if analysis_object.exec_mode.lower() not in ("sbatch", "local"):
            raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                             'value was "{}"'.format(analysis_object.exec_mode))
        if analysis_object.exec_mode == "local":
            # Local execution needs the Piper environment modules loaded here.
            modules_to_load = analysis_object.config.get("piper", {}).get("load_modules", [])
            load_modules(modules_to_load)
        for workflow_subtask in workflows.get_subtasks_for_level(level=level):
            if level == "genotype":
                genotype_status = None # Some records in Charon lack this field, I'm guessing
                try:
                    charon_session = CharonSession()
                    genotype_status = charon_session.sample_get(projectid=analysis_object.project.project_id,
                                                                sampleid=sample.name).get("genotype_status")
                except CharonError as e:
                    LOG.error('Couldn\'t determine genotyping status for project/'
                              'sample "{}/{}"; skipping analysis.'.format(analysis_object.project, sample))
                    continue
                # Skip genotyping already completed on disk or marked DONE in
                # Charon, unless a restart of finished jobs was requested.
                if find_previous_genotype_analyses(analysis_object.project, sample) or genotype_status == "DONE":
                    if not analysis_object.restart_finished_jobs:
                        LOG.info('Project/sample "{}/{}" has completed genotype '
                                 'analysis previously; skipping (use flag to force '
                                 'analysis)'.format(analysis_object.project, sample))
                        continue
            if analysis_object.restart_running_jobs:
                # Kill currently-running jobs if they exist
                kill_running_sample_analysis(workflow_subtask=workflow_subtask,
                                             project_id=analysis_object.project.project_id,
                                             sample_id=sample.name)
            # This checks the local jobs database
            if not is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                                    project_id=analysis_object.project.project_id,
                                                    sample_id=sample.name):
                LOG.info('Launching "{}" analysis for sample "{}" in project '
                         '"{}"'.format(workflow_subtask, sample, analysis_object.project))
                try:
                    log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                         project_base_path=analysis_object.project.base_path,
                                                         project_name=analysis_object.project.dirname,
                                                         project_id=analysis_object.project.project_id,
                                                         sample_id=sample.name)
                    rotate_file(log_file_path)
                    exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                                project_base_path=analysis_object.project.base_path,
                                                                project_name=analysis_object.project.dirname,
                                                                project_id=analysis_object.project.project_id,
                                                                sample_id=sample.name)
                    # Unless existing data is kept, wipe previous results for
                    # the current level before relaunching.
                    if level == "sample":
                        if not analysis_object.keep_existing_data:
                            remove_previous_sample_analyses(analysis_object.project, sample)
                            default_files_to_copy=None
                    elif level == "genotype":
                        if not analysis_object.keep_existing_data:
                            remove_previous_genotype_analyses(analysis_object.project)
                            default_files_to_copy=None

                    # Update the project to keep only valid fastq files for setup.xml creation
                    if level == "genotype":
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  restart_finished_jobs=True,
                                                                  status_field="genotype_status")
                    else:
                        updated_project, default_files_to_copy = \
                                collect_files_for_sample_analysis(analysis_object.project,
                                                                  sample,
                                                                  analysis_object.restart_finished_jobs,
                                                                  status_field="alignment_status")
                    # Build the setup.xml command line and the Piper command
                    # line that consumes it.
                    setup_xml_cl, setup_xml_path = build_setup_xml(project=updated_project,
                                                                   sample=sample,
                                                                   workflow=workflow_subtask,
                                                                   local_scratch_mode=(analysis_object.exec_mode == "sbatch"),
                                                                   config=analysis_object.config)
                    piper_cl = build_piper_cl(project=analysis_object.project,
                                              workflow_name=workflow_subtask,
                                              setup_xml_path=setup_xml_path,
                                              exit_code_path=exit_code_path,
                                              config=analysis_object.config,
                                              exec_mode=analysis_object.exec_mode,
                                              generate_bqsr_bam=analysis_object.generate_bqsr_bam)
                    if analysis_object.exec_mode == "sbatch":
                        process_id = None
                        slurm_job_id = sbatch_piper_sample([setup_xml_cl, piper_cl],
                                                           workflow_subtask,
                                                           analysis_object.project, sample,
                                                           restart_finished_jobs=analysis_object.restart_finished_jobs,
                                                           files_to_copy=default_files_to_copy)
                        for x in xrange(10):
                            # Time delay to let sbatch get its act together
                            # (takes a few seconds to be visible with sacct)
                            try:
                                get_slurm_job_status(slurm_job_id)
                                break
                            except ValueError:
                                time.sleep(2)
                        else:
                            # for/else: all retries exhausted without a break.
                            LOG.error('sbatch file for sample {}/{} did not '
                                      'queue properly! Job ID {} cannot be '
                                      'found.'.format(analysis_object.project, sample, slurm_job_id))
                    else: # "local"
                        raise NotImplementedError('Local execution not currently implemented. '
                                                  'I\'m sure Denis can help you with this.')
                        #slurm_job_id = None
                        #launch_piper_job(setup_xml_cl, project)
                        #process_handle = launch_piper_job(piper_cl, project)
                        #process_id = process_handle.pid
                    try:
                        # Record the submitted job in the local jobs database.
                        record_process_sample(project=analysis_object.project,
                                              sample=sample,
                                              analysis_module_name="piper_ngi",
                                              slurm_job_id=slurm_job_id,
                                              process_id=process_id,
                                              workflow_subtask=workflow_subtask)
                    except RuntimeError as e:
                        LOG.error(e)
                        ## Question: should we just kill the run in this case or let it go?
                        continue
                except (NotImplementedError, RuntimeError, ValueError) as e:
                    error_msg = ('Processing project "{}" / sample "{}" / workflow "{}" '
                                 'failed: {}'.format(analysis_object.project, sample,
                                                     workflow_subtask,
                                                     e))
                    LOG.error(error_msg)