Beispiel #1
0
 def test_mail_analysis_info(self, mock_trace, mock_mail):
     """Send info email"""
     mock_trace.return_value = [('some_file.py', 
                                 42, 
                                 '<module>', 
                                 'some_function()'), ()]
     mail_analysis(project_name=self.project_name,
                   sample_name=self.sample_name,
                   engine_name=self.engine_name,
                   level='INFO',
                   info_text='Some information',
                   workflow=self.workflow,
                   config_file_path='data/test_ngi_config_minimal.yaml')
     mock_mail.assert_called_once_with(origin='*****@*****.**', 
                                       recipient='some_user@some_email.com', 
                                       subject='[S.One_15_01] [Some Workflow] [INFO] analysis information / status update', 
                                       text=('Get a load of this:\n'
                                             'Project: S.One_15_01\n'
                                             'Sample: P1155_101\n'
                                             'Engine: piper_ngi\n'
                                             'Workflow: Some Workflow\n'
                                             'File: some_file.py\n'
                                             'Line: 42\n\n'
                                             'Additional information:\n\n'
                                             'Some information\n'))
Beispiel #2
0
def recurse_status_for_sample(project_obj, set_status, update_done=False):
    """Set seqruns under sample to have status "set_status"
    """

    charon_session = CharonSession()
    project_id = project_obj.project_id
    for sample_obj in project_obj:
        # There's only one sample but this is an iterator
        sample_id = sample_obj.name
    for libprep_obj in sample_obj:
        libprep_id = libprep_obj.name
        for seqrun_obj in libprep_obj:
            seqrun_id = seqrun_obj.name
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            LOG.info(('Updating status of project/sample/libprep/seqrun '
                      '"{}" to "{}" in Charon ').format(label, set_status))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             alignment_status=set_status)
            except CharonError as e:
                error_text =('Could not update status of project/sample/libprep/seqrun '
                             '"{}" in Charon to "{}": {}'.format(label, set_status, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_obj.name,
                              level="ERROR", info_text=error_text)
Beispiel #3
0
def update_analysis(project_id, status):
    charon_session = CharonSession()
    mail_analysis(project_id,
                  engine_name='rna_ngi',
                  level='INFO' if status else 'ERROR')
    new_sample_status = 'ANALYZED' if status else 'FAILED'
    new_seqrun_status = 'DONE' if status else 'FAILED'
    for sample in charon_session.project_get_samples(project_id).get(
            "samples", {}):
        if sample.get('analysis_status') == "UNDER_ANALYSIS":
            LOG.info("Marking analysis of sample {}/{} as {}".format(
                project_id, sample.get('sampleid'), new_sample_status))
            charon_session.sample_update(project_id,
                                         sample.get('sampleid'),
                                         analysis_status=new_sample_status)
            for libprep in charon_session.sample_get_libpreps(
                    project_id, sample.get('sampleid')).get('libpreps', {}):
                if libprep.get('qc') != 'FAILED':
                    for seqrun in charon_session.libprep_get_seqruns(
                            project_id, sample.get('sampleid'),
                            libprep.get('libprepid')).get('seqruns', {}):
                        if seqrun.get('alignment_status') == "RUNNING":
                            LOG.info(
                                "Marking analysis of seqrun {}/{}/{}/{} as {}".
                                format(project_id, sample.get('sampleid'),
                                       libprep.get('libprepid'),
                                       seqrun.get('seqrunid'),
                                       new_seqrun_status))
                            charon_session.seqrun_update(
                                project_id,
                                sample.get('sampleid'),
                                libprep.get('libprepid'),
                                seqrun.get('seqrunid'),
                                alignment_status=new_seqrun_status)
Beispiel #4
0
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage for each.

    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param str sample_id: The sample name (e.g. P1170_105)

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises RuntimeError: If you specify both the seqrun_id and fcid and they don't match
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)

    charon_session = CharonSession()
    for libprep_id, seqruns in seqruns_by_libprep.iteritems():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            ma_coverage = _parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(label,  ma_coverage))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                              engine_name="piper_ngi", level="ERROR", info_text=error_text)
Beispiel #5
0
 def test_mail_analysis_error(self, mock_trace, mock_mail):
     """Send error email"""
     mock_trace.return_value = [('some_file.py', 
                                 42, 
                                 '<module>', 
                                 'some_function()'), ()]
     mail_analysis(project_name=self.project_name,
                   sample_name=self.sample_name,
                   engine_name=self.engine_name,
                   level='ERROR',
                   info_text='Error: some error',
                   workflow=self.workflow,
                   config_file_path='data/test_ngi_config_minimal.yaml')
     mock_mail.assert_called_once_with(origin='*****@*****.**', 
                                       recipient='some_user@some_email.com',
                                       subject='[S.One_15_01] [Some Workflow] [ERROR] analysis intervention required', 
                                       text=('This analysis has encountered an error:\n'
                                             'Project: S.One_15_01\n'
                                             'Sample: P1155_101\n'
                                             'Engine: piper_ngi\n'
                                             'Workflow: Some Workflow\n'
                                             'File: some_file.py\n'
                                             'Line: 42\n\n'
                                             'Additional information:\n\n'
                                             'Error: some error\n'))
def update_sample_duplication_and_coverage(project_id,
                                           sample_id,
                                           project_base_path,
                                           config=None,
                                           config_file_path=None):
    """Update Charon with the duplication rates for said sample.

    :param str project_base_path: The path to the project dir 
    :param str sample_id: The sample name (e.g. P1170_105)

    """

    dup_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                 'piper_ngi', '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(
        project_base_path, 'ANALYSIS', project_id, 'piper_ngi',
        '06_final_alignment_qc', "{}.clean.dedup.qc".format(sample_id),
        "genome_results.txt")

    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except:
        dup_pc = 0
        LOG.error(
            "Cannot find {}.metrics file for duplication rate at {}. Continuing."
            .format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError as e:
        cov = 0
        reads = 0
        LOG.error(
            "Cannot find genome_results.txt file for sample coverage at {}. Continuing."
            .format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info(
            'Updating sample "{}" in '
            'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'
            .format(sample_id, dup_pc, cov))
    except CharonError as e:
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate : {}'
                      'and coverage {}'.format("{}/{}".format(
                          project_id, sampleid, dup_pc, cov)))
        LOG.error(error_text)
        if not config.get('quiet'):
            mail_analysis(project_name=project_id,
                          sample_name=sample_id,
                          engine_name="piper_ngi",
                          level="ERROR",
                          info_text=error_text)
Beispiel #7
0
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None):
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for attempts in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warn('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError):
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", sample "{}", '
                               'workflow "{}": {}'.format(slurm_job_id, project, sample, workflow_subtask, e))
    try:
        set_status = "UNDER_ANALYSIS"
        LOG.info(('Updating Charon status for project/sample '
                  '{}/{} to {}').format(project, sample, set_status))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      analysis_status=set_status)
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj, "RUNNING")
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))

        LOG.error(error_text)
        if not config.get('quiet'):
            mail_analysis(project_name=project_id, sample_name=sample_id,
                      engine_name='piper_ngi', level="ERROR", info_text=error_text)
Beispiel #8
0
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage for each.

    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param str sample_id: The sample name (e.g. P1170_105)

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)

    charon_session = CharonSession()
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            genome_results_file_paths=glob.glob(os.path.join(piper_qc_dir, "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1]),"genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)

            reads=0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError as e :
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except :
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))

            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi", level="ERROR", info_text=error_text)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage for each.

    :param str piper_qc_dir: The path to the Piper qc dir (02_preliminary_alignment_qc at time of writing)
    :param str sample_id: The sample name (e.g. P1170_105)

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)

    charon_session = CharonSession()
    for libprep_id, seqruns in seqruns_by_libprep.iteritems():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            genome_results_file_paths=glob.glob(os.path.join(piper_qc_dir, "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1]),"genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir, sample_id, seqrun_id)

            reads=0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError as e :
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except :
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))

            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'.format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi", level="ERROR", info_text=error_text)
def update_sample_duplication_and_coverage(project_id, sample_id, project_base_path,
                                       config=None, config_file_path=None):
    """Update Charon with the duplication rates for said sample.

    :param str project_base_path: The path to the project dir 
    :param str sample_id: The sample name (e.g. P1170_105)

    """
    
    dup_file_path=os.path.join(project_base_path, 'ANALYSIS', project_id, 'piper_ngi', '05_processed_alignments', "{}.metrics".format(sample_id))
    genome_results_file_path=os.path.join(project_base_path, 'ANALYSIS', project_id, 'piper_ngi', '06_final_alignment_qc', "{}.clean.dedup.qc".format(sample_id),"genome_results.txt")

    try:
        dup_pc=parse_deduplication_percentage(dup_file_path)
    except:
        dup_pc=0
        LOG.error("Cannot find {}.metrics file for duplication rate at {}. Continuing.".format(sample_id, dup_file_path))
    try:
        cov=parse_qualimap_coverage(genome_results_file_path)
        reads=parse_qualimap_reads(genome_results_file_path)
    except IOError as e:
        cov=0
        reads=0
        LOG.error("Cannot find genome_results.txt file for sample coverage at {}. Continuing.".format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id,
                                     sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info('Updating sample "{}" in '
                 'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'.format(sample_id, dup_pc, cov))
    except CharonError as e:
        error_text = ('Could not update project/sample "{}/{}" '
                    'in Charon with duplication rate : {}'
                      'and coverage {}'.format("{}/{}".format(project_id, sampleid, dup_pc, cov)))
        LOG.error(error_text)
        if not config.get('quiet'):
            mail_analysis(project_name=project_id, sample_name=sample_id,
                          engine_name="piper_ngi", level="ERROR", info_text=error_text)
Beispiel #11
0
def recurse_status_for_sample(project_obj, status_field, status_value, update_done=False,
                              extra_args=None, config=None, config_file_path=None):
    """Set seqruns under sample to have status for field <status_field> to <status_value>
    """

    if not extra_args:
        extra_args = {}
    extra_args.update({status_field: status_value})
    charon_session = CharonSession()
    project_id = project_obj.project_id
    for sample_obj in project_obj:
        # There's only one sample but this is an iterator so we iterate
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
                LOG.info('Updating status for field "{}" of project/sample/libprep/seqrun '
                         '"{}" to "{}" in Charon '.format(status_field, label, status_value))
                try:
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 **extra_args)
                except CharonError as e:
                    error_text = ('Could not update {} for project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(status_field,
                                                                      label,
                                                                      status_value,
                                                                      e))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_id, sample_name=sample_obj.name,
                                      level="ERROR", info_text=error_text, workflow=status_field)
Beispiel #12
0
def handle_sample_status(analysis_object, sample, charon_reported_status):
    """ returns true of false wether the sample should be analyzed"""
    if charon_reported_status == "UNDER_ANALYSIS":
        if not analysis_object.restart_running_jobs:
            error_text = ('Charon reports seqrun analysis for project "{}" '
                          '/ sample "{}" does not need processing (already '
                          '"{}")'.format(analysis_object.project, sample,
                                         charon_reported_status))
            LOG.error(error_text)
            if not analysis_object.config.get('quiet'):
                mail_analysis(project_name=analysis_object.project.name,
                              sample_name=sample.name,
                              engine_name=analysis_module.__name__,
                              level="ERROR",
                              info_text=error_text)
            return False
        else:
            return True
    elif charon_reported_status == "ANALYZED":
        if not analysis_object.restart_finished_jobs:
            error_text = ('Charon reports seqrun analysis for project "{}" '
                          '/ sample "{}" does not need processing (already '
                          '"{}")'.format(analysis_object.project, sample,
                                         charon_reported_status))
            LOG.error(error_text)
            if not analysis_object.config.get(
                    'quiet') and not analysis_object.config.get('manual'):
                mail_analysis(project_name=analysis_object.project.name,
                              sample_name=sample.name,
                              engine_name=analysis_module.__name__,
                              level="ERROR",
                              info_text=error_text)
            return False
        else:
            return True
    elif charon_reported_status == "FAILED":
        if not analysis_object.restart_failed_jobs:
            error_text = ('FAILED:  Project "{}" / sample "{}" Charon reports '
                          'FAILURE, manual investigation needed!'.format(
                              analysis_object.project, sample))
            LOG.error(error_text)
            if not analysis_object.config.get('quiet'):
                mail_analysis(project_name=analysis_object.project.name,
                              sample_name=sample.name,
                              engine_name=analysis_module.__name__,
                              level="ERROR",
                              info_text=error_text)
            return False
        else:
            return True
    else:
        return True
Beispiel #13
0
def handle_sample_status(analysis_object, sample, charon_reported_status):
    """ returns true of false wether the sample should be analyzed"""
    if charon_reported_status == "UNDER_ANALYSIS":
        if not analysis_object.restart_running_jobs:
            error_text = ('Charon reports seqrun analysis for project "{}" '
                          '/ sample "{}" does not need processing (already '
                          '"{}")'.format(analysis_object.project, sample, charon_reported_status))
            LOG.error(error_text)
            if not analysis_object.config.get('quiet'):
                mail_analysis(project_name=analysis_object.project.name, sample_name=sample.name,
                              engine_name=analysis_module.__name__,
                              level="ERROR", info_text=error_text)
            return False 
        else:
            return True
    elif charon_reported_status == "ANALYZED":
        if not analysis_object.restart_finished_jobs:
            error_text = ('Charon reports seqrun analysis for project "{}" '
                          '/ sample "{}" does not need processing (already '
                          '"{}")'.format(analysis_object.project, sample, charon_reported_status))
            LOG.error(error_text)
            if not analysis_object.config.get('quiet') and not analysis_object.config.get('manual'):
                mail_analysis(project_name=analysis_object.project.name, sample_name=sample.name,
                              engine_name=analysis_module.__name__,
                              level="ERROR", info_text=error_text)
            return False 
        else:
            return True
    elif charon_reported_status == "FAILED":
        if not analysis_object.restart_failed_jobs:
            error_text = ('FAILED:  Project "{}" / sample "{}" Charon reports '
                          'FAILURE, manual investigation needed!'.format(analysis_object.project, sample))
            LOG.error(error_text)
            if not analysis_object.config.get('quiet'):
                mail_analysis(project_name=analysis_object.project.name, sample_name=sample.name,
                              engine_name=analysis_module.__name__,
                              level="ERROR", info_text=error_text)
            return False 
        else:
            return True
    else:
        return True
Beispiel #14
0
    def test_mail_analysis(self):
        # INFO
        mail_analysis(project_name=self.project_name,
                      sample_name=self.sample_name,
                      engine_name=self.engine_name,
                      level="INFO",
                      info_text="Your mom goes to college.",
                      workflow=self.workflow)
        # WARN
        mail_analysis(project_name=self.project_name,
                      sample_name=self.sample_name,
                      engine_name=self.engine_name,
                      level="WARN",
                      info_text="Your mom: she goes to college!",
                      workflow=self.workflow)

        # ERROR
        mail_analysis(project_name=self.project_name,
                      sample_name=self.sample_name,
                      engine_name=self.engine_name,
                      level="ERROR",
                      info_text="News about your mom -- she goes to college!!",
                      workflow=self.workflow)
    def test_mail_analysis(self):
        # INFO
        mail_analysis(project_name=self.project_name,
                      sample_name=self.sample_name,
                      engine_name=self.engine_name,
                      level="INFO",
                      info_text="Your mom goes to college.",
                      workflow=self.workflow)
        # WARN
        mail_analysis(project_name=self.project_name,
                      sample_name=self.sample_name,
                      engine_name=self.engine_name,
                      level="WARN",
                      info_text="Your mom: she goes to college!",
                      workflow=self.workflow)

        # ERROR
        mail_analysis(project_name=self.project_name,
                      sample_name=self.sample_name,
                      engine_name=self.engine_name,
                      level="ERROR",
                      info_text="News about your mom -- she goes to college!!",
                      workflow=self.workflow)
Beispiel #16
0
def launch_analysis(projects_to_analyze,
                    restart_failed_jobs=False,
                    restart_finished_jobs=False,
                    restart_running_jobs=False,
                    keep_existing_data=False,
                    no_qc=False,
                    exec_mode="sbatch",
                    quiet=False,
                    manual=False,
                    config=None,
                    config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    """
    for project in projects_to_analyze:  # Get information from Charon regarding which best practice analyses to run
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(
            config=config)
    charon_session = CharonSession()
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(
                project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        if not project_status == "OPEN":
            error_text = (
                'Data found on filesystem for project "{}" but Charon '
                'reports its status is not OPEN ("{}"). Not launching '
                'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name,
                              level="ERROR",
                              info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(
                project, e))
            continue
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis
            if not no_qc:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample,
                                           qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    error_text = (
                        'Cannot process project "{}" / sample "{}" / '
                        'engine "{}" : {}'.format(project, sample,
                                                  analysis_module.__name__, e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR",
                                      info_text=e)
            # Launch actual best-practice analysis
            try:
                charon_reported_status = charon_session.sample_get(
                    project.project_id, sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                if charon_reported_status == "UNDER_ANALYSIS":
                    if not restart_running_jobs:
                        error_text = (
                            'Charon reports seqrun analysis for project "{}" '
                            '/ sample "{}" does not need processing (already '
                            '"{}")'.format(project, sample,
                                           charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR",
                                          info_text=error_text)
                        continue
                elif charon_reported_status == "ANALYZED":
                    if not restart_finished_jobs:
                        error_text = (
                            'Charon reports seqrun analysis for project "{}" '
                            '/ sample "{}" does not need processing (already '
                            '"{}")'.format(project, sample,
                                           charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet') and not config.get(
                                'manual'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR",
                                          info_text=error_text)
                        continue
                elif charon_reported_status == "FAILED":
                    if not restart_failed_jobs:
                        error_text = (
                            'FAILED:  Project "{}" / sample "{}" Charon reports '
                            'FAILURE, manual investigation needed!'.format(
                                project, sample))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR",
                                          info_text=error_text)
                        continue
            except CharonError as e:
                LOG.error(e)
                continue
            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine'
                         '"{}"'.format(project, sample,
                                       analysis_module.__name__))
                #actual analysis launch
                analysis_module.analyze(
                    project=project,
                    sample=sample,
                    restart_finished_jobs=restart_finished_jobs,
                    restart_running_jobs=restart_running_jobs,
                    keep_existing_data=keep_existing_data,
                    exec_mode=exec_mode,
                    config=config,
                    generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(
                                  project, sample, analysis_module.__name__,
                                  e))
                LOG.error(error_text)
                if not config.get("quiet"):
                    mail_analysis(project_name=project.name,
                                  sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR",
                                  info_text=e)
                continue
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  info_text=error_text,
                                  workflow=workflow)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                        set_status = "ANALYZED" # sample level
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                        set_status = "DONE" # sample level
                    recurse_status = "DONE" # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="INFO",
                                      info_text=info_text,
                                      workflow=workflow)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    #run MultiQC
                    LOG.info("Running MultiQC on project {}".format(project_name))
                    try:
                        run_multiqc(project_base_path, project_id, project_name)
                    except Exception as e:
                        LOG.error(e)


                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                           project_base_path)

                        
                    elif workflow == "genotype_concordance":
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="ERROR",
                                      info_text=error_text,
                                      workflow=workflow)
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj, status_field=seqrun_status_field,
                                              status_value=set_status, config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name,
                                          sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text,
                                          workflow=workflow)
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                            recurse_status = "RUNNING"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            charon_status = \
                                    charon_session.sample_get(projectid=project_id,
                                                              sampleid=sample_id).get(sample_status_field)
                            if charon_status and not charon_status == set_status:
                                LOG.warn('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name, sample_name=sample_id,
                                              engine_name=engine, level="ERROR",
                                              workflow=workflow, info_text=error_text)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
        session.commit()
Beispiel #18
0
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    """
    for project in projects_to_analyze: # Get information from Charon regarding which best practice analyses to run
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)
    charon_session = CharonSession()
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if not project_status == "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name, level="ERROR", info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e: # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis
            if not no_qc:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample, qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    error_text = ('Cannot process project "{}" / sample "{}" / '
                                  'engine "{}" : {}'.format(project, sample,
                                                            analysis_module.__name__,
                                                            e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name, sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR", info_text=e)
            # Launch actual best-practice analysis
            try:
                charon_reported_status = charon_session.sample_get(project.project_id,
                                                                   sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                if charon_reported_status == "UNDER_ANALYSIS":
                    if not restart_running_jobs:
                        error_text = ('Charon reports seqrun analysis for project "{}" '
                                      '/ sample "{}" does not need processing (already '
                                      '"{}")'.format(project, sample, charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name, sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
                elif charon_reported_status == "ANALYZED":
                    if not restart_finished_jobs:
                        error_text = ('Charon reports seqrun analysis for project "{}" '
                                      '/ sample "{}" does not need processing (already '
                                      '"{}")'.format(project, sample, charon_reported_status))
                        LOG.error(error_text)
                        if not config.get('quiet') and not config.get('manual'):
                            mail_analysis(project_name=project.name, sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
                elif charon_reported_status == "FAILED":
                    if not restart_failed_jobs:
                        error_text = ('FAILED:  Project "{}" / sample "{}" Charon reports '
                                      'FAILURE, manual investigation needed!'.format(project, sample))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project.name, sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
            except CharonError as e:
                LOG.error(e)
                continue
            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine'
                         '"{}"'.format(project, sample, analysis_module.__name__))
                #actual analysis launch
                analysis_module.analyze(project=project,
                                        sample=sample,
                                        restart_finished_jobs=restart_finished_jobs,
                                        restart_running_jobs=restart_running_jobs,
                                        keep_existing_data=keep_existing_data,
                                        exec_mode=exec_mode,
                                        config=config,
                                        generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(project, sample,
                                                        analysis_module.__name__,
                                                        e))
                LOG.error(error_text)
                if not config.get("quiet"):
                    mail_analysis(project_name=project.name, sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR", info_text=e)
                continue
Beispiel #19
0
def setup_analysis_directory_structure(fc_dir,
                                       projects_to_analyze,
                                       restrict_to_projects=None,
                                       restrict_to_samples=None,
                                       create_files=True,
                                       fallback_libprep=None,
                                       quiet=False,
                                       config=None,
                                       config_file_path=None):
    """
    Copy and sort files from their CASAVA-demultiplexed flowcell structure
    into their respective project/sample/libPrep/FCIDs. This collects samples
    split across multiple flowcells.

    :param str fc_dir: The directory created by CASAVA for this flowcell.
    :param dict config: The parsed configuration file.
    :param set projects_to_analyze: A dict (of Project objects, or empty)
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param list restrict_to_projects: Specific projects within the flowcell to process exclusively
    :param list restrict_to_samples: Specific samples within the flowcell to process exclusively

    :returns: A list of NGIProject objects that need to be run through the analysis pipeline
    :rtype: list

    :raises KeyError: If a required configuration key is not available.
    """
    LOG.info(
        "Setting up analysis for demultiplexed data in source folder \"{}\"".
        format(fc_dir))
    if not restrict_to_projects: restrict_to_projects = []
    if not restrict_to_samples: restrict_to_samples = []
    config[
        "quiet"] = quiet  # Hack because I enter here from a script sometimes
    #Checks flowcell path to establish which group owns it
    pattern = ".+({}|{})\/.+".format(config["analysis"]["sthlm_root"],
                                     config["analysis"]["upps_root"])
    matches = re.match(pattern, fc_dir)
    if matches:
        flowcell_uppnexid = matches.group(1)
    else:
        LOG.error(
            "cannot guess which project (sthlm/uppsala) the flowcell {} belongs to"
            .format(fc_dir))
        raise RuntimeError

    analysis_top_dir = os.path.abspath(
        os.path.join(config["analysis"]["base_root"], flowcell_uppnexid,
                     config["analysis"]["top_dir"]))
    try:
        safe_makedir(analysis_top_dir)
    except OSError as e:
        LOG.error(
            'Error: Analysis top directory {} does not exist and could not '
            'be created.'.format(analysis_top_dir))
    fc_dir = fc_dir if os.path.isabs(fc_dir) else os.path.join(
        analysis_top_dir, fc_dir)
    if not os.path.exists(fc_dir):
        LOG.error("Error: Flowcell directory {} does not exist".format(fc_dir))
        return []
    # Map the directory structure for this flowcell
    try:
        fc_dir_structure = parse_flowcell(fc_dir)
    except (OSError, ValueError) as e:
        LOG.error("Error when processing flowcell dir \"{}\": {}".format(
            fc_dir, e))
        return []
    fc_full_id = fc_dir_structure['fc_full_id']
    if not fc_dir_structure.get('projects'):
        LOG.warning(
            "No projects found in specified flowcell directory \"{}\"".format(
                fc_dir))

    # Iterate over the projects in the flowcell directory
    for project in fc_dir_structure.get('projects', []):
        project_name = project['project_name']
        project_original_name = project['project_original_name']
        samplesheet_path = fc_dir_structure.get("samplesheet_path")

        # parse the samplesheet and get the expected sample numbers assigned by bcl2fastq
        samplesheet_sample_numbers = get_sample_numbers_from_samplesheet(
            samplesheet_path) if samplesheet_path else None

        try:
            # Maps e.g. "Y.Mom_14_01" to "P123"
            project_id = get_project_id_from_name(project_name)
        except (CharonError, RuntimeError, ValueError) as e:
            LOG.warning(
                'Could not retrieve project id from Charon (record missing?). '
                'Using project name ("{}") as project id '
                '(error: {})'.format(project_name, e))
            project_id = project_name
        # If specific projects are specified, skip those that do not match
        if restrict_to_projects and project_name not in restrict_to_projects and \
                                    project_id not in restrict_to_projects:
            LOG.debug(
                "Skipping project {} (not in restrict_to_projects)".format(
                    project_name))
            continue
        LOG.info("Setting up project {}".format(project.get("project_name")))
        # Create a project directory if it doesn't already exist, including
        # intervening "DATA" directory
        project_dir = os.path.join(analysis_top_dir, "DATA", project_id)
        project_sl_dir = os.path.join(analysis_top_dir, "DATA", project_name)
        project_analysis_dir = os.path.join(analysis_top_dir, "ANALYSIS",
                                            project_id)
        project_analysis_sl_dir = os.path.join(analysis_top_dir, "ANALYSIS",
                                               project_name)
        if create_files:
            safe_makedir(project_dir, 0o2770)
            safe_makedir(project_analysis_dir, 0o2770)
            if not project_dir == project_sl_dir and \
               not os.path.exists(project_sl_dir):
                os.symlink(project_dir, project_sl_dir)
            if not project_analysis_dir == project_analysis_sl_dir and \
               not os.path.exists(project_analysis_sl_dir):
                os.symlink(project_analysis_dir, project_analysis_sl_dir)
        try:
            project_obj = projects_to_analyze[project_dir]
        except KeyError:
            project_obj = NGIProject(name=project_name,
                                     dirname=project_id,
                                     project_id=project_id,
                                     base_path=analysis_top_dir)
            projects_to_analyze[project_dir] = project_obj
        # Iterate over the samples in the project
        for sample in project.get('samples', []):
            sample_name = sample['sample_name']
            # If specific samples are specified, skip those that do not match
            if restrict_to_samples and sample_name not in restrict_to_samples:
                LOG.debug("Skipping sample {}: not in specified samples "
                          "{}".format(sample_name,
                                      ", ".join(restrict_to_samples)))
                continue
            LOG.info("Setting up sample {}".format(sample_name))
            # Create a directory for the sample if it doesn't already exist
            sample_dir = os.path.join(project_dir, sample_name)
            if create_files: safe_makedir(sample_dir, 0o2770)
            # This will only create a new sample object if it doesn't already exist in the project
            sample_obj = project_obj.add_sample(name=sample_name,
                                                dirname=sample_name)
            # Get the Library Prep ID for each file
            pattern = re.compile(".*\.(fastq|fq)(\.gz|\.gzip|\.bz2)?$")
            fastq_files = list(filter(pattern.match, sample.get('files', [])))
            # For each fastq file, create the libprep and seqrun objects
            # and add the fastq file to the seqprep object
            # Note again that these objects only get created if they don't yet exist;
            # if they do exist, the existing object is returned
            for fq_file in fastq_files:
                # Try to use assignment from SampleSheet
                samplesheet_sample = match_fastq_sample_number_to_samplesheet(
                    fq_file, samplesheet_sample_numbers, project_id)
                if samplesheet_sample is not None and \
                        samplesheet_sample[6] is not None:
                    libprep_name = samplesheet_sample[6]
                else:
                    LOG.debug(
                        'Unable to determine library prep from sample sheet file; try to determine from Charon'
                    )
                    try:
                        # Requires Charon access
                        libprep_name = determine_library_prep_from_fcid(
                            project_id, sample_name, fc_full_id)
                        LOG.debug('Found libprep name "{}" in Charon'.format(
                            libprep_name))
                    except ValueError:
                        charon_session = CharonSession()
                        libpreps = charon_session.sample_get_libpreps(
                            project_id, sample_name).get('libpreps')
                        if len(libpreps) == 1:
                            libprep_name = libpreps[0].get('libprepid')
                            LOG.warning(
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon, but only one '
                                'library prep is present in Charon ("{}"). Using '
                                'this as the library prep.'.format(
                                    project_name, sample_name, fc_full_id,
                                    fq_file, libprep_name))
                        elif fallback_libprep:
                            libprep_name = fallback_libprep
                            LOG.warning(
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon, but a fallback '
                                'libprep value of "{}" was supplied -- using this '
                                'value.'.format(project_name, sample_name,
                                                fc_full_id, fq_file,
                                                libprep_name))
                        else:
                            error_text = (
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon. Skipping '
                                'analysis.'.format(project_name, sample_name,
                                                   fc_full_id, fq_file))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
                libprep_object = sample_obj.add_libprep(name=libprep_name,
                                                        dirname=libprep_name)
                libprep_dir = os.path.join(sample_dir, libprep_name)
                if create_files: safe_makedir(libprep_dir, 0o2770)
                seqrun_object = libprep_object.add_seqrun(name=fc_full_id,
                                                          dirname=fc_full_id)
                seqrun_dir = os.path.join(libprep_dir, fc_full_id)
                if create_files: safe_makedir(seqrun_dir, 0o2770)
                seqrun_object.add_fastq_files(fq_file)
            if fastq_files and create_files:
                src_sample_dir = os.path.join(fc_dir_structure['fc_dir'],
                                              project['data_dir'],
                                              project['project_dir'],
                                              sample['sample_dir'])
                for libprep_obj in sample_obj:
                    for seqrun_obj in libprep_obj:
                        src_fastq_files = [
                            os.path.join(src_sample_dir, fastq_file)
                            for fastq_file in seqrun_obj.fastq_files
                        ]
                        seqrun_dst_dir = os.path.join(project_obj.base_path,
                                                      "DATA",
                                                      project_obj.dirname,
                                                      sample_obj.dirname,
                                                      libprep_obj.dirname,
                                                      seqrun_obj.dirname)
                        LOG.info(
                            "Symlinking fastq files from {} to {}...".format(
                                src_sample_dir, seqrun_dst_dir))
                        try:
                            do_symlink(src_fastq_files, seqrun_dst_dir)
                        except OSError:
                            error_text = (
                                'Could not symlink files for project/sample'
                                'libprep/seqrun {}/{}/{}/{}'.format(
                                    project_obj, sample_obj, libprep_obj,
                                    seqrun_obj))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
    return projects_to_analyze
Beispiel #20
0
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    multiqc_projects=set()
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  info_text=error_text,
                                  workflow=workflow)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                        set_status = "ANALYZED" # sample level
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                        set_status = "DONE" # sample level
                    recurse_status = "DONE" # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="INFO",
                                      info_text=info_text,
                                      workflow=workflow)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    #add project to MultiQC
                    multiqc_projects.add((project_base_path, project_id, project_name))


                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                           project_base_path)

                        
                    elif workflow == "genotype_concordance":
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="ERROR",
                                      info_text=error_text,
                                      workflow=workflow)
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj, status_field=seqrun_status_field,
                                              status_value=set_status, config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name,
                                          sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text,
                                          workflow=workflow)
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                            recurse_status = "RUNNING"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            remote_sample=charon_session.sample_get(projectid=project_id, sampleid=sample_id)
                            charon_status = remote_sample.get(sample_status_field)
                            if charon_status and not charon_status == set_status:
                                LOG.warning('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name, sample_name=sample_id,
                                              engine_name=engine, level="ERROR",
                                              workflow=workflow, info_text=error_text)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  workflow=workflow, info_text=error_text)
        session.commit()
    #Run Multiqc
    for pj_tuple in multiqc_projects:
        LOG.info("Running MultiQC on project {}".format(pj_tuple[1]))
        run_multiqc(pj_tuple[0], pj_tuple[1], pj_tuple[2])
Beispiel #21
0
def update_charon_with_local_jobs_status(config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine=sample_entry.engine
            # Only one of these will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e: # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for sample run {}/{}: {}'.format(project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                              engine_name=engine, level="ERROR", info_text=error_text)
                continue
            try:
                if piper_exit_code and piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED"
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow, label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="INFO", info_text=info_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status="DONE"
                    recurse_status_for_sample(project_obj, recurse_status)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # Parse seqrun output results / update Charon
                    # This is a semi-optional step -- failure here will send an
                    # email but not more than once. The record is still removed
                    # from the local jobs database, so this will have to be done
                    # manually if you want it done at all.
                    piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                project_id,"piper_ngi",  "02_preliminary_alignment_qc")
                    update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir)
                elif piper_exit_code and piper_exit_code >0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                 '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name, sample_name=sample_id,
                                  engine_name=engine, level="ERROR", info_text=error_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status_for_sample(project_obj, set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None: # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running for '
                                      '{}: setting status to {} in Charon'.format(label, set_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name, sample_name=sample_id,
                                      engine_name=engine, level="ERROR", info_text=error_text)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     analysis_status=set_status)
                        recurse_status_for_sample(project_obj, set_status)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else: # Job still running
                        charon_status = charon_session.sample_get(projectid=project_id,
                                                                  sampleid=sample_id)['analysis_status']
                        if not charon_status == "UNDER_ANALYSIS":
                            set_status = "UNDER_ANALYSIS"
                            LOG.warn('Tracking inconsistency for {}: Charon status is "{}" but '
                                     'local process tracking database indicates it is running. '
                                     'Setting value in Charon to {}.'.format(label, charon_status,
                                                                             set_status))
                            charon_session.sample_update(projectid=project_id,
                                                         sampleid=sample_id,
                                                         analysis_status=set_status)
                            recurse_status_for_sample(project_obj, "RUNNING")
            except CharonError as e:
                error_text = ('Unable to update Charon status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                              engine_name=engine, level="ERROR", info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              'status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name, sample_name=sample_id,
                              engine_name=engine, level="ERROR", info_text=error_text)
        session.commit()
Beispiel #22
0
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None, config_file_path=None):
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for attempts in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", '
                               'sample "{}", workflow "{}": {}'.format(slurm_job_id,
                                                                       project,
                                                                       sample,
                                                                       workflow_subtask,
                                                                       e.message))
        extra_args = None
        if workflow_subtask == "merge_process_variantcall":
            sample_status_field = "analysis_status"
            sample_status_value = "UNDER_ANALYSIS"
            sample_data_status_field = "status"
            sample_data_status_value = '' #in his way it will not be updated
            seqrun_status_field = "alignment_status"
            seqrun_status_value = "RUNNING"
            extra_args = {"mean_autosomal_coverage": 0}
        elif workflow_subtask == "genotype_concordance":
            sample_status_field = seqrun_status_field = "genotype_status"
            sample_status_value = seqrun_status_value = "UNDER_ANALYSIS"
            sample_data_status_field = "status"
            sample_data_status_value = "STALE"
        else:
            raise ValueError('Charon field for workflow "{}" unknown; '
                             'cannot update Charon.'.format(workflow_subtask))
        try:
            LOG.info('Updating Charon status for project/sample '
                     '{}/{} key : {} value : {}'.format(project, sample, sample_status_field, sample_status_value))
            CharonSession().sample_update(projectid=project.project_id,
                                          sampleid=sample.name,
                                          **{sample_status_field: sample_status_value,
                                              sample_data_status_field: sample_data_status_value})
            project_obj = create_project_obj_from_analysis_log(project.name,
                                                               project.project_id,
                                                               project.base_path,
                                                               sample.name,
                                                               workflow_subtask)
            recurse_status_for_sample(project_obj,
                                      status_field=seqrun_status_field,
                                      status_value=seqrun_status_value,
                                      extra_args=extra_args,
                                      config=config)
        except CharonError as e:
            error_text = ('Could not update Charon status for project/sample '
                          '{}/{} due to error: {}'.format(project, sample, e))

            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.project_id,
                              sample_name=sample.name,
                              engine_name='piper_ngi',
                              level="ERROR",
                              info_text=error_text,
                              workflow=workflow_subtask)
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None, config_file_path=None):
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for attempts in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warn('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", '
                               'sample "{}", workflow "{}": {}'.format(slurm_job_id,
                                                                       project,
                                                                       sample,
                                                                       workflow_subtask,
                                                                       e.message))
        extra_args = None
        if workflow_subtask == "merge_process_variantcall":
            sample_status_field = "analysis_status"
            sample_status_value = "UNDER_ANALYSIS"
            seqrun_status_field = "alignment_status"
            seqrun_status_value = "RUNNING"
            extra_args = {"mean_autosomal_coverage": 0}
        elif workflow_subtask == "genotype_concordance":
            sample_status_field = seqrun_status_field = "genotype_status"
            sample_status_value = seqrun_status_value = "UNDER_ANALYSIS"
        else:
            raise ValueError('Charon field for workflow "{}" unknown; '
                             'cannot update Charon.'.format(workflow_subtask))
        try:
            LOG.info('Updating Charon status for project/sample '
                     '{}/{} key : {} value : {}'.format(project, sample, sample_status_field, sample_status_value))
            CharonSession().sample_update(projectid=project.project_id,
                                          sampleid=sample.name,
                                          **{sample_status_field: sample_status_value})
            project_obj = create_project_obj_from_analysis_log(project.name,
                                                               project.project_id,
                                                               project.base_path,
                                                               sample.name,
                                                               workflow_subtask)
            recurse_status_for_sample(project_obj,
                                      status_field=seqrun_status_field,
                                      status_value=seqrun_status_value,
                                      extra_args=extra_args,
                                      config=config)
        except CharonError as e:
            error_text = ('Could not update Charon status for project/sample '
                          '{}/{} due to error: {}'.format(project, sample, e))

            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.project_id,
                              sample_name=sample.name,
                              engine_name='piper_ngi',
                              level="ERROR",
                              info_text=error_text,
                              workflow=workflow_subtask)
Beispiel #24
0
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    """
    charon_session = CharonSession()
    for project in projects_to_analyze:
        analysis=NGIAnalysis(project=project, restart_failed_jobs=restart_failed_jobs,
                    restart_finished_jobs=restart_finished_jobs,
                    restart_running_jobs=restart_running_jobs,
                    keep_existing_data=keep_existing_data, no_qc=no_qc,
                    exec_mode=exec_mode, quiet=quiet, manual=manual,
                    config=config, config_file_path=config_file_path,
                    generate_bqsr_bam=generate_bqsr_bam, log=LOG)
        #update charon with the current analysis status
        analysis.engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if not project_status == "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name, level="ERROR", info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e: # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis
            if not no_qc:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample, qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    error_text = ('Cannot process project "{}" / sample "{}" / '
                                  'engine "{}" : {}'.format(project, sample,
                                                            analysis_module.__name__,
                                                            e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name, sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR", info_text=e)
            # Launch actual best-practice analysis
        analysis.engine.analyze(analysis)
Beispiel #25
0
def setup_analysis_directory_structure(fc_dir, projects_to_analyze,
                                       restrict_to_projects=None, restrict_to_samples=None,
                                       create_files=True,
                                       fallback_libprep=None,
                                       quiet=False,
                                       config=None, config_file_path=None):
    """
    Copy and sort files from their CASAVA-demultiplexed flowcell structure
    into their respective project/sample/libPrep/FCIDs. This collects samples
    split across multiple flowcells.

    :param str fc_dir: The directory created by CASAVA for this flowcell.
    :param dict config: The parsed configuration file.
    :param set projects_to_analyze: A dict (of Project objects, or empty)
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param list restrict_to_projects: Specific projects within the flowcell to process exclusively
    :param list restrict_to_samples: Specific samples within the flowcell to process exclusively

    :returns: A list of NGIProject objects that need to be run through the analysis pipeline
    :rtype: list

    :raises KeyError: If a required configuration key is not available.
    """
    LOG.info("Setting up analysis for demultiplexed data in source folder \"{}\"".format(fc_dir))
    if not restrict_to_projects: restrict_to_projects = []
    if not restrict_to_samples: restrict_to_samples = []
    config["quiet"] = quiet # Hack because I enter here from a script sometimes
    pattern="(.+(?:{}|{}))\/.+".format(config["analysis"]["sthlm_root"], config["analysis"]["upps_root"])
    matches=re.match(pattern, fc_dir)
    if matches:
        flowcell_root=matches.group(1)
    else:
        LOG.error("cannot guess which project the flowcell {} belongs to".format(fc_dir))
        raise RuntimeError

    analysis_top_dir = os.path.abspath(os.path.join(flowcell_root,config["analysis"]["top_dir"]))
    try:
        safe_makedir(analysis_top_dir)
    except OSError as e:
        LOG.error('Error: Analysis top directory {} does not exist and could not '
                  'be created.'.format(analysis_top_dir))
    fc_dir = fc_dir if os.path.isabs(fc_dir) else os.path.join(analysis_top_dir, fc_dir)
    if not os.path.exists(fc_dir):
        LOG.error("Error: Flowcell directory {} does not exist".format(fc_dir))
        return []
    # Map the directory structure for this flowcell
    try:
        fc_dir_structure = parse_flowcell(fc_dir)
    except (OSError, ValueError) as e:
        LOG.error("Error when processing flowcell dir \"{}\": {}".format(fc_dir, e))
        return []
    fc_full_id = fc_dir_structure['fc_full_id']
    if not fc_dir_structure.get('projects'):
        LOG.warn("No projects found in specified flowcell directory \"{}\"".format(fc_dir))
    # Iterate over the projects in the flowcell directory
    for project in fc_dir_structure.get('projects', []):
        project_name = project['project_name']
        project_original_name = project['project_original_name']
        samplesheet_path = fc_dir_structure.get("samplesheet_path")
        try:
            # Maps e.g. "Y.Mom_14_01" to "P123"
            project_id = get_project_id_from_name(project_name)
        except (CharonError, RuntimeError, ValueError) as e:
            LOG.warn('Could not retrieve project id from Charon (record missing?). '
                     'Using project name ("{}") as project id '
                     '(error: {})'.format(project_name, e))
            project_id = project_name
        # If specific projects are specified, skip those that do not match
        if restrict_to_projects and project_name not in restrict_to_projects and \
                                    project_id not in restrict_to_projects:
            LOG.debug("Skipping project {} (not in restrict_to_projects)".format(project_name))
            continue
        LOG.info("Setting up project {}".format(project.get("project_name")))
        # Create a project directory if it doesn't already exist, including
        # intervening "DATA" directory
        project_dir = os.path.join(analysis_top_dir, "DATA", project_id)
        project_sl_dir = os.path.join(analysis_top_dir, "DATA", project_name)
        project_analysis_dir = os.path.join(analysis_top_dir, "ANALYSIS", project_id)
        project_analysis_sl_dir = os.path.join(analysis_top_dir, "ANALYSIS", project_name)
        if create_files:
            safe_makedir(project_dir, 0o2770)
            safe_makedir(project_analysis_dir, 0o2770)
            if not project_dir == project_sl_dir and \
               not os.path.exists(project_sl_dir):
                os.symlink(project_dir, project_sl_dir)
            if not project_analysis_dir == project_analysis_sl_dir and \
               not os.path.exists(project_analysis_sl_dir):
                os.symlink(project_analysis_dir, project_analysis_sl_dir)
        try:
            project_obj = projects_to_analyze[project_dir]
        except KeyError:
            project_obj = NGIProject(name=project_name, dirname=project_id,
                                     project_id=project_id,
                                     base_path=analysis_top_dir)
            projects_to_analyze[project_dir] = project_obj
        # Iterate over the samples in the project
        for sample in project.get('samples', []):
            sample_name = sample['sample_name']
            # If specific samples are specified, skip those that do not match
            if restrict_to_samples and sample_name not in restrict_to_samples:
                LOG.debug("Skipping sample {}: not in specified samples "
                          "{}".format(sample_name, ", ".join(restrict_to_samples)))
                continue
            LOG.info("Setting up sample {}".format(sample_name))
            # Create a directory for the sample if it doesn't already exist
            sample_dir = os.path.join(project_dir, sample_name)
            if create_files: safe_makedir(sample_dir, 0o2770)
            # This will only create a new sample object if it doesn't already exist in the project
            sample_obj = project_obj.add_sample(name=sample_name, dirname=sample_name)
            # Get the Library Prep ID for each file
            pattern = re.compile(".*\.(fastq|fq)(\.gz|\.gzip|\.bz2)?$")
            fastq_files = filter(pattern.match, sample.get('files', []))
            # For each fastq file, create the libprep and seqrun objects
            # and add the fastq file to the seqprep object
            # Note again that these objects only get created if they don't yet exist;
            # if they do exist, the existing object is returned
            for fq_file in fastq_files:
                # Try to parse from SampleSheet
                try:
                    if not samplesheet_path: raise ValueError()
                    lane_num = re.match(r'[\w-]+_L\d{2}(\d)_\w+', fq_file).groups()[0]
                    libprep_name = determine_library_prep_from_samplesheet(samplesheet_path,
                                                                           project_original_name,
                                                                           sample_name,
                                                                           lane_num)
                except (IndexError, ValueError) as e:
                    LOG.debug('Unable to determine library prep from sample sheet file '
                              '("{}"); try to determine from Charon'.format(e))
                    try:
                        # Requires Charon access
                        libprep_name = determine_library_prep_from_fcid(project_id, sample_name, fc_full_id)
                        LOG.debug('Found libprep name "{}" in Charon'.format(libprep_name))
                    except ValueError:
                        charon_session = CharonSession()
                        libpreps = charon_session.sample_get_libpreps(project_id, sample_name).get('libpreps')
                        if len(libpreps) == 1:
                            libprep_name = libpreps[0].get('libprepid')
                            LOG.warn('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                     'has no libprep information in Charon, but only one '
                                     'library prep is present in Charon ("{}"). Using '
                                     'this as the library prep.'.format(project_name,
                                                                        sample_name,
                                                                        fc_full_id,
                                                                        fq_file,
                                                                        libprep_name))
                        elif fallback_libprep:
                            libprep_name = fallback_libprep
                            LOG.warn('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                     'has no libprep information in Charon, but a fallback '
                                     'libprep value of "{}" was supplied -- using this '
                                     'value.'.format(project_name,
                                                     sample_name,
                                                     fc_full_id,
                                                     fq_file,
                                                     libprep_name,
                                                     fallback_libprep))
                        else:
                            error_text = ('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                          'has no libprep information in Charon. Skipping '
                                          'analysis.'.format(project_name, sample_name,
                                                             fc_full_id, fq_file))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
                libprep_object = sample_obj.add_libprep(name=libprep_name,
                                                        dirname=libprep_name)
                libprep_dir = os.path.join(sample_dir, libprep_name)
                if create_files: safe_makedir(libprep_dir, 0o2770)
                seqrun_object = libprep_object.add_seqrun(name=fc_full_id,
                                                          dirname=fc_full_id)
                seqrun_dir = os.path.join(libprep_dir, fc_full_id)
                if create_files: safe_makedir(seqrun_dir, 0o2770)
                seqrun_object.add_fastq_files(fq_file)
            if fastq_files and create_files:
                src_sample_dir = os.path.join(fc_dir_structure['fc_dir'],
                                              project['data_dir'],
                                              project['project_dir'],
                                              sample['sample_dir'])
                for libprep_obj in sample_obj:
                    for seqrun_obj in libprep_obj:
                        src_fastq_files = [os.path.join(src_sample_dir, fastq_file) for
                                           fastq_file in seqrun_obj.fastq_files]
                        seqrun_dst_dir = os.path.join(project_obj.base_path, project_obj.dirname,
                                                      sample_obj.dirname, libprep_obj.dirname,
                                                      seqrun_obj.dirname)
                        LOG.info("Symlinking fastq files from {} to {}...".format(src_sample_dir, seqrun_dir))
                        try:
                            do_symlink(src_fastq_files, seqrun_dir)
                        except OSError:
                            error_text = ('Could not symlink files for project/sample'
                                          'libprep/seqrun {}/{}/{}/{}'.format(project_obj,
                                                                              sample_obj,
                                                                              libprep_obj,
                                                                              seqrun_obj))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
    return projects_to_analyze
Beispiel #26
0
def launch_analysis(projects_to_analyze,
                    restart_failed_jobs=False,
                    restart_finished_jobs=False,
                    restart_running_jobs=False,
                    keep_existing_data=False,
                    no_qc=False,
                    exec_mode="sbatch",
                    quiet=False,
                    manual=False,
                    config=None,
                    config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    """
    charon_session = CharonSession()
    for project in projects_to_analyze:
        analysis = NGIAnalysis(project=project,
                               restart_failed_jobs=restart_failed_jobs,
                               restart_finished_jobs=restart_finished_jobs,
                               restart_running_jobs=restart_running_jobs,
                               keep_existing_data=keep_existing_data,
                               no_qc=no_qc,
                               exec_mode=exec_mode,
                               quiet=quiet,
                               manual=manual,
                               config=config,
                               config_file_path=config_file_path,
                               generate_bqsr_bam=generate_bqsr_bam,
                               log=LOG)
        #update charon with the current analysis status
        analysis.engine.local_process_tracking.update_charon_with_local_jobs_status(
            config=config)
        try:
            project_status = charon_session.project_get(
                project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        if not project_status == "OPEN":
            error_text = (
                'Data found on filesystem for project "{}" but Charon '
                'reports its status is not OPEN ("{}"). Not launching '
                'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name,
                              level="ERROR",
                              info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(
                project, e))
            continue
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis
            if not no_qc:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample,
                                           qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    error_text = (
                        'Cannot process project "{}" / sample "{}" / '
                        'engine "{}" : {}'.format(project, sample,
                                                  analysis_module.__name__, e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR",
                                      info_text=e)
            # Launch actual best-practice analysis
        analysis.engine.analyze(analysis)