def test_mail_analysis_info(self, mock_trace, mock_mail):
    """An INFO-level mail_analysis call sends exactly one status-update email."""
    # Canned value for the mocked trace lookup; the expected mail body below
    # reports the file name and line number of the first entry.
    fake_trace = [('some_file.py', 42, '<module>', 'some_function()'), ()]
    mock_trace.return_value = fake_trace

    mail_analysis(
        project_name=self.project_name,
        sample_name=self.sample_name,
        engine_name=self.engine_name,
        level='INFO',
        info_text='Some information',
        workflow=self.workflow,
        config_file_path='data/test_ngi_config_minimal.yaml',
    )

    expected_subject = ('[S.One_15_01] [Some Workflow] [INFO] analysis '
                        'information / status update')
    expected_text = ('Get a load of this:\n'
                     'Project: S.One_15_01\n'
                     'Sample: P1155_101\n'
                     'Engine: piper_ngi\n'
                     'Workflow: Some Workflow\n'
                     'File: some_file.py\n'
                     'Line: 42\n\n'
                     'Additional information:\n\n'
                     'Some information\n')
    mock_mail.assert_called_once_with(origin='*****@*****.**',
                                      recipient='some_user@some_email.com',
                                      subject=expected_subject,
                                      text=expected_text)
def recurse_status_for_sample(project_obj, set_status, update_done=False):
    """Set the alignment_status of every seqrun under the project to set_status.

    Walks project -> sample -> libprep -> seqrun and calls
    CharonSession.seqrun_update() for each seqrun. A CharonError for one
    seqrun is logged (and mailed unless the 'quiet' config option is set)
    and the walk continues.

    :param project_obj: Iterable project object exposing ``project_id``
    :param str set_status: The value to store as ``alignment_status``
    :param bool update_done: Not read by this function
    """
    # NOTE(review): `config` is not a parameter here; presumably a
    # module-level name -- confirm.
    info_template = ('Updating status of project/sample/libprep/seqrun '
                     '"{}" to "{}" in Charon ')
    session = CharonSession()
    project_id = project_obj.project_id
    # There's only one sample but this is an iterator
    for sample_obj in project_obj:
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                seqrun_path = "{}/{}/{}/{}".format(project_id, sample_id,
                                                   libprep_id, seqrun_id)
                LOG.info(info_template.format(seqrun_path, set_status))
                try:
                    session.seqrun_update(projectid=project_id,
                                          sampleid=sample_id,
                                          libprepid=libprep_id,
                                          seqrunid=seqrun_id,
                                          alignment_status=set_status)
                except CharonError as err:
                    failure_msg = ('Could not update status of project/sample/libprep/seqrun '
                                   '"{}" in Charon to "{}": {}'.format(seqrun_path,
                                                                       set_status, err))
                    LOG.error(failure_msg)
                    if config.get('quiet'):
                        continue
                    mail_analysis(project_name=project_id,
                                  sample_name=sample_obj.name,
                                  level="ERROR",
                                  info_text=failure_msg)
def update_analysis(project_id, status):
    """Mail the outcome of an analysis and record it in Charon.

    Samples whose ``analysis_status`` is "UNDER_ANALYSIS" are marked
    ANALYZED (truthy ``status``) or FAILED (falsy ``status``). For those
    samples, seqruns with ``alignment_status`` "RUNNING" under libpreps
    whose ``qc`` is not "FAILED" are marked DONE or FAILED respectively.

    :param str project_id: The Charon project id
    :param status: Truthy if the analysis succeeded, falsy otherwise
    """
    charon_session = CharonSession()
    if status:
        mail_level, new_sample_status, new_seqrun_status = 'INFO', 'ANALYZED', 'DONE'
    else:
        mail_level, new_sample_status, new_seqrun_status = 'ERROR', 'FAILED', 'FAILED'
    mail_analysis(project_id, engine_name='rna_ngi', level=mail_level)

    samples = charon_session.project_get_samples(project_id).get("samples", {})
    for sample in samples:
        if sample.get('analysis_status') != "UNDER_ANALYSIS":
            continue
        sample_id = sample.get('sampleid')
        LOG.info("Marking analysis of sample {}/{} as {}".format(
            project_id, sample_id, new_sample_status))
        charon_session.sample_update(project_id, sample_id,
                                     analysis_status=new_sample_status)

        libpreps = charon_session.sample_get_libpreps(
            project_id, sample_id).get('libpreps', {})
        for libprep in libpreps:
            if libprep.get('qc') == 'FAILED':
                continue
            libprep_id = libprep.get('libprepid')
            seqruns = charon_session.libprep_get_seqruns(
                project_id, sample_id, libprep_id).get('seqruns', {})
            for seqrun in seqruns:
                if seqrun.get('alignment_status') != "RUNNING":
                    continue
                seqrun_id = seqrun.get('seqrunid')
                LOG.info("Marking analysis of seqrun {}/{}/{}/{} as {}".format(
                    project_id, sample_id, libprep_id, seqrun_id,
                    new_seqrun_status))
                charon_session.seqrun_update(project_id, sample_id,
                                             libprep_id, seqrun_id,
                                             alignment_status=new_seqrun_status)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage for each.

    A CharonError while updating one seqrun is logged (and mailed unless the
    'quiet' config option is set); processing then continues with the next
    seqrun.

    :param str project_id: The Charon project id
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    # NOTE(review): `config` is not a parameter of this function; presumably
    # a module-level name -- confirm.
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    # .items() works on both Python 2 and 3; .iteritems() exists only on 2
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            ma_coverage = _parse_mean_coverage_from_qualimap(piper_qc_dir,
                                                             sample_id, seqrun_id)
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}"'.format(label, ma_coverage))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi", level="ERROR",
                                  info_text=error_text)
def test_mail_analysis_error(self, mock_trace, mock_mail):
    """An ERROR-level mail_analysis call sends exactly one intervention email."""
    # Canned value for the mocked trace lookup; the expected mail body below
    # reports the file name and line number of the first entry.
    fake_trace = [('some_file.py', 42, '<module>', 'some_function()'), ()]
    mock_trace.return_value = fake_trace

    mail_analysis(
        project_name=self.project_name,
        sample_name=self.sample_name,
        engine_name=self.engine_name,
        level='ERROR',
        info_text='Error: some error',
        workflow=self.workflow,
        config_file_path='data/test_ngi_config_minimal.yaml',
    )

    expected_subject = ('[S.One_15_01] [Some Workflow] [ERROR] analysis '
                        'intervention required')
    expected_text = ('This analysis has encountered an error:\n'
                     'Project: S.One_15_01\n'
                     'Sample: P1155_101\n'
                     'Engine: piper_ngi\n'
                     'Workflow: Some Workflow\n'
                     'File: some_file.py\n'
                     'Line: 42\n\n'
                     'Additional information:\n\n'
                     'Error: some error\n')
    mock_mail.assert_called_once_with(origin='*****@*****.**',
                                      recipient='some_user@some_email.com',
                                      subject=expected_subject,
                                      text=expected_text)
def update_sample_duplication_and_coverage(project_id, sample_id, project_base_path,
                                           config=None, config_file_path=None):
    """Update Charon with the duplication rate, read count and coverage of a sample.

    The duplication percentage is parsed from the sample's ``.metrics`` file
    and the coverage / read count from its qualimap ``genome_results.txt``.
    A value that cannot be parsed is logged and recorded as 0. A CharonError
    during the update is logged and, unless the 'quiet' config option is set,
    mailed.

    :param str project_id: The Charon project id
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The parsed configuration; only 'quiet' is read here
    :param str config_file_path: Not read by this function
    """
    piper_dir = os.path.join(project_base_path, 'ANALYSIS', project_id, 'piper_ngi')
    dup_file_path = os.path.join(piper_dir, '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(piper_dir, '06_final_alignment_qc',
                                            "{}.clean.dedup.qc".format(sample_id),
                                            "genome_results.txt")
    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Best effort: still update Charon, but do not swallow
        # KeyboardInterrupt / SystemExit as a bare except would
        dup_pc = 0
        LOG.error("Cannot find {}.metrics file for duplication rate at {}. Continuing."
                  .format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error("Cannot find genome_results.txt file for sample coverage at {}. Continuing."
                  .format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info('Updating sample "{}" in '
                 'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'
                 .format(sample_id, dup_pc, cov))
    except CharonError as e:
        # The original template was fed a single pre-formatted argument and an
        # undefined name (sampleid), so this handler itself used to raise.
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate {} '
                      'and coverage {}: {}'.format(project_id, sample_id,
                                                   dup_pc, cov, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet"
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project_id, sample_name=sample_id,
                          engine_name="piper_ngi", level="ERROR",
                          info_text=error_text)
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None):
    """Record a launched sample analysis locally and mark it as running in Charon.

    A SampleAnalysis row is added to the local tracking database; the commit
    is attempted up to three times, sleeping 15 seconds after each
    OperationalError. The sample is then set to "UNDER_ANALYSIS" in Charon and
    its seqruns to "RUNNING". A CharonError at that stage is logged and,
    unless the 'quiet' config option is set, mailed; it is not re-raised.

    :param project: Project object (``project_id``, ``name``, ``base_path`` are read)
    :param sample: Sample object (``name`` is read)
    :param str workflow_subtask: The workflow being recorded
    :param str analysis_module_name: Stored as the ``engine`` of the record
    :param process_id: Local process id of the job, if any
    :param slurm_job_id: Slurm job id of the job, if any
    :param dict config: The parsed configuration; only 'quiet' is read here

    :raises RuntimeError: If the record could not be written to the database
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for _ in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample,
                                                    workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warn('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Bind the exception here: the name bound by the inner handler is
            # not available at this point on Python 3.
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", sample "{}", '
                               'workflow "{}": {}'.format(slurm_job_id, project, sample,
                                                          workflow_subtask, e))
    try:
        set_status = "UNDER_ANALYSIS"
        LOG.info(('Updating Charon status for project/sample '
                  '{}/{} to {}').format(project, sample, set_status))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      analysis_status=set_status)
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj, "RUNNING")
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet"
        if not (config or {}).get('quiet'):
            # project_id / sample_id are not local names in this function;
            # read them from the objects instead.
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR",
                          info_text=error_text)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and total read count for each.

    Read counts are summed over every ``genome_results.txt`` matching the
    sample and the last ``_``-separated token of the seqrun id; a file that
    cannot be parsed is logged and skipped. A CharonError while updating one
    seqrun is logged (and mailed unless the 'quiet' config option is set) and
    processing continues with the next seqrun.

    :param str project_id: The Charon project id
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)
    :param dict config: The parsed configuration; only 'quiet' is read here
    :param str config_file_path: Not read by this function

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            qc_dir_pattern = "{}.{}*.qc".format(sample_id, seqrun_id.split('_')[-1])
            genome_results_file_paths = glob.glob(
                os.path.join(piper_qc_dir, qc_dir_pattern, "genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir,
                                                            sample_id, seqrun_id)
            reads = 0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Best effort, but let KeyboardInterrupt / SystemExit through
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'
                     .format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # config defaults to None; treat that as "not quiet"
                if not (config or {}).get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR", info_text=error_text)
def update_coverage_for_sample_seqruns(project_id, sample_id, piper_qc_dir,
                                       config=None, config_file_path=None):
    """Find all the valid seqruns for a particular sample, parse their
    qualimap output files, and update Charon with the mean autosomal
    coverage and total read count for each.

    Read counts are summed over every ``genome_results.txt`` matching the
    sample and the last ``_``-separated token of the seqrun id; a file that
    cannot be parsed is logged and skipped. A CharonError while updating one
    seqrun is logged (and mailed unless the 'quiet' config option is set) and
    processing continues with the next seqrun.

    :param str project_id: The Charon project id
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str piper_qc_dir: The path to the Piper qc dir
                             (02_preliminary_alignment_qc at time of writing)
    :param dict config: The parsed configuration; only 'quiet' is read here
    :param str config_file_path: Not read by this function

    :raises OSError: If the qc path specified is missing or otherwise inaccessible
    :raises ValueError: If arguments are incorrect
    """
    seqruns_by_libprep = get_finished_seqruns_for_sample(project_id, sample_id)
    charon_session = CharonSession()
    # .items() works on both Python 2 and 3; .iteritems() exists only on 2
    for libprep_id, seqruns in seqruns_by_libprep.items():
        for seqrun_id in seqruns:
            label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
            flowcell_token = seqrun_id.split('_')[-1]
            genome_results_file_paths = glob.glob(
                os.path.join(piper_qc_dir,
                             "{}.{}*.qc".format(sample_id, flowcell_token),
                             "genome_results.txt"))
            ma_coverage = parse_mean_coverage_from_qualimap(piper_qc_dir,
                                                            sample_id, seqrun_id)
            reads = 0
            for path in genome_results_file_paths:
                try:
                    reads += parse_qualimap_reads(path)
                except IOError:
                    LOG.error("Cannot find the genome_results.txt file to get the number of reads in {}".format(path))
                except Exception:
                    # Best effort, but let KeyboardInterrupt / SystemExit through
                    LOG.error("Error in handling the genome_results.txt file located at {}".format(path))
            LOG.info('Updating project/sample/libprep/seqrun "{}" in '
                     'Charon with mean autosomal coverage "{}" and total reads {}'
                     .format(label, ma_coverage, reads))
            try:
                charon_session.seqrun_update(projectid=project_id,
                                             sampleid=sample_id,
                                             libprepid=libprep_id,
                                             seqrunid=seqrun_id,
                                             total_reads=reads,
                                             mean_autosomal_coverage=ma_coverage)
            except CharonError as e:
                error_text = ('Could not update project/sample/libprep/seqrun "{}" '
                              'in Charon with mean autosomal coverage '
                              '"{}": {}'.format(label, ma_coverage, e))
                LOG.error(error_text)
                # config defaults to None; treat that as "not quiet"
                if not (config or {}).get('quiet'):
                    mail_analysis(project_name=project_id, sample_name=sample_id,
                                  engine_name="piper_ngi",
                                  level="ERROR", info_text=error_text)
def update_sample_duplication_and_coverage(project_id, sample_id, project_base_path,
                                           config=None, config_file_path=None):
    """Update Charon with the duplication rate, read count and coverage of a sample.

    The duplication percentage is parsed from the sample's ``.metrics`` file
    and the coverage / read count from its qualimap ``genome_results.txt``.
    A value that cannot be parsed is logged and recorded as 0. A CharonError
    during the update is logged and, unless the 'quiet' config option is set,
    mailed.

    :param str project_id: The Charon project id
    :param str sample_id: The sample name (e.g. P1170_105)
    :param str project_base_path: The path to the project dir
    :param dict config: The parsed configuration; only 'quiet' is read here
    :param str config_file_path: Not read by this function
    """
    dup_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                 'piper_ngi', '05_processed_alignments',
                                 "{}.metrics".format(sample_id))
    genome_results_file_path = os.path.join(project_base_path, 'ANALYSIS', project_id,
                                            'piper_ngi', '06_final_alignment_qc',
                                            "{}.clean.dedup.qc".format(sample_id),
                                            "genome_results.txt")
    try:
        dup_pc = parse_deduplication_percentage(dup_file_path)
    except Exception:
        # Best effort: still update Charon, but do not swallow
        # KeyboardInterrupt / SystemExit as a bare except would
        dup_pc = 0
        LOG.error("Cannot find {}.metrics file for duplication rate at {}. Continuing.".format(sample_id, dup_file_path))
    try:
        cov = parse_qualimap_coverage(genome_results_file_path)
        reads = parse_qualimap_reads(genome_results_file_path)
    except IOError:
        cov = 0
        reads = 0
        LOG.error("Cannot find genome_results.txt file for sample coverage at {}. Continuing.".format(genome_results_file_path))
    try:
        charon_session = CharonSession()
        charon_session.sample_update(projectid=project_id, sampleid=sample_id,
                                     duplication_pc=dup_pc,
                                     total_sequenced_reads=reads,
                                     total_autosomal_coverage=cov)
        LOG.info('Updating sample "{}" in '
                 'Charon with mean duplication_percentage"{}" and autosomal coverage "{}"'.format(sample_id, dup_pc, cov))
    except CharonError as e:
        # The original template was fed a single pre-formatted argument and an
        # undefined name (sampleid), so this handler itself used to raise.
        error_text = ('Could not update project/sample "{}/{}" '
                      'in Charon with duplication rate {} '
                      'and coverage {}: {}'.format(project_id, sample_id,
                                                   dup_pc, cov, e))
        LOG.error(error_text)
        # config defaults to None; treat that as "not quiet"
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project_id, sample_name=sample_id,
                          engine_name="piper_ngi", level="ERROR",
                          info_text=error_text)
def recurse_status_for_sample(project_obj, status_field, status_value, update_done=False,
                              extra_args=None, config=None, config_file_path=None):
    """Set seqruns under sample to have status for field <status_field>
    to <status_value>.

    Walks project -> sample -> libprep -> seqrun and calls
    CharonSession.seqrun_update() for each seqrun. A CharonError for one
    seqrun is logged (and mailed unless the 'quiet' config option is set)
    and the walk continues.

    :param project_obj: Iterable project object exposing ``project_id``
    :param str status_field: Name of the Charon seqrun field to set
    :param status_value: Value to store in that field
    :param bool update_done: Not read by this function
    :param dict extra_args: Additional fields to send with every seqrun
                            update; the caller's dict is left unmodified
    :param dict config: The parsed configuration; only 'quiet' is read here
    :param str config_file_path: Not read by this function
    """
    # Work on a copy so the caller's dict is not mutated
    update_fields = dict(extra_args) if extra_args else {}
    update_fields[status_field] = status_value
    charon_session = CharonSession()
    project_id = project_obj.project_id
    for sample_obj in project_obj:
        # There's only one sample but this is an iterator so we iterate
        sample_id = sample_obj.name
        for libprep_obj in sample_obj:
            libprep_id = libprep_obj.name
            for seqrun_obj in libprep_obj:
                seqrun_id = seqrun_obj.name
                label = "{}/{}/{}/{}".format(project_id, sample_id, libprep_id, seqrun_id)
                LOG.info('Updating status for field "{}" of project/sample/libprep/seqrun '
                         '"{}" to "{}" in Charon '.format(status_field, label, status_value))
                try:
                    charon_session.seqrun_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 libprepid=libprep_id,
                                                 seqrunid=seqrun_id,
                                                 **update_fields)
                except CharonError as e:
                    error_text = ('Could not update {} for project/sample/libprep/seqrun '
                                  '"{}" in Charon to "{}": {}'.format(status_field, label,
                                                                      status_value, e))
                    LOG.error(error_text)
                    # config defaults to None; treat that as "not quiet"
                    if not (config or {}).get('quiet'):
                        mail_analysis(project_name=project_id,
                                      sample_name=sample_obj.name,
                                      level="ERROR", info_text=error_text,
                                      workflow=status_field)
def handle_sample_status(analysis_object, sample, charon_reported_status):
    """Return True if the sample should be analyzed, False otherwise.

    A sample reported as UNDER_ANALYSIS, ANALYZED or FAILED is analyzed only
    when the matching ``restart_*_jobs`` flag of ``analysis_object`` is
    truthy; otherwise the refusal is logged and possibly mailed. Any other
    status is always analyzed.

    :param analysis_object: Exposes ``project``, ``config`` and the restart flags
    :param sample: The sample object (``name`` is read when mailing)
    :param charon_reported_status: The sample's status as reported by Charon
    """
    status = charon_reported_status
    if status == "UNDER_ANALYSIS":
        restart_allowed = analysis_object.restart_running_jobs
    elif status == "ANALYZED":
        restart_allowed = analysis_object.restart_finished_jobs
    elif status == "FAILED":
        restart_allowed = analysis_object.restart_failed_jobs
    else:
        return True
    if restart_allowed:
        return True

    if status == "FAILED":
        error_text = ('FAILED: Project "{}" / sample "{}" Charon reports '
                      'FAILURE, manual investigation needed!'.format(
                          analysis_object.project, sample))
    else:
        error_text = ('Charon reports seqrun analysis for project "{}" '
                      '/ sample "{}" does not need processing (already '
                      '"{}")'.format(analysis_object.project, sample, status))
    LOG.error(error_text)

    settings = analysis_object.config
    mail_suppressed = settings.get('quiet')
    if not mail_suppressed and status == "ANALYZED":
        # Already-analyzed samples are additionally silenced in manual mode
        mail_suppressed = settings.get('manual')
    if not mail_suppressed:
        # NOTE(review): analysis_module is neither a parameter nor a local
        # here; presumably a module-level name -- confirm.
        mail_analysis(project_name=analysis_object.project.name,
                      sample_name=sample.name,
                      engine_name=analysis_module.__name__,
                      level="ERROR", info_text=error_text)
    return False
def handle_sample_status(analysis_object, sample, charon_reported_status):
    """Return True if the sample should be analyzed, False otherwise.

    A sample reported as UNDER_ANALYSIS, ANALYZED or FAILED is analyzed only
    when the matching ``restart_*_jobs`` flag of ``analysis_object`` is
    truthy; otherwise the refusal is logged and possibly mailed. Any other
    status is always analyzed.

    :param analysis_object: Exposes ``project``, ``config`` and the restart flags
    :param sample: The sample object (``name`` is read when mailing)
    :param charon_reported_status: The sample's status as reported by Charon
    """
    restart_flag_names = (("UNDER_ANALYSIS", "restart_running_jobs"),
                          ("ANALYZED", "restart_finished_jobs"),
                          ("FAILED", "restart_failed_jobs"))
    flag_name = None
    for known_status, candidate in restart_flag_names:
        if charon_reported_status == known_status:
            flag_name = candidate
            break
    if flag_name is None:
        # Not a status that blocks processing
        return True
    if getattr(analysis_object, flag_name):
        return True

    if flag_name == "restart_failed_jobs":
        error_text = ('FAILED: Project "{}" / sample "{}" Charon reports '
                      'FAILURE, manual investigation needed!'.format(analysis_object.project, sample))
    else:
        error_text = ('Charon reports seqrun analysis for project "{}" '
                      '/ sample "{}" does not need processing (already '
                      '"{}")'.format(analysis_object.project, sample,
                                     charon_reported_status))
    LOG.error(error_text)

    send_mail = not analysis_object.config.get('quiet')
    if send_mail and flag_name == "restart_finished_jobs":
        # Already-analyzed samples are additionally silenced in manual mode
        send_mail = not analysis_object.config.get('manual')
    if send_mail:
        # NOTE(review): analysis_module is neither a parameter nor a local
        # here; presumably a module-level name -- confirm.
        mail_analysis(project_name=analysis_object.project.name,
                      sample_name=sample.name,
                      engine_name=analysis_module.__name__,
                      level="ERROR", info_text=error_text)
    return False
def test_mail_analysis(self):
    """Call mail_analysis once per level (INFO, WARN, ERROR); nothing is asserted."""
    level_messages = (
        ("INFO", "Your mom goes to college."),
        ("WARN", "Your mom: she goes to college!"),
        ("ERROR", "News about your mom -- she goes to college!!"),
    )
    for level, message in level_messages:
        mail_analysis(project_name=self.project_name,
                      sample_name=self.sample_name,
                      engine_name=self.engine_name,
                      level=level,
                      info_text=message,
                      workflow=self.workflow)
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    First, every project's engine is asked to sync Charon with the status of
    its locally-tracked jobs. Then, for every project whose Charon status is
    OPEN, each sample gets an optional QC analysis followed by the
    best-practice analysis, unless Charon reports the sample as
    UNDER_ANALYSIS / ANALYZED / FAILED and the matching restart flag is off.
    Failures are logged and, unless silenced through the config, mailed;
    processing continues with the next sample or project.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: Re-run samples Charon reports as FAILED
    :param bool restart_finished_jobs: Re-run samples Charon reports as ANALYZED
    :param bool restart_running_jobs: Re-run samples Charon reports as UNDER_ANALYSIS
    :param bool keep_existing_data: Passed through to the engine's analyze()
    :param bool no_qc: Skip the QC analysis
    :param str exec_mode: Passed through to the engine's analyze()
    :param bool quiet: Not read here; mailing is governed by config['quiet']
    :param bool manual: Not read here; see config['manual']
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    :param bool generate_bqsr_bam: Passed through to the engine's analyze()
    """
    # Used only for option lookups so a missing config cannot crash the error
    # paths; the original `config` object is what gets passed on to callees.
    settings = config if config is not None else {}

    for project in projects_to_analyze:
        # Get information from Charon regarding which best practice analyses to run
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)

    charon_session = CharonSession()
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if not project_status == "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not settings.get('quiet'):
                mail_analysis(project_name=project.name, level="ERROR",
                              info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e:
            # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue

        # Reset per project: if loading fails, QC is skipped for this project
        # instead of hitting an unbound (or stale) module for every sample.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))

        for sample in project:
            # Launch QC analysis
            if qc_analysis_module is not None:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample,
                                           qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    # Report the QC engine, which is the one that failed here
                    error_text = ('Cannot process project "{}" / sample "{}" / '
                                  'engine "{}" : {}'.format(project, sample,
                                                            qc_analysis_module.__name__,
                                                            e))
                    LOG.error(error_text)
                    if not settings.get("quiet"):
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=qc_analysis_module.__name__,
                                      level="ERROR", info_text=e)

            # Launch actual best-practice analysis
            try:
                charon_reported_status = charon_session.sample_get(
                    project.project_id, sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                if charon_reported_status == "UNDER_ANALYSIS":
                    if not restart_running_jobs:
                        error_text = ('Charon reports seqrun analysis for project "{}" '
                                      '/ sample "{}" does not need processing (already '
                                      '"{}")'.format(project, sample,
                                                     charon_reported_status))
                        LOG.error(error_text)
                        if not settings.get('quiet'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
                elif charon_reported_status == "ANALYZED":
                    if not restart_finished_jobs:
                        error_text = ('Charon reports seqrun analysis for project "{}" '
                                      '/ sample "{}" does not need processing (already '
                                      '"{}")'.format(project, sample,
                                                     charon_reported_status))
                        LOG.error(error_text)
                        if not settings.get('quiet') and not settings.get('manual'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
                elif charon_reported_status == "FAILED":
                    if not restart_failed_jobs:
                        error_text = ('FAILED: Project "{}" / sample "{}" Charon reports '
                                      'FAILURE, manual investigation needed!'.format(
                                          project, sample))
                        LOG.error(error_text)
                        if not settings.get('quiet'):
                            mail_analysis(project_name=project.name,
                                          sample_name=sample.name,
                                          engine_name=analysis_module.__name__,
                                          level="ERROR", info_text=error_text)
                        continue
            except CharonError as e:
                LOG.error(e)
                continue

            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine'
                         '"{}"'.format(project, sample, analysis_module.__name__))
                # actual analysis launch
                analysis_module.analyze(project=project,
                                        sample=sample,
                                        restart_finished_jobs=restart_finished_jobs,
                                        restart_running_jobs=restart_running_jobs,
                                        keep_existing_data=keep_existing_data,
                                        exec_mode=exec_mode,
                                        config=config,
                                        generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(project, sample,
                                                        analysis_module.__name__, e))
                LOG.error(error_text)
                if not settings.get("quiet"):
                    mail_analysis(project_name=project.name,
                                  sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR", info_text=e)
                continue
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    Every SampleAnalysis row in the local tracking database is examined. The
    exit code recorded for the job decides the outcome: 0 marks the
    sample/seqruns as finished, a positive int marks them FAILED, and anything
    else is resolved by asking slurm (or checking the local pid) whether the
    job is still alive. Rows for finished or failed jobs are deleted once the
    Charon update calls have returned without raising.

    :param bool quiet: If true, force config['quiet'] to True
    :param dict config: The parsed configuration; 'quiet' is read and may be set
    :param str config_file_path: Not read by this function
    """
    # NOTE(review): config defaults to None but is dereferenced below;
    # presumably always populated before this body runs -- confirm.
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)

            # Only these two workflows have status fields handled below
            if workflow not in ("merge_process_variantcall", "genotype_concordance",):
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue

            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e:  # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id,
                                                                   e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  info_text=error_text,
                                  workflow=workflow)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                        set_status = "ANALYZED"  # sample level
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                        set_status = "DONE"  # sample level
                    recurse_status = "DONE"  # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="INFO",
                                      info_text=info_text,
                                      workflow=workflow)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # run MultiQC
                    LOG.info("Running MultiQC on project {}".format(project_name))
                    try:
                        run_multiqc(project_base_path, project_id, project_name)
                    except Exception as e:
                        # A MultiQC failure is logged only; it does not stop the update
                        LOG.error(e)

                    if workflow == "merge_process_variantcall":
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id, sample_id,
                                                               project_base_path)
                    elif workflow == "genotype_concordance":
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id, piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label, set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine,
                                      level="ERROR",
                                      info_text=error_text,
                                      workflow=workflow)
                    if workflow == "merge_process_variantcall":
                        sample_status_field = "analysis_status"
                        seqrun_status_field = "alignment_status"
                    elif workflow == "genotype_concordance":
                        sample_status_field = seqrun_status_field = "genotype_status"
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=set_status,
                                              config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError as e:
                            # A ValueError from the status lookup is treated as a failed job
                            slurm_exit_code = 1
                        if slurm_exit_code is not None:  # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = \
                                create_exit_code_file_path(workflow_subtask=workflow,
                                                           project_base_path=project_base_path,
                                                           project_name=project_name,
                                                           project_id=project_id,
                                                           sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id, exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name,
                                          sample_name=sample_id,
                                          engine_name=engine,
                                          level="ERROR",
                                          info_text=error_text,
                                          workflow=workflow)
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else:
                        # Job still running
                        set_status = "UNDER_ANALYSIS"
                        if workflow == "merge_process_variantcall":
                            sample_status_field = "analysis_status"
                            seqrun_status_field = "alignment_status"
                            recurse_status = "RUNNING"
                        elif workflow == "genotype_concordance":
                            sample_status_field = seqrun_status_field = "genotype_status"
                            recurse_status = "UNDER_ANALYSIS"
                        try:
                            charon_status = \
                                charon_session.sample_get(projectid=project_id,
                                                          sampleid=sample_id).get(sample_status_field)
                            # Only touch Charon when it disagrees with local tracking
                            if charon_status and not charon_status == set_status:
                                LOG.warn('Tracking inconsistency for {}: Charon status '
                                         'for field "{}" is "{}" but local process tracking '
                                         'database indicates it is running. Setting value '
                                         'in Charon to {}.'.format(label, sample_status_field,
                                                                   charon_status, set_status))
                                charon_session.sample_update(projectid=project_id,
                                                             sampleid=sample_id,
                                                             **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_id,
                                              engine_name=engine,
                                              level="ERROR",
                                              workflow=workflow,
                                              info_text=error_text)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  workflow=workflow,
                                  info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine,
                                  level="ERROR",
                                  workflow=workflow,
                                  info_text=error_text)
        # Persist the row deletions made above
        session.commit()
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    For every project the locally-tracked job status is first pushed to
    Charon; then, for each project whose Charon status is "OPEN", the QC
    engine (unless ``no_qc``) and the best-practice engine are run on every
    sample. Samples that Charon reports as UNDER_ANALYSIS / ANALYZED / FAILED
    are skipped unless the matching ``restart_*`` flag is set.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: Re-launch samples Charon reports as FAILED
    :param bool restart_finished_jobs: Re-launch samples Charon reports as ANALYZED
    :param bool restart_running_jobs: Re-launch samples Charon reports as UNDER_ANALYSIS
    :param bool keep_existing_data: Passed through to the engine's analyze()
    :param bool no_qc: Skip the QC engine entirely
    :param str exec_mode: Passed through to the engine's analyze()
    :param bool quiet: Not read directly here; the body consults config['quiet']
    :param bool manual: Not read directly here; the body consults config['manual']
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    :param bool generate_bqsr_bam: Passed through to the engine's analyze()
    """
    for project in projects_to_analyze:
        # Get information from Charon regarding which best practice analyses to run
        try:
            engine = get_engine_for_bp(project, config, config_file_path)
        except (RuntimeError, CharonError) as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)
    charon_session = CharonSession()
    for project in projects_to_analyze:
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if not project_status == "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if not config.get('quiet'):
                mail_analysis(project_name=project.name, level="ERROR",
                              info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e:
            # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        # Reset per project so a failed load can never leave the name unbound
        # or silently reuse the module loaded for a previous project.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis (only if the QC engine was actually loaded)
            if qc_analysis_module is not None:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample,
                                           qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project, sample=sample,
                                               config=config)
                except Exception as e:
                    # Report the QC engine here, not the best-practice engine.
                    error_text = ('Cannot process project "{}" / sample "{}" / '
                                  'engine "{}" : {}'.format(project, sample,
                                                            qc_analysis_module.__name__,
                                                            e))
                    LOG.error(error_text)
                    if not config.get("quiet"):
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=qc_analysis_module.__name__,
                                      level="ERROR", info_text=e)
            # Launch actual best-practice analysis
            try:
                charon_reported_status = charon_session.sample_get(
                    project.project_id, sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                error_text = None
                # Only the ANALYZED case also honours config['manual'].
                mail_unless_manual = False
                if charon_reported_status == "UNDER_ANALYSIS" and not restart_running_jobs:
                    error_text = ('Charon reports seqrun analysis for project "{}" '
                                  '/ sample "{}" does not need processing (already '
                                  '"{}")'.format(project, sample, charon_reported_status))
                elif charon_reported_status == "ANALYZED" and not restart_finished_jobs:
                    error_text = ('Charon reports seqrun analysis for project "{}" '
                                  '/ sample "{}" does not need processing (already '
                                  '"{}")'.format(project, sample, charon_reported_status))
                    mail_unless_manual = True
                elif charon_reported_status == "FAILED" and not restart_failed_jobs:
                    error_text = ('FAILED: Project "{}" / sample "{}" Charon reports '
                                  'FAILURE, manual investigation needed!'.format(project,
                                                                               sample))
                if error_text is not None:
                    LOG.error(error_text)
                    send_mail = not config.get('quiet')
                    if send_mail and mail_unless_manual:
                        send_mail = not config.get('manual')
                    if send_mail:
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR", info_text=error_text)
                    continue
            except CharonError as e:
                LOG.error(e)
                continue
            try:
                LOG.info('Attempting to launch sample analysis for '
                         'project "{}" / sample "{}" / engine'
                         '"{}"'.format(project, sample, analysis_module.__name__))
                # actual analysis launch
                analysis_module.analyze(project=project,
                                        sample=sample,
                                        restart_finished_jobs=restart_finished_jobs,
                                        restart_running_jobs=restart_running_jobs,
                                        keep_existing_data=keep_existing_data,
                                        exec_mode=exec_mode,
                                        config=config,
                                        generate_bqsr_bam=generate_bqsr_bam)
            except Exception as e:
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(project, sample,
                                                        analysis_module.__name__, e))
                LOG.error(error_text)
                if not config.get("quiet"):
                    mail_analysis(project_name=project.name,
                                  sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR", info_text=e)
                continue
def setup_analysis_directory_structure(fc_dir, projects_to_analyze,
                                       restrict_to_projects=None,
                                       restrict_to_samples=None,
                                       create_files=True,
                                       fallback_libprep=None,
                                       quiet=False,
                                       config=None,
                                       config_file_path=None):
    """
    Copy and sort files from their CASAVA-demultiplexed flowcell structure
    into their respective project/sample/libPrep/FCIDs. This collects samples
    split across multiple flowcells.

    :param str fc_dir: The directory created by CASAVA for this flowcell.
    :param dict projects_to_analyze: Mapping of project directory -> NGIProject
                                     (may be empty); updated in place
    :param list restrict_to_projects: Specific projects within the flowcell to process exclusively
    :param list restrict_to_samples: Specific samples within the flowcell to process exclusively
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param bool quiet: Stored into config["quiet"]; suppresses error mails when true
    :param dict config: The parsed configuration file.
    :param str config_file_path: Not read by this function body.

    :returns: The (updated) projects_to_analyze mapping, or an empty list if
              the flowcell directory is missing or cannot be parsed
    :rtype: dict or list

    :raises KeyError: If a required configuration key is not available.
    :raises RuntimeError: If fc_dir matches neither configured root.
    """
    LOG.info(
        "Setting up analysis for demultiplexed data in source folder \"{}\"".
        format(fc_dir))
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    config["quiet"] = quiet  # Hack because I enter here from a script sometimes
    # Checks flowcell path to establish which group owns it
    root_pattern = r".+({}|{})\/.+".format(config["analysis"]["sthlm_root"],
                                           config["analysis"]["upps_root"])
    matches = re.match(root_pattern, fc_dir)
    if matches:
        flowcell_uppnexid = matches.group(1)
    else:
        error_text = (
            "cannot guess which project (sthlm/uppsala) the flowcell {} belongs to"
            .format(fc_dir))
        LOG.error(error_text)
        raise RuntimeError(error_text)
    analysis_top_dir = os.path.abspath(
        os.path.join(config["analysis"]["base_root"], flowcell_uppnexid,
                     config["analysis"]["top_dir"]))
    try:
        safe_makedir(analysis_top_dir)
    except OSError as e:
        # Logged only; execution continues as in the original code.
        LOG.error(
            'Error: Analysis top directory {} does not exist and could not '
            'be created.'.format(analysis_top_dir))
    fc_dir = fc_dir if os.path.isabs(fc_dir) else os.path.join(
        analysis_top_dir, fc_dir)
    if not os.path.exists(fc_dir):
        LOG.error("Error: Flowcell directory {} does not exist".format(fc_dir))
        return []
    # Map the directory structure for this flowcell
    try:
        fc_dir_structure = parse_flowcell(fc_dir)
    except (OSError, ValueError) as e:
        LOG.error("Error when processing flowcell dir \"{}\": {}".format(
            fc_dir, e))
        return []
    fc_full_id = fc_dir_structure['fc_full_id']
    if not fc_dir_structure.get('projects'):
        LOG.warning(
            "No projects found in specified flowcell directory \"{}\"".format(
                fc_dir))
    # Compiled once; selects fastq files, optionally compressed
    fastq_pattern = re.compile(r".*\.(fastq|fq)(\.gz|\.gzip|\.bz2)?$")
    # Iterate over the projects in the flowcell directory
    for project in fc_dir_structure.get('projects', []):
        project_name = project['project_name']
        project_original_name = project['project_original_name']
        samplesheet_path = fc_dir_structure.get("samplesheet_path")
        # parse the samplesheet and get the expected sample numbers assigned by bcl2fastq
        samplesheet_sample_numbers = get_sample_numbers_from_samplesheet(
            samplesheet_path) if samplesheet_path else None
        try:
            # Maps e.g. "Y.Mom_14_01" to "P123"
            project_id = get_project_id_from_name(project_name)
        except (CharonError, RuntimeError, ValueError) as e:
            LOG.warning(
                'Could not retrieve project id from Charon (record missing?). '
                'Using project name ("{}") as project id '
                '(error: {})'.format(project_name, e))
            project_id = project_name
        # If specific projects are specified, skip those that do not match
        if restrict_to_projects and project_name not in restrict_to_projects and \
                project_id not in restrict_to_projects:
            LOG.debug(
                "Skipping project {} (not in restrict_to_projects)".format(
                    project_name))
            continue
        LOG.info("Setting up project {}".format(project.get("project_name")))
        # Create a project directory if it doesn't already exist, including
        # intervening "DATA" directory
        project_dir = os.path.join(analysis_top_dir, "DATA", project_id)
        project_sl_dir = os.path.join(analysis_top_dir, "DATA", project_name)
        project_analysis_dir = os.path.join(analysis_top_dir, "ANALYSIS",
                                            project_id)
        project_analysis_sl_dir = os.path.join(analysis_top_dir, "ANALYSIS",
                                               project_name)
        if create_files:
            safe_makedir(project_dir, 0o2770)
            safe_makedir(project_analysis_dir, 0o2770)
            # Name-based symlinks pointing at the id-based directories
            if not project_dir == project_sl_dir and \
                    not os.path.exists(project_sl_dir):
                os.symlink(project_dir, project_sl_dir)
            if not project_analysis_dir == project_analysis_sl_dir and \
                    not os.path.exists(project_analysis_sl_dir):
                os.symlink(project_analysis_dir, project_analysis_sl_dir)
        try:
            project_obj = projects_to_analyze[project_dir]
        except KeyError:
            project_obj = NGIProject(name=project_name,
                                     dirname=project_id,
                                     project_id=project_id,
                                     base_path=analysis_top_dir)
            projects_to_analyze[project_dir] = project_obj
        # Iterate over the samples in the project
        for sample in project.get('samples', []):
            sample_name = sample['sample_name']
            # If specific samples are specified, skip those that do not match
            if restrict_to_samples and sample_name not in restrict_to_samples:
                LOG.debug("Skipping sample {}: not in specified samples "
                          "{}".format(sample_name,
                                      ", ".join(restrict_to_samples)))
                continue
            LOG.info("Setting up sample {}".format(sample_name))
            # Create a directory for the sample if it doesn't already exist
            sample_dir = os.path.join(project_dir, sample_name)
            if create_files:
                safe_makedir(sample_dir, 0o2770)
            # This will only create a new sample object if it doesn't already exist in the project
            sample_obj = project_obj.add_sample(name=sample_name,
                                                dirname=sample_name)
            # Get the Library Prep ID for each file
            fastq_files = list(
                filter(fastq_pattern.match, sample.get('files', [])))
            # For each fastq file, create the libprep and seqrun objects
            # and add the fastq file to the seqprep object
            # Note again that these objects only get created if they don't yet exist;
            # if they do exist, the existing object is returned
            for fq_file in fastq_files:
                # Try to use assignment from SampleSheet
                samplesheet_sample = match_fastq_sample_number_to_samplesheet(
                    fq_file, samplesheet_sample_numbers, project_id)
                # NOTE(review): index 6 is used as the libprep name -- confirm
                # against match_fastq_sample_number_to_samplesheet
                if samplesheet_sample is not None and \
                        samplesheet_sample[6] is not None:
                    libprep_name = samplesheet_sample[6]
                else:
                    LOG.debug(
                        'Unable to determine library prep from sample sheet file; try to determine from Charon'
                    )
                    try:
                        # Requires Charon access
                        libprep_name = determine_library_prep_from_fcid(
                            project_id, sample_name, fc_full_id)
                        LOG.debug('Found libprep name "{}" in Charon'.format(
                            libprep_name))
                    except ValueError:
                        charon_session = CharonSession()
                        # A missing/None 'libpreps' entry is treated as "no
                        # libpreps" rather than crashing on len(None).
                        libpreps = charon_session.sample_get_libpreps(
                            project_id, sample_name).get('libpreps') or []
                        if len(libpreps) == 1:
                            libprep_name = libpreps[0].get('libprepid')
                            LOG.warning(
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon, but only one '
                                'library prep is present in Charon ("{}"). Using '
                                'this as the library prep.'.format(
                                    project_name, sample_name, fc_full_id,
                                    fq_file, libprep_name))
                        elif fallback_libprep:
                            libprep_name = fallback_libprep
                            LOG.warning(
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon, but a fallback '
                                'libprep value of "{}" was supplied -- using this '
                                'value.'.format(project_name, sample_name,
                                                fc_full_id, fq_file,
                                                libprep_name))
                        else:
                            error_text = (
                                'Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                'has no libprep information in Charon. Skipping '
                                'analysis.'.format(project_name, sample_name,
                                                   fc_full_id, fq_file))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
                libprep_object = sample_obj.add_libprep(name=libprep_name,
                                                        dirname=libprep_name)
                libprep_dir = os.path.join(sample_dir, libprep_name)
                if create_files:
                    safe_makedir(libprep_dir, 0o2770)
                seqrun_object = libprep_object.add_seqrun(name=fc_full_id,
                                                          dirname=fc_full_id)
                seqrun_dir = os.path.join(libprep_dir, fc_full_id)
                if create_files:
                    safe_makedir(seqrun_dir, 0o2770)
                seqrun_object.add_fastq_files(fq_file)
            if fastq_files and create_files:
                src_sample_dir = os.path.join(fc_dir_structure['fc_dir'],
                                              project['data_dir'],
                                              project['project_dir'],
                                              sample['sample_dir'])
                for libprep_obj in sample_obj:
                    for seqrun_obj in libprep_obj:
                        src_fastq_files = [
                            os.path.join(src_sample_dir, fastq_file)
                            for fastq_file in seqrun_obj.fastq_files
                        ]
                        seqrun_dst_dir = os.path.join(project_obj.base_path,
                                                      "DATA",
                                                      project_obj.dirname,
                                                      sample_obj.dirname,
                                                      libprep_obj.dirname,
                                                      seqrun_obj.dirname)
                        LOG.info(
                            "Symlinking fastq files from {} to {}...".format(
                                src_sample_dir, seqrun_dst_dir))
                        try:
                            do_symlink(src_fastq_files, seqrun_dst_dir)
                        except OSError:
                            error_text = (
                                'Could not symlink files for project/sample/'
                                'libprep/seqrun {}/{}/{}/{}'.format(
                                    project_obj, sample_obj, libprep_obj,
                                    seqrun_obj))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR",
                                              info_text=error_text)
                            continue
    return projects_to_analyze
def update_charon_with_local_jobs_status(quiet=False, config=None, config_file_path=None):
    """Synchronise Charon with the state of every locally-tracked job.

    Each SampleAnalysis row in the local tracking database is inspected: its
    exit code (or, failing that, the liveness of its slurm job / process)
    decides whether the sample is recorded in Charon as finished, failed or
    still under analysis. Rows for finished or failed jobs are deleted from
    the local database, and MultiQC is run afterwards for every project that
    had a successfully finished job.

    :param bool quiet: When true, forces config['quiet'] on (no mails are sent)
    :param dict config: The parsed NGI configuration
    :param str config_file_path: Not read by this function body
    """
    if quiet and not config.get("quiet"):
        config['quiet'] = True
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    # workflow -> (sample-level Charon field, seqrun-level Charon field)
    charon_fields = {
        "merge_process_variantcall": ("analysis_status", "alignment_status"),
        "genotype_concordance": ("genotype_status", "genotype_status"),
    }
    multiqc_projects = set()
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these id fields (slurm, pid) will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)
            # Keyword arguments shared by every notification mail below
            mail_kwargs = dict(project_name=project_name,
                               sample_name=sample_id,
                               engine_name=engine,
                               workflow=workflow)
            if workflow not in charon_fields:
                LOG.error('Unknown workflow "{}" for {}; cannot update '
                          'Charon. Skipping sample.'.format(workflow, label))
                continue
            sample_status_field, seqrun_status_field = charon_fields[workflow]
            is_variantcall = workflow == "merge_process_variantcall"
            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e:
                # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for {} run {}/{}: {}'.format(workflow,
                                                                   project_id,
                                                                   sample_id, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(level="ERROR", info_text=error_text,
                                  **mail_kwargs)
                continue
            try:
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED" if is_variantcall else "DONE"  # sample level
                    recurse_status = "DONE"  # For the seqrun level
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(level="INFO", info_text=info_text,
                                      **mail_kwargs)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=recurse_status,
                                              config=config)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # add project to MultiQC
                    multiqc_projects.add((project_base_path, project_id,
                                          project_name))
                    if is_variantcall:
                        # Parse seqrun output results / update Charon
                        # This is a semi-optional step -- failure here will send an
                        # email but not more than once. The record is still removed
                        # from the local jobs database, so this will have to be done
                        # manually if you want it done at all.
                        piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "02_preliminary_alignment_qc")
                        update_coverage_for_sample_seqruns(project_id, sample_id,
                                                           piper_qc_dir)
                        update_sample_duplication_and_coverage(project_id,
                                                               sample_id,
                                                               project_base_path)
                    else:
                        # genotype_concordance (the only other accepted workflow)
                        piper_gt_dir = os.path.join(project_base_path, "ANALYSIS",
                                                    project_id, "piper_ngi",
                                                    "03_genotype_concordance")
                        try:
                            update_gtc_for_sample(project_id, sample_id,
                                                  piper_gt_dir)
                        except (CharonError, IOError, ValueError) as e:
                            LOG.error(e)
                elif type(piper_exit_code) is int and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label,
                                                         set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(level="ERROR", info_text=error_text,
                                      **mail_kwargs)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 **{sample_status_field: set_status})
                    recurse_status_for_sample(project_obj,
                                              status_field=seqrun_status_field,
                                              status_value=set_status,
                                              config=config)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    job_failed = False
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            slurm_exit_code = 1
                        # "None" indicates job is still running
                        job_failed = slurm_exit_code is not None
                    elif not psutil.pid_exists(process_id):
                        # Job did not write an exit code and is also not running
                        job_failed = True
                    if job_failed:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running '
                                      'for {} / {}: setting status to {} in '
                                      'Charon'.format(label, workflow, set_status))
                        if slurm_job_id:
                            exit_code_file_path = create_exit_code_file_path(
                                workflow_subtask=workflow,
                                project_base_path=project_base_path,
                                project_name=project_name,
                                project_id=project_id,
                                sample_id=sample_id)
                            error_text += (' (slurm job id "{}", exit code file path '
                                           '"{}")'.format(slurm_job_id,
                                                          exit_code_file_path))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(level="ERROR", info_text=error_text,
                                          **mail_kwargs)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     **{sample_status_field: set_status})
                        recurse_status_for_sample(project_obj,
                                                  status_field=seqrun_status_field,
                                                  status_value=set_status,
                                                  config=config)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else:
                        # Job still running
                        set_status = "UNDER_ANALYSIS"
                        recurse_status = "RUNNING" if is_variantcall else "UNDER_ANALYSIS"
                        try:
                            remote_sample = charon_session.sample_get(
                                projectid=project_id, sampleid=sample_id)
                            charon_status = remote_sample.get(sample_status_field)
                            if charon_status and charon_status != set_status:
                                LOG.warning('Tracking inconsistency for {}: Charon status '
                                            'for field "{}" is "{}" but local process tracking '
                                            'database indicates it is running. Setting value '
                                            'in Charon to {}.'.format(label,
                                                                      sample_status_field,
                                                                      charon_status,
                                                                      set_status))
                                charon_session.sample_update(
                                    projectid=project_id,
                                    sampleid=sample_id,
                                    **{sample_status_field: set_status})
                                recurse_status_for_sample(project_obj,
                                                          status_field=seqrun_status_field,
                                                          status_value=recurse_status,
                                                          config=config)
                        except CharonError as e:
                            error_text = ('Unable to update/verify Charon '
                                          'for {}: {}'.format(label, e))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(level="ERROR",
                                              info_text=error_text,
                                              **mail_kwargs)
            except CharonError as e:
                error_text = ('Unable to update Charon for {}: '
                              '{}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(level="ERROR", info_text=error_text,
                                  **mail_kwargs)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              '"{}" status for "{}": {}'.format(workflow, label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(level="ERROR", info_text=error_text,
                                  **mail_kwargs)
        session.commit()
    # Run Multiqc
    for base_path, finished_project_id, finished_project_name in multiqc_projects:
        LOG.info("Running MultiQC on project {}".format(finished_project_id))
        run_multiqc(base_path, finished_project_id, finished_project_name)
def update_charon_with_local_jobs_status(config=None, config_file_path=None):
    """Check the status of all locally-tracked jobs and update Charon accordingly.

    For each SampleAnalysis row in the local tracking database the exit code
    decides the outcome: 0 records ANALYZED (seqruns DONE), a positive code
    records FAILED, and a missing code falls back to checking whether the
    slurm job / process is still alive. Rows for finished or failed jobs are
    deleted from the local database.

    :param dict config: The parsed NGI configuration (config['quiet'] suppresses mails)
    :param str config_file_path: Not read by this function body
    """
    LOG.info("Updating Charon with the status of all locally-tracked jobs...")
    with get_db_session() as session:
        charon_session = CharonSession()
        for sample_entry in session.query(SampleAnalysis).all():
            # Local names
            workflow = sample_entry.workflow
            project_name = sample_entry.project_name
            project_id = sample_entry.project_id
            project_base_path = sample_entry.project_base_path
            sample_id = sample_entry.sample_id
            engine = sample_entry.engine
            # Only one of these will have a value
            slurm_job_id = sample_entry.slurm_job_id
            process_id = sample_entry.process_id
            piper_exit_code = get_exit_code(workflow_name=workflow,
                                            project_base_path=project_base_path,
                                            project_name=project_name,
                                            project_id=project_id,
                                            sample_id=sample_id)
            label = "project/sample {}/{}".format(project_name, sample_id)
            try:
                project_obj = create_project_obj_from_analysis_log(project_name,
                                                                   project_id,
                                                                   project_base_path,
                                                                   sample_id,
                                                                   workflow)
            except IOError as e:
                # analysis log file is missing!
                error_text = ('Could not find analysis log file! Cannot update '
                              'Charon for sample run {}/{}: {}'.format(project_id,
                                                                       sample_id,
                                                                       e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  info_text=error_text)
                continue
            try:
                # Compare against 0 directly: 0 is falsy, so guarding with
                # "piper_exit_code and ..." made the success branch unreachable.
                if piper_exit_code == 0:
                    # 0 -> Job finished successfully
                    set_status = "ANALYZED"
                    info_text = ('Workflow "{}" for {} finished succesfully. '
                                 'Recording status {} in Charon'.format(workflow,
                                                                        label,
                                                                        set_status))
                    LOG.info(info_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine, level="INFO",
                                      info_text=info_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status = "DONE"
                    recurse_status_for_sample(project_obj, recurse_status)
                    # Job is only deleted if the Charon status update succeeds
                    session.delete(sample_entry)
                    # Parse seqrun output results / update Charon
                    # This is a semi-optional step -- failure here will send an
                    # email but not more than once. The record is still removed
                    # from the local jobs database, so this will have to be done
                    # manually if you want it done at all.
                    piper_qc_dir = os.path.join(project_base_path, "ANALYSIS",
                                                project_id, "piper_ngi",
                                                "02_preliminary_alignment_qc")
                    update_coverage_for_sample_seqruns(project_id, sample_id,
                                                       piper_qc_dir)
                elif piper_exit_code and piper_exit_code > 0:
                    # 1 -> Job failed
                    set_status = "FAILED"
                    error_text = ('Workflow "{}" for {} failed. Recording status '
                                  '{} in Charon.'.format(workflow, label,
                                                         set_status))
                    LOG.error(error_text)
                    if not config.get('quiet'):
                        mail_analysis(project_name=project_name,
                                      sample_name=sample_id,
                                      engine_name=engine, level="ERROR",
                                      info_text=error_text)
                    charon_session.sample_update(projectid=project_id,
                                                 sampleid=sample_id,
                                                 analysis_status=set_status)
                    recurse_status_for_sample(project_obj, set_status)
                    # Job is only deleted if the Charon update succeeds
                    session.delete(sample_entry)
                else:
                    # None -> Job still running OR exit code was never written (failure)
                    JOB_FAILED = None
                    if slurm_job_id:
                        try:
                            slurm_exit_code = get_slurm_job_status(slurm_job_id)
                        except ValueError:
                            slurm_exit_code = 1
                        if slurm_exit_code is not None:
                            # "None" indicates job is still running
                            JOB_FAILED = True
                    else:
                        if not psutil.pid_exists(process_id):
                            # Job did not write an exit code and is also not running
                            JOB_FAILED = True
                    if JOB_FAILED:
                        set_status = "FAILED"
                        error_text = ('No exit code found but job not running for '
                                      '{}: setting status to {} in Charon'.format(
                                          label, set_status))
                        LOG.error(error_text)
                        if not config.get('quiet'):
                            mail_analysis(project_name=project_name,
                                          sample_name=sample_id,
                                          engine_name=engine, level="ERROR",
                                          info_text=error_text)
                        charon_session.sample_update(projectid=project_id,
                                                     sampleid=sample_id,
                                                     analysis_status=set_status)
                        recurse_status_for_sample(project_obj, set_status)
                        # Job is only deleted if the Charon update succeeds
                        LOG.debug("Deleting local entry {}".format(sample_entry))
                        session.delete(sample_entry)
                    else:
                        # Job still running
                        # .get() so a sample without the field is corrected
                        # instead of aborting the whole run with a KeyError.
                        charon_status = charon_session.sample_get(
                            projectid=project_id,
                            sampleid=sample_id).get('analysis_status')
                        if not charon_status == "UNDER_ANALYSIS":
                            set_status = "UNDER_ANALYSIS"
                            LOG.warning('Tracking inconsistency for {}: Charon status is "{}" but '
                                        'local process tracking database indicates it is running. '
                                        'Setting value in Charon to {}.'.format(label,
                                                                                charon_status,
                                                                                set_status))
                            charon_session.sample_update(projectid=project_id,
                                                         sampleid=sample_id,
                                                         analysis_status=set_status)
                            recurse_status_for_sample(project_obj, "RUNNING")
            except CharonError as e:
                error_text = ('Unable to update Charon status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  info_text=error_text)
            except OSError as e:
                error_text = ('Permissions error when trying to update Charon '
                              'status for "{}": {}'.format(label, e))
                LOG.error(error_text)
                if not config.get('quiet'):
                    mail_analysis(project_name=project_name,
                                  sample_name=sample_id,
                                  engine_name=engine, level="ERROR",
                                  info_text=error_text)
        session.commit()
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None,
                          config_file_path=None):
    """Record a launched sample job locally and flag it as running in Charon.

    A SampleAnalysis row is added to the local tracking database (the commit
    is attempted up to three times if the database is locked); then the
    sample and its seqruns are marked as under analysis in Charon using the
    status fields that belong to ``workflow_subtask``.

    :param project: Project object (project_id, name and base_path are read)
    :param sample: Sample object (name is read)
    :param str workflow_subtask: "merge_process_variantcall" or "genotype_concordance"
    :param str analysis_module_name: Stored as the engine of the tracked job
    :param process_id: Local process id of the job, if any
    :param slurm_job_id: Slurm job id of the job, if any
    :param dict config: The parsed NGI configuration (config['quiet'] suppresses mails)
    :param str config_file_path: Not read by this function body

    :raises RuntimeError: If the job could not be written to the local database
    :raises ValueError: If workflow_subtask is not a known workflow (raised
                        after the database record has been written)
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for _ in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample,
                                                    workflow_subtask))
                    break
                except OperationalError as e:
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                # for/else: reached only when no attempt hit "break"
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Format the exception itself: ".message" does not exist on
            # Python 3 exceptions and would raise AttributeError here.
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", '
                               'sample "{}", workflow "{}": {}'.format(slurm_job_id,
                                                                       project,
                                                                       sample,
                                                                       workflow_subtask,
                                                                       e))
    extra_args = None
    if workflow_subtask == "merge_process_variantcall":
        sample_status_field = "analysis_status"
        sample_status_value = "UNDER_ANALYSIS"
        sample_data_status_field = "status"
        # Empty value; per the original author's note, this way it will not be updated
        sample_data_status_value = ''
        seqrun_status_field = "alignment_status"
        seqrun_status_value = "RUNNING"
        extra_args = {"mean_autosomal_coverage": 0}
    elif workflow_subtask == "genotype_concordance":
        sample_status_field = seqrun_status_field = "genotype_status"
        sample_status_value = seqrun_status_value = "UNDER_ANALYSIS"
        sample_data_status_field = "status"
        sample_data_status_value = "STALE"
    else:
        raise ValueError('Charon field for workflow "{}" unknown; '
                         'cannot update Charon.'.format(workflow_subtask))
    try:
        LOG.info('Updating Charon status for project/sample '
                 '{}/{} key : {} value : {}'.format(project, sample,
                                                    sample_status_field,
                                                    sample_status_value))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      **{sample_status_field: sample_status_value,
                                         sample_data_status_field: sample_data_status_value})
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj,
                                  status_field=seqrun_status_field,
                                  status_value=seqrun_status_value,
                                  extra_args=extra_args,
                                  config=config)
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        if not config.get('quiet'):
            mail_analysis(project_name=project.project_id,
                          sample_name=sample.name,
                          engine_name='piper_ngi',
                          level="ERROR",
                          info_text=error_text,
                          workflow=workflow_subtask)
def record_process_sample(project, sample, workflow_subtask, analysis_module_name,
                          process_id=None, slurm_job_id=None, config=None,
                          config_file_path=None):
    """Store a tracking record for a started sample job and update Charon.

    The job is written to the local tracking database as a
    ``SampleAnalysis`` entry; committing is attempted three times, pausing
    after every ``OperationalError``. The sample status field that matches
    the workflow is then set in Charon and the seqrun status is pushed down
    with ``recurse_status_for_sample``.

    :param project: Project object; ``project_id``, ``name`` and ``base_path`` are read
    :param sample: Sample object; ``name`` is read
    :param str workflow_subtask: Either "merge_process_variantcall" or
        "genotype_concordance"
    :param str analysis_module_name: Stored as the ``engine`` of the record
    :param process_id: Process id stored in the record (default None)
    :param slurm_job_id: Slurm job id stored in the record (default None)
    :param dict config: Parsed configuration; only the "quiet" key is read
        here, the object is also handed to ``recurse_status_for_sample``
    :param str config_file_path: Not used in the body of this function

    :raises RuntimeError: If the record could not be written to the database
    :raises ValueError: If ``workflow_subtask`` is not one of the known
        workflows (raised after the database record has been written)
    """
    LOG.info('Recording slurm job id "{}" for project "{}", sample "{}", '
             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
    with get_db_session() as session:
        sample_db_obj = SampleAnalysis(project_id=project.project_id,
                                       project_name=project.name,
                                       project_base_path=project.base_path,
                                       sample_id=sample.name,
                                       engine=analysis_module_name,
                                       workflow=workflow_subtask,
                                       process_id=process_id,
                                       slurm_job_id=slurm_job_id)
        try:
            session.add(sample_db_obj)
            for _attempt in range(3):
                try:
                    session.commit()
                    LOG.info('Successfully recorded slurm job id "{}" for project "{}", sample "{}", '
                             'workflow "{}"'.format(slurm_job_id, project, sample, workflow_subtask))
                    break
                except OperationalError as e:
                    # Logger.warn is a deprecated alias; use warning()
                    LOG.warning('Database locked ("{}"). Waiting...'.format(e))
                    time.sleep(15)
            else:
                # The loop finished without a successful commit
                raise RuntimeError("Could not write to database after three attempts (locked?)")
        except (IntegrityError, RuntimeError) as e:
            # Use the exception itself rather than ".message", which does
            # not exist on Python 3 exceptions.
            raise RuntimeError('Could not record slurm job id "{}" for project "{}", '
                               'sample "{}", workflow "{}": {}'.format(slurm_job_id, project,
                                                                       sample, workflow_subtask,
                                                                       e))
    extra_args = None
    if workflow_subtask == "merge_process_variantcall":
        sample_status_field = "analysis_status"
        sample_status_value = "UNDER_ANALYSIS"
        seqrun_status_field = "alignment_status"
        seqrun_status_value = "RUNNING"
        extra_args = {"mean_autosomal_coverage": 0}
    elif workflow_subtask == "genotype_concordance":
        sample_status_field = seqrun_status_field = "genotype_status"
        sample_status_value = seqrun_status_value = "UNDER_ANALYSIS"
    else:
        raise ValueError('Charon field for workflow "{}" unknown; '
                         'cannot update Charon.'.format(workflow_subtask))
    try:
        LOG.info('Updating Charon status for project/sample '
                 '{}/{} key : {} value : {}'.format(project, sample,
                                                    sample_status_field,
                                                    sample_status_value))
        CharonSession().sample_update(projectid=project.project_id,
                                      sampleid=sample.name,
                                      **{sample_status_field: sample_status_value})
        project_obj = create_project_obj_from_analysis_log(project.name,
                                                           project.project_id,
                                                           project.base_path,
                                                           sample.name,
                                                           workflow_subtask)
        recurse_status_for_sample(project_obj,
                                  status_field=seqrun_status_field,
                                  status_value=seqrun_status_value,
                                  extra_args=extra_args,
                                  config=config)
    except CharonError as e:
        error_text = ('Could not update Charon status for project/sample '
                      '{}/{} due to error: {}'.format(project, sample, e))
        LOG.error(error_text)
        # Tolerate a missing config (the parameter defaults to None)
        if not (config or {}).get('quiet'):
            mail_analysis(project_name=project.project_id, sample_name=sample.name,
                          engine_name='piper_ngi', level="ERROR",
                          info_text=error_text, workflow=workflow_subtask)
def launch_analysis(projects_to_analyze, restart_failed_jobs=False,
                    restart_finished_jobs=False, restart_running_jobs=False,
                    keep_existing_data=False, no_qc=False, exec_mode="sbatch",
                    quiet=False, manual=False, config=None, config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    For every project an ``NGIAnalysis`` object is built, Charon is synced
    with the locally tracked job status, and the project is skipped when its
    Charon status cannot be read, is not "OPEN", or no best-practice engine
    can be determined. Unless ``no_qc`` is set, the "qc" engine is run for
    each sample before the project's best-practice analysis is started.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: Forwarded to ``NGIAnalysis``
    :param bool restart_finished_jobs: Forwarded to ``NGIAnalysis``
    :param bool restart_running_jobs: Forwarded to ``NGIAnalysis``
    :param bool keep_existing_data: Forwarded to ``NGIAnalysis``
    :param bool no_qc: Skip the per-sample QC analysis
    :param str exec_mode: Forwarded to ``NGIAnalysis`` (default "sbatch")
    :param bool quiet: Forwarded to ``NGIAnalysis``; note that e-mail
        suppression in this function is governed by ``config["quiet"]``
    :param bool manual: Forwarded to ``NGIAnalysis``
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    :param bool generate_bqsr_bam: Forwarded to ``NGIAnalysis``
    """
    charon_session = CharonSession()
    # config may be None if the caller passed nothing; treat as "not quiet"
    send_mail = not (config or {}).get('quiet')
    for project in projects_to_analyze:
        analysis = NGIAnalysis(project=project,
                               restart_failed_jobs=restart_failed_jobs,
                               restart_finished_jobs=restart_finished_jobs,
                               restart_running_jobs=restart_running_jobs,
                               keep_existing_data=keep_existing_data,
                               no_qc=no_qc,
                               exec_mode=exec_mode,
                               quiet=quiet,
                               manual=manual,
                               config=config,
                               config_file_path=config_file_path,
                               generate_bqsr_bam=generate_bqsr_bam,
                               log=LOG)
        # update charon with the current analysis status
        analysis.engine.local_process_tracking.update_charon_with_local_jobs_status(config=config)
        try:
            project_status = charon_session.project_get(project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(project, e))
            continue
        if not project_status == "OPEN":
            error_text = ('Data found on filesystem for project "{}" but Charon '
                          'reports its status is not OPEN ("{}"). Not launching '
                          'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            if send_mail:
                mail_analysis(project_name=project.name, level="ERROR",
                              info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError, CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(project, e))
            continue
        # Reset for every project so a failed load can neither leave the name
        # unbound nor reuse the module loaded for a previous project.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        for sample in project:
            # Launch QC analysis (skipped if disabled or the engine failed to load)
            if qc_analysis_module is None:
                continue
            try:
                LOG.info('Attempting to launch sample QC analysis '
                         'for project "{}" / sample "{}" / engine '
                         '"{}"'.format(project, sample, qc_analysis_module.__name__))
                qc_analysis_module.analyze(project=project,
                                           sample=sample,
                                           config=config)
            except Exception as e:
                # NOTE(review): the message and e-mail name the best-practice
                # engine rather than the QC engine -- kept as is, confirm intent.
                error_text = ('Cannot process project "{}" / sample "{}" / '
                              'engine "{}" : {}'.format(project, sample,
                                                        analysis_module.__name__, e))
                LOG.error(error_text)
                if send_mail:
                    mail_analysis(project_name=project.name, sample_name=sample.name,
                                  engine_name=analysis_module.__name__,
                                  level="ERROR", info_text=e)
        # Launch actual best-practice analysis
        analysis.engine.analyze(analysis)
def setup_analysis_directory_structure(fc_dir, projects_to_analyze,
                                       restrict_to_projects=None, restrict_to_samples=None,
                                       create_files=True,
                                       fallback_libprep=None,
                                       quiet=False,
                                       config=None, config_file_path=None):
    """
    Copy and sort files from their CASAVA-demultiplexed flowcell structure
    into their respective project/sample/libPrep/FCIDs. This collects samples
    split across multiple flowcells.

    :param str fc_dir: The directory created by CASAVA for this flowcell.
    :param dict projects_to_analyze: Mapping of project data directory to
        project object (may be empty); new projects are added to it in place
    :param list restrict_to_projects: Specific projects within the flowcell to process exclusively
    :param list restrict_to_samples: Specific samples within the flowcell to process exclusively
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param bool quiet: Stored as ``config["quiet"]``; when true no error e-mails are sent
    :param dict config: The parsed configuration file.
    :param str config_file_path: Not used in the body of this function

    :returns: ``projects_to_analyze`` updated with the projects found in the
        flowcell, or an empty list if the flowcell directory does not exist
        or cannot be parsed
    :rtype: dict (list on early exit)

    :raises KeyError: If a required configuration key is not available.
    :raises RuntimeError: If the flowcell root cannot be derived from ``fc_dir``
    """
    LOG.info("Setting up analysis for demultiplexed data in source folder \"{}\"".format(fc_dir))
    if not restrict_to_projects:
        restrict_to_projects = []
    if not restrict_to_samples:
        restrict_to_samples = []
    config["quiet"] = quiet  # Hack because I enter here from a script sometimes
    # Raw strings keep the regex escapes intact without invalid-escape warnings
    root_pattern = r"(.+(?:{}|{}))\/.+".format(config["analysis"]["sthlm_root"],
                                               config["analysis"]["upps_root"])
    fastq_pattern = re.compile(r".*\.(fastq|fq)(\.gz|\.gzip|\.bz2)?$")
    lane_pattern = re.compile(r'[\w-]+_L\d{2}(\d)_\w+')
    matches = re.match(root_pattern, fc_dir)
    if matches:
        flowcell_root = matches.group(1)
    else:
        error_text = "cannot guess which project the flowcell {} belongs to".format(fc_dir)
        LOG.error(error_text)
        raise RuntimeError(error_text)

    analysis_top_dir = os.path.abspath(os.path.join(flowcell_root, config["analysis"]["top_dir"]))
    try:
        safe_makedir(analysis_top_dir)
    except OSError:
        # Best effort: the failure is logged and processing continues
        LOG.error('Error: Analysis top directory {} does not exist and could not '
                  'be created.'.format(analysis_top_dir))
    fc_dir = fc_dir if os.path.isabs(fc_dir) else os.path.join(analysis_top_dir, fc_dir)
    if not os.path.exists(fc_dir):
        LOG.error("Error: Flowcell directory {} does not exist".format(fc_dir))
        return []
    # Map the directory structure for this flowcell
    try:
        fc_dir_structure = parse_flowcell(fc_dir)
    except (OSError, ValueError) as e:
        LOG.error("Error when processing flowcell dir \"{}\": {}".format(fc_dir, e))
        return []
    fc_full_id = fc_dir_structure['fc_full_id']
    if not fc_dir_structure.get('projects'):
        LOG.warning("No projects found in specified flowcell directory \"{}\"".format(fc_dir))

    # Iterate over the projects in the flowcell directory
    for project in fc_dir_structure.get('projects', []):
        project_name = project['project_name']
        project_original_name = project['project_original_name']
        samplesheet_path = fc_dir_structure.get("samplesheet_path")
        try:
            # Maps e.g. "Y.Mom_14_01" to "P123"
            project_id = get_project_id_from_name(project_name)
        except (CharonError, RuntimeError, ValueError) as e:
            LOG.warning('Could not retrieve project id from Charon (record missing?). '
                        'Using project name ("{}") as project id '
                        '(error: {})'.format(project_name, e))
            project_id = project_name
        # If specific projects are specified, skip those that do not match
        if restrict_to_projects and project_name not in restrict_to_projects and \
                project_id not in restrict_to_projects:
            LOG.debug("Skipping project {} (not in restrict_to_projects)".format(project_name))
            continue
        LOG.info("Setting up project {}".format(project.get("project_name")))
        # Create a project directory if it doesn't already exist, including
        # intervening "DATA" directory
        project_dir = os.path.join(analysis_top_dir, "DATA", project_id)
        project_sl_dir = os.path.join(analysis_top_dir, "DATA", project_name)
        project_analysis_dir = os.path.join(analysis_top_dir, "ANALYSIS", project_id)
        project_analysis_sl_dir = os.path.join(analysis_top_dir, "ANALYSIS", project_name)
        if create_files:
            safe_makedir(project_dir, 0o2770)
            safe_makedir(project_analysis_dir, 0o2770)
            # Name-based symlinks next to the id-based directories
            if not project_dir == project_sl_dir and \
                    not os.path.exists(project_sl_dir):
                os.symlink(project_dir, project_sl_dir)
            if not project_analysis_dir == project_analysis_sl_dir and \
                    not os.path.exists(project_analysis_sl_dir):
                os.symlink(project_analysis_dir, project_analysis_sl_dir)
        try:
            project_obj = projects_to_analyze[project_dir]
        except KeyError:
            project_obj = NGIProject(name=project_name, dirname=project_id,
                                     project_id=project_id,
                                     base_path=analysis_top_dir)
            projects_to_analyze[project_dir] = project_obj
        # Iterate over the samples in the project
        for sample in project.get('samples', []):
            sample_name = sample['sample_name']
            # If specific samples are specified, skip those that do not match
            if restrict_to_samples and sample_name not in restrict_to_samples:
                LOG.debug("Skipping sample {}: not in specified samples "
                          "{}".format(sample_name, ", ".join(restrict_to_samples)))
                continue
            LOG.info("Setting up sample {}".format(sample_name))
            # Create a directory for the sample if it doesn't already exist
            sample_dir = os.path.join(project_dir, sample_name)
            if create_files:
                safe_makedir(sample_dir, 0o2770)
            # This will only create a new sample object if it doesn't already exist in the project
            sample_obj = project_obj.add_sample(name=sample_name, dirname=sample_name)
            # Build a real list: on Python 3 filter() yields a one-shot iterator
            # that is always truthy and is exhausted after the loop below.
            fastq_files = [fq_file for fq_file in sample.get('files', [])
                           if fastq_pattern.match(fq_file)]
            # For each fastq file, create the libprep and seqrun objects
            # and add the fastq file to the seqprep object
            # Note again that these objects only get created if they don't yet exist;
            # if they do exist, the existing object is returned
            for fq_file in fastq_files:
                # Try to parse from SampleSheet
                try:
                    if not samplesheet_path:
                        raise ValueError()
                    lane_match = lane_pattern.match(fq_file)
                    if not lane_match:
                        # Fall back to Charon instead of crashing on None
                        raise ValueError('cannot parse lane number from file name '
                                         '"{}"'.format(fq_file))
                    lane_num = lane_match.group(1)
                    libprep_name = determine_library_prep_from_samplesheet(samplesheet_path,
                                                                           project_original_name,
                                                                           sample_name,
                                                                           lane_num)
                except (IndexError, ValueError) as e:
                    LOG.debug('Unable to determine library prep from sample sheet file '
                              '("{}"); try to determine from Charon'.format(e))
                    try:
                        # Requires Charon access
                        libprep_name = determine_library_prep_from_fcid(project_id, sample_name, fc_full_id)
                        LOG.debug('Found libprep name "{}" in Charon'.format(libprep_name))
                    except ValueError:
                        charon_session = CharonSession()
                        # A missing "libpreps" key is treated as "no libpreps"
                        libpreps = charon_session.sample_get_libpreps(
                            project_id, sample_name).get('libpreps') or []
                        if len(libpreps) == 1:
                            libprep_name = libpreps[0].get('libprepid')
                            LOG.warning('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                        'has no libprep information in Charon, but only one '
                                        'library prep is present in Charon ("{}"). Using '
                                        'this as the library prep.'.format(project_name,
                                                                           sample_name,
                                                                           fc_full_id,
                                                                           fq_file,
                                                                           libprep_name))
                        elif fallback_libprep:
                            libprep_name = fallback_libprep
                            LOG.warning('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                        'has no libprep information in Charon, but a fallback '
                                        'libprep value of "{}" was supplied -- using this '
                                        'value.'.format(project_name,
                                                        sample_name,
                                                        fc_full_id,
                                                        fq_file,
                                                        libprep_name))
                        else:
                            error_text = ('Project "{}" / sample "{}" / seqrun "{}" / fastq "{}" '
                                          'has no libprep information in Charon. Skipping '
                                          'analysis.'.format(project_name, sample_name,
                                                             fc_full_id, fq_file))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR", info_text=error_text)
                            continue
                libprep_object = sample_obj.add_libprep(name=libprep_name,
                                                        dirname=libprep_name)
                libprep_dir = os.path.join(sample_dir, libprep_name)
                if create_files:
                    safe_makedir(libprep_dir, 0o2770)
                seqrun_object = libprep_object.add_seqrun(name=fc_full_id,
                                                          dirname=fc_full_id)
                seqrun_dir = os.path.join(libprep_dir, fc_full_id)
                if create_files:
                    safe_makedir(seqrun_dir, 0o2770)
                seqrun_object.add_fastq_files(fq_file)

            if fastq_files and create_files:
                src_sample_dir = os.path.join(fc_dir_structure['fc_dir'],
                                              project['data_dir'],
                                              project['project_dir'],
                                              sample['sample_dir'])
                for libprep_obj in sample_obj:
                    for seqrun_obj in libprep_obj:
                        src_fastq_files = [os.path.join(src_sample_dir, fastq_file)
                                           for fastq_file in seqrun_obj.fastq_files]
                        # Destination of *this* libprep/seqrun, laid out the same
                        # way the directories are created above. Previously the
                        # stale "seqrun_dir" of the last fastq file was used, so
                        # every libprep's files ended up in one directory.
                        seqrun_dst_dir = os.path.join(sample_dir,
                                                      libprep_obj.dirname,
                                                      seqrun_obj.dirname)
                        LOG.info("Symlinking fastq files from {} to {}...".format(src_sample_dir,
                                                                                  seqrun_dst_dir))
                        try:
                            safe_makedir(seqrun_dst_dir, 0o2770)
                            do_symlink(src_fastq_files, seqrun_dst_dir)
                        except OSError:
                            error_text = ('Could not symlink files for project/sample/'
                                          'libprep/seqrun {}/{}/{}/{}'.format(project_obj,
                                                                              sample_obj,
                                                                              libprep_obj,
                                                                              seqrun_obj))
                            LOG.error(error_text)
                            if not config.get('quiet'):
                                mail_analysis(project_name=project_name,
                                              sample_name=sample_name,
                                              level="ERROR", info_text=error_text)
                            continue
    return projects_to_analyze
def launch_analysis(projects_to_analyze,
                    restart_failed_jobs=False,
                    restart_finished_jobs=False,
                    restart_running_jobs=False,
                    keep_existing_data=False,
                    no_qc=False,
                    exec_mode="sbatch",
                    quiet=False,
                    manual=False,
                    config=None,
                    config_file_path=None,
                    generate_bqsr_bam=False):
    """Launch the appropriate analysis for each fastq file in the project.

    Each project gets its own ``NGIAnalysis`` object. Charon is first updated
    with the status of locally tracked jobs; projects whose Charon status
    cannot be fetched, is not "OPEN", or for which no best-practice engine
    can be found are skipped. Unless ``no_qc`` is set, the "qc" engine is run
    on every sample, after which the best-practice analysis of the project
    is started.

    :param list projects_to_analyze: The list of projects (Project objects) to analyze
    :param bool restart_failed_jobs: Forwarded to ``NGIAnalysis``
    :param bool restart_finished_jobs: Forwarded to ``NGIAnalysis``
    :param bool restart_running_jobs: Forwarded to ``NGIAnalysis``
    :param bool keep_existing_data: Forwarded to ``NGIAnalysis``
    :param bool no_qc: Skip the per-sample QC analysis
    :param str exec_mode: Forwarded to ``NGIAnalysis`` (default "sbatch")
    :param bool quiet: Forwarded to ``NGIAnalysis``; note that e-mail
        suppression in this function is governed by ``config["quiet"]``
    :param bool manual: Forwarded to ``NGIAnalysis``
    :param dict config: The parsed NGI configuration file; optional/has default.
    :param str config_file_path: The path to the NGI configuration file; optional/has default.
    :param bool generate_bqsr_bam: Forwarded to ``NGIAnalysis``
    """
    charon_session = CharonSession()
    for project in projects_to_analyze:
        analysis = NGIAnalysis(project=project,
                               restart_failed_jobs=restart_failed_jobs,
                               restart_finished_jobs=restart_finished_jobs,
                               restart_running_jobs=restart_running_jobs,
                               keep_existing_data=keep_existing_data,
                               no_qc=no_qc,
                               exec_mode=exec_mode,
                               quiet=quiet,
                               manual=manual,
                               config=config,
                               config_file_path=config_file_path,
                               generate_bqsr_bam=generate_bqsr_bam,
                               log=LOG)
        #update charon with the current analysis status
        analysis.engine.local_process_tracking.update_charon_with_local_jobs_status(
            config=config)
        try:
            project_status = charon_session.project_get(
                project.project_id)['status']
        except CharonError as e:
            LOG.error('Project {} could not be processed: {}'.format(
                project, e))
            continue
        if not project_status == "OPEN":
            error_text = (
                'Data found on filesystem for project "{}" but Charon '
                'reports its status is not OPEN ("{}"). Not launching '
                'analysis for this project.'.format(project, project_status))
            LOG.error(error_text)
            # config defaults to None; a missing config means "not quiet"
            if not (config or {}).get('quiet'):
                mail_analysis(project_name=project.name,
                              level="ERROR",
                              info_text=error_text)
            continue
        try:
            analysis_module = get_engine_for_bp(project)
        except (RuntimeError,
                CharonError) as e:  # BPA missing from Charon?
            LOG.error('Skipping project "{}" because of error: {}'.format(
                project, e))
            continue
        # Start from a known state for every project: if loading fails the
        # name must neither be unbound nor point at a previous project's module.
        qc_analysis_module = None
        if not no_qc:
            try:
                qc_analysis_module = load_engine_module("qc", config)
            except RuntimeError as e:
                LOG.error("Could not launch qc analysis: {}".format(e))
        run_qc = qc_analysis_module is not None
        for sample in project:
            # Launch QC analysis (only when enabled and the engine was loaded)
            if run_qc:
                try:
                    LOG.info('Attempting to launch sample QC analysis '
                             'for project "{}" / sample "{}" / engine '
                             '"{}"'.format(project, sample,
                                           qc_analysis_module.__name__))
                    qc_analysis_module.analyze(project=project,
                                               sample=sample,
                                               config=config)
                except Exception as e:
                    # NOTE(review): reports the best-practice engine name, not
                    # the QC engine -- left unchanged, confirm this is intended.
                    error_text = (
                        'Cannot process project "{}" / sample "{}" / '
                        'engine "{}" : {}'.format(project, sample,
                                                  analysis_module.__name__, e))
                    LOG.error(error_text)
                    if not (config or {}).get("quiet"):
                        mail_analysis(project_name=project.name,
                                      sample_name=sample.name,
                                      engine_name=analysis_module.__name__,
                                      level="ERROR",
                                      info_text=e)
        # Launch actual best-practice analysis
        analysis.engine.analyze(analysis)