def analyze(analysis_object, config=None, config_file_path=None):
    """Launch the RNA-seq (rna_ngi) analysis for a whole project.

    Looks the project up in Charon to get the reference genome and the
    sequencing facility, collects the fastq files of every seqrun that
    Charon says should be (re)analyzed, and submits a single batch job.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param config: unused here; kept for interface compatibility
    :param config_file_path: unused here; kept for interface compatibility

    :raises RuntimeError: if the project's sequencing facility in Charon
        is neither "NGI-S" (Stockholm) nor "NGI-U" (Uppsala)
    """
    charon_session = CharonSession()
    charon_pj = charon_session.project_get(analysis_object.project.project_id)
    reference_genome = charon_pj.get('reference')
    # The facility determines which pipeline config is used downstream.
    if charon_pj.get("sequencing_facility") == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif charon_pj.get("sequencing_facility") == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        LOG.error("charon project not registered with stockholm or uppsala. "
                  "Which config file should we use for the RNA pipeline ?")
        # Fixed: raise with a message instead of the bare exception class so
        # callers can see why the analysis was aborted.
        raise RuntimeError("Project {} is not registered with a known "
                           "sequencing facility in Charon".format(
                               analysis_object.project.project_id))
    fastq_files = []
    # Without a usable reference genome there is nothing to align against.
    if reference_genome and reference_genome != 'other':
        for sample in analysis_object.project:
            try:
                charon_reported_status = charon_session.sample_get(
                    analysis_object.project.project_id,
                    sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                do_analyze = handle_sample_status(analysis_object, sample,
                                                  charon_reported_status)
                if not do_analyze:
                    continue
            except CharonError as e:
                # NOTE(review): on a Charon error the sample still falls
                # through to the libprep loop below -- confirm this
                # best-effort behavior is intended (the piper launcher
                # skips the sample in this situation).
                LOG.error(e)
            for libprep in sample:
                charon_lp_status = charon_session.libprep_get(
                    analysis_object.project.project_id, sample.name,
                    libprep.name).get('qc')
                do_analyze = handle_libprep_status(analysis_object, libprep,
                                                   charon_lp_status)
                if not do_analyze:
                    continue
                for seqrun in libprep:
                    charon_sr_status = charon_session.seqrun_get(
                        analysis_object.project.project_id, sample.name,
                        libprep.name, seqrun.name).get('alignment_status')
                    do_analyze = handle_seqrun_status(analysis_object, seqrun,
                                                      charon_sr_status)
                    if not do_analyze:
                        continue
                    seqrun.being_analyzed = True
                    sample.being_analyzed = sample.being_analyzed or True
                    # filter out index files from analysis
                    for fastq_file in filter(lambda f: not is_index_file(f),
                                             seqrun.fastq_files):
                        fastq_path = os.path.join(
                            analysis_object.project.base_path, "DATA",
                            analysis_object.project.project_id, sample.name,
                            libprep.name, seqrun.name, fastq_file)
                        fastq_files.append(fastq_path)
    if not fastq_files:
        # Fixed typo in the message: "fo" -> "of".
        LOG.error("No fastq files obtained for the analysis of project {}, "
                  "please check the Charon status.".format(
                      analysis_object.project.name))
    else:
        if analysis_object.restart_running_jobs:
            stop_ongoing_analysis(analysis_object)
        fastq_dir = preprocess_analysis(analysis_object, fastq_files)
        sbatch_path = write_batch_job(analysis_object, reference_genome, fastq_dir)
        job_id = start_analysis(sbatch_path)
        analysis_path = os.path.join(analysis_object.project.base_path,
                                     "ANALYSIS",
                                     analysis_object.project.project_id,
                                     'rna_ngi')
        record_project_job(analysis_object.project, job_id, analysis_path)
def analyze(analysis_object, level='sample', config=None, config_file_path=None):
    """Analyze data at the sample level.

    Iterates over every sample of the project held by ``analysis_object``,
    checks its status in Charon, and launches one Piper job per workflow
    subtask of ``level`` unless an analysis is already running/finished.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param str level: workflow level, "sample" (default) or "genotype"
    :param config: unused here; kept for interface compatibility
    :param config_file_path: unused here; kept for interface compatibility

    :raises ValueError: If exec_mode is an unsupported value
    :raises RuntimeError: If pre-existing sample runs abort the processing
    """
    charon_session = CharonSession()
    for sample in analysis_object.project:
        try:
            charon_reported_status = charon_session.sample_get(
                analysis_object.project.project_id,
                sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            do_analyze = handle_sample_status(analysis_object, sample,
                                              charon_reported_status)
            if not do_analyze:
                continue
        except CharonError as e:
            # Charon could not be queried for this sample; skip it.
            LOG.error(e)
            continue
        # Map the workflow level to the Charon status field it is tracked in.
        if level == "sample":
            status_field = "alignment_status"
        elif level == "genotype":
            status_field = "genotype_status"
        else:
            # Fixed: LOG.warn is a deprecated alias of LOG.warning (and the
            # sibling implementation in this file already uses warning).
            LOG.warning('Unknown workflow level: "{}"'.format(level))
            status_field = "alignment_status"  # Or should we abort?
        try:
            check_for_preexisting_sample_runs(
                analysis_object.project, sample,
                analysis_object.restart_running_jobs,
                analysis_object.restart_finished_jobs,
                status_field)
        except RuntimeError as e:
            raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                               '{}'.format(analysis_object.project, sample, e))
        if analysis_object.exec_mode.lower() not in ("sbatch", "local"):
            raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                             'value was "{}"'.format(analysis_object.exec_mode))
        if analysis_object.exec_mode == "local":
            modules_to_load = analysis_object.config.get("piper", {}).get(
                "load_modules", [])
            load_modules(modules_to_load)
        for workflow_subtask in workflows.get_subtasks_for_level(level=level):
            if level == "genotype":
                genotype_status = None  # Some records in Charon lack this field, I'm guessing
                try:
                    charon_session = CharonSession()
                    genotype_status = charon_session.sample_get(
                        projectid=analysis_object.project.project_id,
                        sampleid=sample.name).get("genotype_status")
                except CharonError as e:
                    LOG.error('Couldn\'t determine genotyping status for project/'
                              'sample "{}/{}"; skipping analysis.'.format(
                                  analysis_object.project, sample))
                    continue
                # Skip samples that already have a finished genotype analysis
                # unless a restart was explicitly requested.
                if find_previous_genotype_analyses(
                        analysis_object.project, sample) or genotype_status == "DONE":
                    if not analysis_object.restart_finished_jobs:
                        LOG.info('Project/sample "{}/{}" has completed genotype '
                                 'analysis previously; skipping (use flag to force '
                                 'analysis)'.format(analysis_object.project, sample))
                        continue
            if analysis_object.restart_running_jobs:
                # Kill currently-running jobs if they exist
                kill_running_sample_analysis(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name)
            # This checks the local jobs database
            if not is_sample_analysis_running_local(
                    workflow_subtask=workflow_subtask,
                    project_id=analysis_object.project.project_id,
                    sample_id=sample.name):
                LOG.info('Launching "{}" analysis for sample "{}" in project '
                         '"{}"'.format(workflow_subtask, sample,
                                       analysis_object.project))
                try:
                    log_file_path = create_log_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    rotate_file(log_file_path)
                    exit_code_path = create_exit_code_file_path(
                        workflow_subtask=workflow_subtask,
                        project_base_path=analysis_object.project.base_path,
                        project_name=analysis_object.project.dirname,
                        project_id=analysis_object.project.project_id,
                        sample_id=sample.name)
                    # Unless told to keep existing data, wipe any previous
                    # analysis output for this level before relaunching.
                    if level == "sample":
                        if not analysis_object.keep_existing_data:
                            remove_previous_sample_analyses(
                                analysis_object.project, sample)
                            default_files_to_copy = None
                    elif level == "genotype":
                        if not analysis_object.keep_existing_data:
                            remove_previous_genotype_analyses(
                                analysis_object.project)
                            default_files_to_copy = None
                    # Update the project to keep only valid fastq files for setup.xml creation
                    if level == "genotype":
                        updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(
                                analysis_object.project, sample,
                                restart_finished_jobs=True,
                                status_field="genotype_status")
                    else:
                        updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(
                                analysis_object.project, sample,
                                analysis_object.restart_finished_jobs,
                                status_field="alignment_status")
                    setup_xml_cl, setup_xml_path = build_setup_xml(
                        project=updated_project,
                        sample=sample,
                        workflow=workflow_subtask,
                        local_scratch_mode=(
                            analysis_object.exec_mode == "sbatch"),
                        config=analysis_object.config)
                    piper_cl = build_piper_cl(
                        project=analysis_object.project,
                        workflow_name=workflow_subtask,
                        setup_xml_path=setup_xml_path,
                        exit_code_path=exit_code_path,
                        config=analysis_object.config,
                        exec_mode=analysis_object.exec_mode,
                        generate_bqsr_bam=analysis_object.generate_bqsr_bam)
                    if analysis_object.exec_mode == "sbatch":
                        process_id = None
                        slurm_job_id = sbatch_piper_sample(
                            [setup_xml_cl, piper_cl],
                            workflow_subtask,
                            analysis_object.project,
                            sample,
                            restart_finished_jobs=analysis_object.
                            restart_finished_jobs,
                            files_to_copy=default_files_to_copy)
                        # Fixed: range instead of py2-only xrange (identical
                        # behavior for this small, fully-consumed loop).
                        for x in range(10):
                            # Time delay to let sbatch get its act together
                            # (takes a few seconds to be visible with sacct)
                            try:
                                get_slurm_job_status(slurm_job_id)
                                break
                            except ValueError:
                                time.sleep(2)
                        else:
                            # for/else: loop exhausted without break -> the
                            # job never became visible to sacct.
                            LOG.error('sbatch file for sample {}/{} did not '
                                      'queue properly! Job ID {} cannot be '
                                      'found.'.format(analysis_object.project,
                                                      sample, slurm_job_id))
                    else:  # "local"
                        raise NotImplementedError(
                            'Local execution not currently implemented. '
                            'I\'m sure Denis can help you with this.')
                        #slurm_job_id = None
                        #launch_piper_job(setup_xml_cl, project)
                        #process_handle = launch_piper_job(piper_cl, project)
                        #process_id = process_handle.pid
                    try:
                        record_process_sample(
                            project=analysis_object.project,
                            sample=sample,
                            analysis_module_name="piper_ngi",
                            slurm_job_id=slurm_job_id,
                            process_id=process_id,
                            workflow_subtask=workflow_subtask)
                    except RuntimeError as e:
                        LOG.error(e)
                        ## Question: should we just kill the run in this case or let it go?
                        continue
                except (NotImplementedError, RuntimeError, ValueError) as e:
                    error_msg = (
                        'Processing project "{}" / sample "{}" / workflow "{}" '
                        'failed: {}'.format(analysis_object.project, sample,
                                            workflow_subtask, e))
                    LOG.error(error_msg)
def analyze(analysis_object, config=None, config_file_path=None):
    """Collect fastq files for a project and start the rna_ngi batch job.

    Determines the reference genome and sequencing facility from Charon,
    gathers the fastq paths of all seqruns approved for analysis, and
    submits one sbatch job covering the whole project.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis
    :param config: unused here; kept for interface compatibility
    :param config_file_path: unused here; kept for interface compatibility

    :raises RuntimeError: if Charon reports a sequencing facility other
        than "NGI-S" or "NGI-U"
    """
    charon_session = CharonSession()
    charon_pj = charon_session.project_get(analysis_object.project.project_id)
    reference_genome = charon_pj.get('reference')
    if charon_pj.get("sequencing_facility") == "NGI-S":
        analysis_object.sequencing_facility = "sthlm"
    elif charon_pj.get("sequencing_facility") == "NGI-U":
        analysis_object.sequencing_facility = "upps"
    else:
        LOG.error(
            "charon project not registered with stockholm or uppsala. Which config file should we use for the RNA pipeline ?"
        )
        # Fixed: attach a message to the exception rather than raising the
        # bare class, so the failure reason travels with the traceback.
        raise RuntimeError("Unknown sequencing facility for project "
                           "{}".format(analysis_object.project.project_id))
    fastq_files = []
    if reference_genome and reference_genome != 'other':
        for sample in analysis_object.project:
            try:
                charon_reported_status = charon_session.sample_get(
                    analysis_object.project.project_id,
                    sample).get('analysis_status')
                # Check Charon to ensure this hasn't already been processed
                do_analyze = handle_sample_status(analysis_object, sample,
                                                  charon_reported_status)
                if not do_analyze:
                    continue
            except CharonError as e:
                # NOTE(review): Charon failures are logged but the sample is
                # still processed below -- confirm this is intended.
                LOG.error(e)
            for libprep in sample:
                charon_lp_status = charon_session.libprep_get(
                    analysis_object.project.project_id, sample.name,
                    libprep.name).get('qc')
                do_analyze = handle_libprep_status(analysis_object, libprep,
                                                   charon_lp_status)
                if not do_analyze:
                    continue
                for seqrun in libprep:
                    charon_sr_status = charon_session.seqrun_get(
                        analysis_object.project.project_id, sample.name,
                        libprep.name, seqrun.name).get('alignment_status')
                    do_analyze = handle_seqrun_status(
                        analysis_object, seqrun, charon_sr_status)
                    if not do_analyze:
                        continue
                    seqrun.being_analyzed = True
                    sample.being_analyzed = sample.being_analyzed or True
                    # NOTE(review): unlike the sibling variant, index read
                    # files are NOT filtered out here -- confirm intended.
                    for fastq_file in seqrun.fastq_files:
                        fastq_path = os.path.join(
                            analysis_object.project.base_path, "DATA",
                            analysis_object.project.project_id, sample.name,
                            libprep.name, seqrun.name, fastq_file)
                        fastq_files.append(fastq_path)
    if not fastq_files:
        # Fixed typo in the message: "fo" -> "of".
        LOG.error(
            "No fastq files obtained for the analysis of project {}, please check the Charon status."
            .format(analysis_object.project.name))
    else:
        if analysis_object.restart_running_jobs:
            stop_ongoing_analysis(analysis_object)
        fastq_dir = preprocess_analysis(analysis_object, fastq_files)
        sbatch_path = write_batch_job(analysis_object, reference_genome,
                                      fastq_dir)
        job_id = start_analysis(sbatch_path)
        analysis_path = os.path.join(analysis_object.project.base_path,
                                     "ANALYSIS",
                                     analysis_object.project.project_id,
                                     'rna_ngi')
        record_project_job(analysis_object.project, job_id, analysis_path)
def analyze(analysis_object, level='sample', config=None, config_file_path=None):
    """Analyze data at the sample level.

    :param NGIAnalysis analysis_object: holds all the parameters for the analysis

    :raises ValueError: If exec_mode is an unsupported value
    """
    charon_session = CharonSession()
    for sample in analysis_object.project:
        try:
            charon_reported_status = charon_session.sample_get(analysis_object.project.project_id,
                                                               sample).get('analysis_status')
            # Check Charon to ensure this hasn't already been processed
            do_analyze = handle_sample_status(analysis_object, sample, charon_reported_status)
            if not do_analyze:
                continue
        except CharonError as e:
            # Charon could not be queried for this sample; skip it entirely.
            LOG.error(e)
            continue
        # Map the workflow level to the Charon field that tracks its status.
        if level == "sample":
            status_field = "alignment_status"
        elif level == "genotype":
            status_field = "genotype_status"
        else:
            LOG.warning('Unknown workflow level: "{}"'.format(level))
            status_field = "alignment_status"  # Or should we abort?
        try:
            check_for_preexisting_sample_runs(analysis_object.project, sample,
                                              analysis_object.restart_running_jobs,
                                              analysis_object.restart_finished_jobs,
                                              status_field)
        except RuntimeError as e:
            # Re-raise with project/sample context for the caller.
            raise RuntimeError('Aborting processing of project/sample "{}/{}": '
                               '{}'.format(analysis_object.project, sample, e))
        if analysis_object.exec_mode.lower() not in ("sbatch", "local"):
            raise ValueError('"exec_mode" param must be one of "sbatch" or "local" '
                             'value was "{}"'.format(analysis_object.exec_mode))
        if analysis_object.exec_mode == "local":
            # Local execution needs the piper environment modules loaded.
            modules_to_load = analysis_object.config.get("piper", {}).get("load_modules", [])
            load_modules(modules_to_load)
        for workflow_subtask in workflows.get_subtasks_for_level(level=level):
            if level == "genotype":
                genotype_status = None  # Some records in Charon lack this field, I'm guessing
                try:
                    charon_session = CharonSession()
                    genotype_status = charon_session.sample_get(projectid=analysis_object.project.project_id,
                                                                sampleid=sample.name).get("genotype_status")
                except CharonError as e:
                    LOG.error('Couldn\'t determine genotyping status for project/'
                              'sample "{}/{}"; skipping analysis.'.format(analysis_object.project, sample))
                    continue
                # Skip previously completed genotype analyses unless a
                # restart of finished jobs was explicitly requested.
                if find_previous_genotype_analyses(analysis_object.project, sample) or genotype_status == "DONE":
                    if not analysis_object.restart_finished_jobs:
                        LOG.info('Project/sample "{}/{}" has completed genotype '
                                 'analysis previously; skipping (use flag to force '
                                 'analysis)'.format(analysis_object.project, sample))
                        continue
            if analysis_object.restart_running_jobs:
                # Kill currently-running jobs if they exist
                kill_running_sample_analysis(workflow_subtask=workflow_subtask,
                                             project_id=analysis_object.project.project_id,
                                             sample_id=sample.name)
            # This checks the local jobs database
            if not is_sample_analysis_running_local(workflow_subtask=workflow_subtask,
                                                    project_id=analysis_object.project.project_id,
                                                    sample_id=sample.name):
                LOG.info('Launching "{}" analysis for sample "{}" in project '
                         '"{}"'.format(workflow_subtask, sample, analysis_object.project))
                try:
                    log_file_path = create_log_file_path(workflow_subtask=workflow_subtask,
                                                         project_base_path=analysis_object.project.base_path,
                                                         project_name=analysis_object.project.dirname,
                                                         project_id=analysis_object.project.project_id,
                                                         sample_id=sample.name)
                    rotate_file(log_file_path)
                    exit_code_path = create_exit_code_file_path(workflow_subtask=workflow_subtask,
                                                                project_base_path=analysis_object.project.base_path,
                                                                project_name=analysis_object.project.dirname,
                                                                project_id=analysis_object.project.project_id,
                                                                sample_id=sample.name)
                    # Unless told to keep existing data, clear previous
                    # output for this level before relaunching.
                    if level == "sample":
                        if not analysis_object.keep_existing_data:
                            remove_previous_sample_analyses(analysis_object.project, sample)
                            default_files_to_copy = None
                    elif level == "genotype":
                        if not analysis_object.keep_existing_data:
                            remove_previous_genotype_analyses(analysis_object.project)
                            default_files_to_copy = None
                    # Update the project to keep only valid fastq files for setup.xml creation
                    if level == "genotype":
                        updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(analysis_object.project,
                                                              sample,
                                                              restart_finished_jobs=True,
                                                              status_field="genotype_status")
                    else:
                        updated_project, default_files_to_copy = \
                            collect_files_for_sample_analysis(analysis_object.project,
                                                              sample,
                                                              analysis_object.restart_finished_jobs,
                                                              status_field="alignment_status")
                    setup_xml_cl, setup_xml_path = build_setup_xml(project=updated_project,
                                                                   sample=sample,
                                                                   workflow=workflow_subtask,
                                                                   local_scratch_mode=(analysis_object.exec_mode == "sbatch"),
                                                                   config=analysis_object.config)
                    piper_cl = build_piper_cl(project=analysis_object.project,
                                              workflow_name=workflow_subtask,
                                              setup_xml_path=setup_xml_path,
                                              exit_code_path=exit_code_path,
                                              config=analysis_object.config,
                                              exec_mode=analysis_object.exec_mode,
                                              generate_bqsr_bam=analysis_object.generate_bqsr_bam)
                    if analysis_object.exec_mode == "sbatch":
                        process_id = None
                        slurm_job_id = sbatch_piper_sample([setup_xml_cl, piper_cl],
                                                           workflow_subtask,
                                                           analysis_object.project,
                                                           sample,
                                                           restart_finished_jobs=analysis_object.restart_finished_jobs,
                                                           files_to_copy=default_files_to_copy)
                        for x in xrange(10):
                            # Time delay to let sbatch get its act together
                            # (takes a few seconds to be visible with sacct)
                            try:
                                get_slurm_job_status(slurm_job_id)
                                break
                            except ValueError:
                                time.sleep(2)
                        else:
                            # for/else: the job never became visible to sacct.
                            LOG.error('sbatch file for sample {}/{} did not '
                                      'queue properly! Job ID {} cannot be '
                                      'found.'.format(analysis_object.project, sample, slurm_job_id))
                    else:  # "local"
                        raise NotImplementedError('Local execution not currently implemented. '
                                                  'I\'m sure Denis can help you with this.')
                        #slurm_job_id = None
                        #launch_piper_job(setup_xml_cl, project)
                        #process_handle = launch_piper_job(piper_cl, project)
                        #process_id = process_handle.pid
                    try:
                        # Record the launched job in the local jobs database.
                        record_process_sample(project=analysis_object.project,
                                              sample=sample,
                                              analysis_module_name="piper_ngi",
                                              slurm_job_id=slurm_job_id,
                                              process_id=process_id,
                                              workflow_subtask=workflow_subtask)
                    except RuntimeError as e:
                        LOG.error(e)
                        ## Question: should we just kill the run in this case or let it go?
                        continue
                except (NotImplementedError, RuntimeError, ValueError) as e:
                    error_msg = ('Processing project "{}" / sample "{}" / workflow "{}" '
                                 'failed: {}'.format(analysis_object.project, sample,
                                                     workflow_subtask, e))
                    LOG.error(error_msg)