def start_automation(
        jira,
        version,
        args,
        run_options,
        analysis_info,
        data_dir,
        runs_dir,
        reference_dir,
        results_dir,
        storages,
):
    start = time.time()

    tantalus_analysis = TenXAnalysis(
        jira,
        version,
        args,
        run_options,
        storages=storages,
        update=run_options["update"],
    )

    try:
        tantalus_analysis.set_run_status()

        if run_options["skip_pipeline"]:
            log.info("skipping pipeline")
        else:
            log_utils.sentinel(
                'Running SCRNA pipeline',
                tantalus_analysis.run_pipeline,
                version,
                data_dir,
                runs_dir,
                reference_dir,
                results_dir,
                args["library_id"],
                args["ref_genome"],
            )
    except Exception:
        tantalus_analysis.set_error_status()
        raise

    tantalus_analysis.set_complete_status()

    output_dataset_ids = log_utils.sentinel(
        'Creating output datasets',
        tantalus_analysis.create_output_datasets,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating output results',
        tantalus_analysis.create_output_results,
        update=run_options['update'],
        skip_missing=run_options["skip_missing"],
    )

    analysis_info.set_finish_status()

    # Update the Jira ticket
    if not run_options["is_test_run"]:
        update_jira_tenx(jira, args)

    add_report(jira)

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))
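
# The log_utils.sentinel(...) calls above wrap each step so that a rerun of the
# same ticket can skip work that already finished. The helper below is only a
# minimal sketch of that checkpointing pattern, not the repo's log_utils
# implementation; the sentinel_dir default and marker-file naming are invented
# for illustration.
import os
import pickle


def sentinel_sketch(name, func, *args, sentinel_dir=".sentinels", **kwargs):
    os.makedirs(sentinel_dir, exist_ok=True)
    # One marker file per named step; derive a safe filename from the step name.
    marker = os.path.join(sentinel_dir, name.replace(" ", "_").replace("/", "_") + ".done")

    if os.path.exists(marker):
        # Step already completed on a previous run: reuse its recorded result.
        with open(marker, "rb") as f:
            return pickle.load(f)

    result = func(*args, **kwargs)

    # Record completion (and the return value) only after func succeeds.
    with open(marker, "wb") as f:
        pickle.dump(result, f)

    return result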
def start_automation(
        analysis_name,
        jira_id,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        storages,
        job_subdir,
):
    start = time.time()

    if analysis_name == 'split_wgs_bam':
        create_from_args = workflows.analysis.dlp.split_wgs_bam.SplitWGSBamAnalysis.create_from_args
    elif analysis_name == 'merge_cell_bams':
        create_from_args = workflows.analysis.dlp.merge_cell_bams.MergeCellBamsAnalysis.create_from_args
    elif analysis_name == 'variant_calling':
        create_from_args = workflows.analysis.dlp.variant_calling.VariantCallingAnalysis.create_from_args
    elif analysis_name == 'breakpoint_calling':
        create_from_args = workflows.analysis.dlp.breakpoint_calling.BreakpointCallingAnalysis.create_from_args
    else:
        raise Exception(f"{analysis_name} is not a valid analysis name")

    analysis = create_from_args(
        jira_id,
        version,
        args,
        update=run_options['update'],
    )

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            analysis.get_input_datasets(),
            analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        local_results_storage = tantalus_api.get(
            'storage',
            name=storages['local_results'])['storage_directory']

        inputs_yaml = os.path.join(local_results_storage, job_subdir, analysis_name, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            analysis.generate_inputs_yaml,
            storages,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    analysis.add_inputs_yaml(inputs_yaml, update=run_options['update'])

    try:
        analysis.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]

        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_name}',
            analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
            storages=storages,
            run_options=run_options,
        )
    except Exception:
        analysis.set_error_status()
        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating {} output datasets'.format(analysis_name),
        analysis.create_output_datasets,
        storages,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_name),
        analysis.create_output_results,
        storages,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    analysis.set_complete_status()

    comment_jira(jira_id, f'finished {analysis_name} analysis')
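
# The if/elif ladder above that picks a create_from_args factory can equally be
# written as a lookup table, which makes the set of supported analyses explicit
# and fails immediately on an unknown name. This is only a sketch of that
# alternative; it assumes the same workflows.analysis.dlp imports used by the
# function above, and the helper name is invented.
ANALYSIS_FACTORIES = {
    'split_wgs_bam': workflows.analysis.dlp.split_wgs_bam.SplitWGSBamAnalysis.create_from_args,
    'merge_cell_bams': workflows.analysis.dlp.merge_cell_bams.MergeCellBamsAnalysis.create_from_args,
    'variant_calling': workflows.analysis.dlp.variant_calling.VariantCallingAnalysis.create_from_args,
    'breakpoint_calling': workflows.analysis.dlp.breakpoint_calling.BreakpointCallingAnalysis.create_from_args,
}


def get_create_from_args(analysis_name):
    # Table-driven dispatch: unknown names raise a clear error up front.
    try:
        return ANALYSIS_FACTORIES[analysis_name]
    except KeyError:
        raise ValueError(f'unrecognized analysis name {analysis_name}')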
def main(
        analysis_id,
        config_filename=None,
        reset_status=False,
        **run_options,
):
    if config_filename is None:
        config_filename = default_config

    analysis = workflows.analysis.base.Analysis.get_by_id(tantalus_api, analysis_id)

    if reset_status:
        analysis.set_error_status()

    if analysis.status == 'complete':
        raise Exception(f'analysis {analysis_id} already complete')

    if analysis.status == 'running':
        raise Exception(f'analysis {analysis_id} already running')

    jira_id = analysis.jira
    analysis_name = analysis.name

    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    pipeline_dir = os.path.join(config['analysis_directory'], jira_id, analysis_name)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', jira_id)
    tmp_dir = os.path.join('singlecelltemp', 'temp', jira_id)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'], os.path.join(pipeline_dir, analysis_name))

    storages = config['storages']

    start = time.time()

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            analysis.get_input_datasets(),
            analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        inputs_yaml = os.path.join(pipeline_dir, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            analysis.generate_inputs_yaml,
            storages,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    try:
        analysis.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]

        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_name}',
            analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
            storages=storages,
            run_options=run_options,
        )
    except Exception:
        analysis.set_error_status()
        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating {} output datasets'.format(analysis_name),
        analysis.create_output_datasets,
        storages,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_name),
        analysis.create_output_results,
        storages,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    analysis.set_complete_status()

    comment_jira(jira_id, f'finished {analysis_name} analysis')
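
# main() takes the analysis primary key plus free-form **run_options, so a thin
# command-line wrapper is enough to launch it. The snippet below is a
# hypothetical entry point (the real script's CLI layer is not shown here); the
# flag names simply mirror the run_options keys that main() reads, and the
# analysis_id type of int is an assumption.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run a single registered analysis by id')
    parser.add_argument('analysis_id', type=int)
    parser.add_argument('--config_filename', default=None)
    parser.add_argument('--reset_status', action='store_true')
    # run_options consumed by main()
    parser.add_argument('--clean', action='store_true')
    parser.add_argument('--sisyphus_interactive', action='store_true')
    parser.add_argument('--inputs_yaml', default=None)
    parser.add_argument('--saltant', action='store_true')
    parser.add_argument('--update', action='store_true')
    parser.add_argument('--skip_missing', action='store_true')

    cli_args = vars(parser.parse_args())
    main(
        cli_args.pop('analysis_id'),
        config_filename=cli_args.pop('config_filename'),
        reset_status=cli_args.pop('reset_status'),
        **cli_args,
    )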
def start_automation(
        jira,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        storages,
        job_subdir,
        analysis_info,
        analysis_type,
):
    start = time.time()

    if analysis_type == "align":
        tantalus_analysis = workflows.models.AlignAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    elif analysis_type == "hmmcopy":
        tantalus_analysis = workflows.models.HmmcopyAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    elif analysis_type == "annotation":
        tantalus_analysis = workflows.models.AnnotationAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    elif analysis_type == "split_wgs_bam":
        tantalus_analysis = workflows.models.SplitWGSBamAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    else:
        raise Exception(f"{analysis_type} is not a valid analysis type")

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            tantalus_analysis.get_input_datasets(),
            tantalus_analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        local_results_storage = tantalus_api.get(
            'storage',
            name=storages['local_results'])['storage_directory']

        inputs_yaml = os.path.join(local_results_storage, job_subdir, analysis_type, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            tantalus_analysis.generate_inputs_yaml,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    tantalus_analysis.add_inputs_yaml(inputs_yaml, update=run_options['update'])

    try:
        tantalus_analysis.set_run_status()
        analysis_info.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]

        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_type}',
            tantalus_analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
        )
    except Exception:
        tantalus_analysis.set_error_status()
        analysis_info.set_error_status()

        # The align analysis writes its pipeline logs under "alignment"
        if analysis_type == "align":
            analysis_type = "alignment"

        pipeline_log = os.path.join(scpipeline_dir, analysis_type, "log", "latest", "pipeline.log")

        if not run_options["skip_pipeline"] or not run_options["override_contamination"]:
            with open(pipeline_log) as f:
                lines = f.read()

            if "LibraryContaminationError" in lines:
                log.error("LibraryContaminationError: over 20% of cells are contaminated")
                get_contamination_comment(jira)

        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating output datasets',
        tantalus_analysis.create_output_datasets,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_type),
        tantalus_analysis.create_output_results,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
        analysis_type=analysis_type,
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    # Update the Jira ticket
    analysis_info.set_finish_status(analysis_type)
    if analysis_type == "annotation" and not run_options["is_test_run"]:
        update_jira_dlp(jira, args['aligner'])
        attach_qc_report(jira, args["library_id"], storages)

    analysis_info.set_finish_status()

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    if analysis_type == "annotation":
        load_ticket(jira)

    # TODO: confirm with andrew whether to move this down here
    tantalus_analysis.set_complete_status()
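
# The error path above scans the single-cell pipeline log for
# LibraryContaminationError before re-raising. That check could be pulled into
# a small helper; this is only a refactoring sketch under the same assumptions
# as the code above (the log path layout and the "align" -> "alignment"
# directory rename), with the helper name invented. The except block could then
# call it and post the contamination comment only when it returns True.
def check_library_contamination(scpipeline_dir, analysis_type):
    # The align analysis writes its logs under an "alignment" subdirectory.
    log_subdir = "alignment" if analysis_type == "align" else analysis_type
    pipeline_log = os.path.join(scpipeline_dir, log_subdir, "log", "latest", "pipeline.log")

    if not os.path.exists(pipeline_log):
        return False

    with open(pipeline_log) as f:
        return "LibraryContaminationError" in f.read()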
def start_automation(
        jira_ticket,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        results_dir,
        scpipeline_dir,
        tmp_dir,
        storages,
        job_subdir,
        destruct_output,
        lumpy_output,
        haps_output,
        variants_output,
):
    start = time.time()

    analysis_type = 'multi_sample_pseudo_bulk'

    tantalus_analysis = PseudoBulkAnalysis(
        jira_ticket,
        version,
        args,
        run_options,
        storages=storages,
        update=run_options.get('update', False),
    )

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            tantalus_analysis.get_input_datasets(),
            tantalus_analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    library_metrics_paths = get_alignment_metrics(
        storages["working_results"],
        tantalus_analysis.get_input_datasets(),
        args["matched_normal_library"],
        pipeline_dir,
    )

    local_results_storage = tantalus_api.get(
        'storage',
        name=storages['local_results'])['storage_directory']

    inputs_yaml = os.path.join(local_results_storage, job_subdir, 'inputs.yaml')
    log_utils.sentinel(
        'Generating inputs yaml',
        tantalus_analysis.generate_inputs_yaml,
        inputs_yaml,
        library_metrics_paths,
    )

    tantalus_analysis.add_inputs_yaml(inputs_yaml, update=run_options['update'])

    try:
        tantalus_analysis.set_run_status()

        if run_options["skip_pipeline"]:
            log.info("skipping pipeline")
        else:
            log_utils.sentinel(
                'Running single_cell {}'.format(analysis_type),
                tantalus_analysis.run_pipeline,
                results_dir,
                pipeline_dir,
                scpipeline_dir,
                tmp_dir,
                inputs_yaml,
                config,
                destruct_output,
                lumpy_output,
                haps_output,
                variants_output,
            )
    except Exception:
        tantalus_analysis.set_error_status()
        raise

    tantalus_analysis.set_complete_status()

    output_dataset_ids = log_utils.sentinel(
        'Creating output datasets',
        tantalus_analysis.create_output_datasets,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating output results',
        tantalus_analysis.create_output_results,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
        analysis_type="pseudobulk",
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))
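
# Each of these entry points closes with the same elapsed-time log line; a tiny
# shared helper would keep the formatting in one place. Sketch only; the helper
# name is invented and it assumes the module-level log and time imports used above.
def log_elapsed_hours(start):
    elapsed_hours = (time.time() - start) / 60 / 60
    log.info("------ %s hours ------" % elapsed_hours)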