Example #1
def start_automation(jira, version, args, run_options, analysis_info, data_dir,
                     runs_dir, reference_dir, results_dir, storages):

    start = time.time()
    tantalus_analysis = TenXAnalysis(jira,
                                     version,
                                     args,
                                     run_options,
                                     storages=storages,
                                     update=run_options["update"])

    try:
        tantalus_analysis.set_run_status()

        if run_options["skip_pipeline"]:
            log.info("skipping pipeline")

        else:
            log_utils.sentinel(
                'Running SCRNA pipeline',
                tantalus_analysis.run_pipeline,
                version,
                data_dir,
                runs_dir,
                reference_dir,
                results_dir,
                args["library_id"],
                args["ref_genome"],
            )

    except Exception:
        tantalus_analysis.set_error_status()
        raise

    tantalus_analysis.set_complete_status()

    output_dataset_ids = log_utils.sentinel(
        'Creating output datasets',
        tantalus_analysis.create_output_datasets,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating output results',
        tantalus_analysis.create_output_results,
        update=run_options['update'],
        skip_missing=run_options["skip_missing"],
    )

    analysis_info.set_finish_status()

    # Update Jira ticket
    if not run_options["is_test_run"]:
        update_jira_tenx(jira, args)

    add_report(jira)

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))
Example #2
def start_automation(
    analysis_name,
    jira_id,
    version,
    args,
    run_options,
    config,
    pipeline_dir,
    scpipeline_dir,
    tmp_dir,
    storages,
    job_subdir,
):
    start = time.time()

    if analysis_name == 'split_wgs_bam':
        create_from_args = workflows.analysis.dlp.split_wgs_bam.SplitWGSBamAnalysis.create_from_args
    elif analysis_name == 'merge_cell_bams':
        create_from_args = workflows.analysis.dlp.merge_cell_bams.MergeCellBamsAnalysis.create_from_args
    elif analysis_name == 'variant_calling':
        create_from_args = workflows.analysis.dlp.variant_calling.VariantCallingAnalysis.create_from_args
    elif analysis_name == 'breakpoint_calling':
        create_from_args = workflows.analysis.dlp.breakpoint_calling.BreakpointCallingAnalysis.create_from_args
    else:
        raise Exception(f"{analysis_name} is not a valid analysis type")

    analysis = create_from_args(
        jira_id,
        version,
        args,
        update=run_options['update'],
    )

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            analysis.get_input_datasets(),
            analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        local_results_storage = tantalus_api.get(
            'storage', name=storages['local_results'])['storage_directory']

        inputs_yaml = os.path.join(local_results_storage, job_subdir,
                                   analysis_name, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            analysis.generate_inputs_yaml,
            storages,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    analysis.add_inputs_yaml(inputs_yaml, update=run_options['update'])

    try:
        analysis.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]
        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_name}',
            analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
            storages=storages,
            run_options=run_options,
        )

    except Exception:
        analysis.set_error_status()
        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating {} output datasets'.format(analysis_name),
        analysis.create_output_datasets,
        storages,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_name),
        analysis.create_output_results,
        storages,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    analysis.set_complete_status()

    comment_jira(jira_id, f'finished {analysis_name} analysis')
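Every long-running step in these examples is wrapped in log_utils.sentinel(description, func, *args, **kwargs). The helper below is a minimal, illustrative sketch of that call shape, inferred from the call sites above rather than taken from the Sisyphus source; the real helper is configured through log_utils.setup_sentinel (see Example #3) and can skip steps already completed on a previous run, which this sketch omits.

import logging
import time

log = logging.getLogger('sisyphus')


def sentinel(description, func, *args, **kwargs):
    # Illustrative stand-in for log_utils.sentinel: log the step, time it,
    # run the callable, and return its result so callers can capture IDs.
    log.info('Starting: %s', description)
    step_start = time.time()
    try:
        result = func(*args, **kwargs)
    except Exception:
        log.exception('Failed: %s', description)
        raise
    log.info('Finished: %s in %.1f minutes', description, (time.time() - step_start) / 60)
    return result

With this shape, a call such as sentinel('Generating inputs yaml', analysis.generate_inputs_yaml, storages, inputs_yaml) simply forwards its trailing positional and keyword arguments to the wrapped method, which is how every example above passes them.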
Example #3
def main(
        analysis_id,
        config_filename=None,
        reset_status=False,
        **run_options
    ):

    if config_filename is None:
        config_filename = default_config

    analysis = workflows.analysis.base.Analysis.get_by_id(tantalus_api, analysis_id)

    if reset_status:
        analysis.set_error_status()

    if analysis.status == 'complete':
        raise Exception(f'analysis {analysis_id} already complete')

    if analysis.status == 'running':
        raise Exception(f'analysis {analysis_id} already running')

    jira_id = analysis.jira
    analysis_name = analysis.name

    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    pipeline_dir = os.path.join(config['analysis_directory'], jira_id, analysis_name)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', jira_id)
    tmp_dir = os.path.join('singlecelltemp', 'temp', jira_id)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'], os.path.join(pipeline_dir, analysis_name))

    storages = config['storages']

    start = time.time()

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            analysis.get_input_datasets(),
            analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        inputs_yaml = os.path.join(pipeline_dir, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            analysis.generate_inputs_yaml,
            storages,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    try:
        analysis.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]
        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_name}',
            analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
            storages=storages,
            run_options=run_options,
        )

    except Exception:
        analysis.set_error_status()
        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating {} output datasets'.format(analysis_name),
        analysis.create_output_datasets,
        storages,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_name),
        analysis.create_output_results,
        storages,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    analysis.set_complete_status()

    comment_jira(jira_id, f'finished {analysis_name} analysis')
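Example #3 takes an analysis_id plus config_filename, reset_status, and a run_options dict that must contain at least clean, sisyphus_interactive, inputs_yaml, saltant, update, and skip_missing (the keys read above). The argparse wrapper below is an assumed, illustrative way to supply those values from the command line; it is not the actual Sisyphus CLI, and the real tool may use different flag names or a different framework.

import argparse


def parse_run_args():
    # Hypothetical CLI for main() in Example #3; every flag name here is an assumption.
    parser = argparse.ArgumentParser(description='Run a registered Tantalus analysis')
    parser.add_argument('analysis_id', type=int)
    parser.add_argument('--config_filename', default=None)
    parser.add_argument('--reset_status', action='store_true')
    parser.add_argument('--inputs_yaml', default=None)
    parser.add_argument('--clean', action='store_true')
    parser.add_argument('--sisyphus_interactive', action='store_true')
    parser.add_argument('--saltant', action='store_true')
    parser.add_argument('--update', action='store_true')
    parser.add_argument('--skip_missing', action='store_true')
    return vars(parser.parse_args())


if __name__ == '__main__':
    run_args = parse_run_args()
    main(
        run_args.pop('analysis_id'),
        config_filename=run_args.pop('config_filename'),
        reset_status=run_args.pop('reset_status'),
        **run_args,  # remaining flags become the run_options entries read by main()
    )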
Example #4
def start_automation(
    jira,
    version,
    args,
    run_options,
    config,
    pipeline_dir,
    scpipeline_dir,
    tmp_dir,
    storages,
    job_subdir,
    analysis_info,
    analysis_type,
):
    start = time.time()

    if analysis_type == "align":
        tantalus_analysis = workflows.models.AlignAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    elif analysis_type == "hmmcopy":
        tantalus_analysis = workflows.models.HmmcopyAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    elif analysis_type == "annotation":
        tantalus_analysis = workflows.models.AnnotationAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    elif analysis_type == "split_wgs_bam":
        tantalus_analysis = workflows.models.SplitWGSBamAnalysis(
            jira,
            version,
            args,
            storages,
            run_options,
            update=run_options['update'],
        )
    else:
        raise Exception(f"{analysis_type} is not a valid analysis type")

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            tantalus_analysis.get_input_datasets(),
            tantalus_analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        local_results_storage = tantalus_api.get(
            'storage', name=storages['local_results'])['storage_directory']

        inputs_yaml = os.path.join(local_results_storage, job_subdir,
                                   analysis_type, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            tantalus_analysis.generate_inputs_yaml,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    tantalus_analysis.add_inputs_yaml(inputs_yaml,
                                      update=run_options['update'])

    try:
        tantalus_analysis.set_run_status()
        analysis_info.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]
        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_type}',
            tantalus_analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
        )

    except Exception:
        tantalus_analysis.set_error_status()
        analysis_info.set_error_status()
        if analysis_type == "align":
            analysis_type = "alignment"
        pipeline_log = os.path.join(scpipeline_dir, analysis_type, "log",
                                    "latest", "pipeline.log")

        if not run_options["skip_pipeline"] or not run_options["override_contamination"]:
            with open(pipeline_log) as f:
                lines = f.read()
                if "LibraryContaminationError" in lines:
                    log.error(
                        "LibraryContaminationError: over 20% of cells are contaminated"
                    )

                    get_contamination_comment(jira)

        raise Exception("pipeline failed")

    output_dataset_ids = log_utils.sentinel(
        'Creating output datasets',
        tantalus_analysis.create_output_datasets,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_type),
        tantalus_analysis.create_output_results,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
        analysis_type=analysis_type,
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    # Update Jira ticket
    analysis_info.set_finish_status(analysis_type)
    if analysis_type == "annotation" and not run_options["is_test_run"]:
        update_jira_dlp(jira, args['aligner'])
        attach_qc_report(jira, args["library_id"], storages)
        analysis_info.set_finish_status()

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    if analysis_type == "annotation":
        load_ticket(jira)

    # TODO: confirm with andrew whether to move this down here
    tantalus_analysis.set_complete_status()
Example #5
def start_automation(
    jira_ticket,
    version,
    args,
    run_options,
    config,
    pipeline_dir,
    results_dir,
    scpipeline_dir,
    tmp_dir,
    storages,
    job_subdir,
    destruct_output,
    lumpy_output,
    haps_output,
    variants_output,
):
    start = time.time()

    analysis_type = 'multi_sample_pseudo_bulk'

    tantalus_analysis = PseudoBulkAnalysis(
        jira_ticket,
        version,
        args,
        run_options,
        storages=storages,
        update=run_options.get('update', False),
    )

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(
                storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            tantalus_analysis.get_input_datasets(),
            tantalus_analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    library_metrics_paths = get_alignment_metrics(
        storages["working_results"], tantalus_analysis.get_input_datasets(),
        args["matched_normal_library"], pipeline_dir)

    local_results_storage = tantalus_api.get(
        'storage', name=storages['local_results'])['storage_directory']

    inputs_yaml = os.path.join(local_results_storage, job_subdir,
                               'inputs.yaml')
    log_utils.sentinel(
        'Generating inputs yaml',
        tantalus_analysis.generate_inputs_yaml,
        inputs_yaml,
        library_metrics_paths,
    )

    tantalus_analysis.add_inputs_yaml(inputs_yaml,
                                      update=run_options['update'])

    try:
        tantalus_analysis.set_run_status()

        if run_options["skip_pipeline"]:
            log.info("skipping pipeline")

        else:
            log_utils.sentinel(
                'Running single_cell {}'.format(analysis_type),
                tantalus_analysis.run_pipeline,
                results_dir,
                pipeline_dir,
                scpipeline_dir,
                tmp_dir,
                inputs_yaml,
                config,
                destruct_output,
                lumpy_output,
                haps_output,
                variants_output,
            )

    except Exception:
        tantalus_analysis.set_error_status()
        raise

    tantalus_analysis.set_complete_status()

    output_dataset_ids = log_utils.sentinel(
        'Creating output datasets',
        tantalus_analysis.create_output_datasets,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating output results',
        tantalus_analysis.create_output_results,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
        analysis_type="pseudobulk")

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring output datasets from {} to {}'.format(
                storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))