def main(analysis_name,
         jira_id,
         version,
         args,
         config_filename=None,
         **run_options):

    if config_filename is None:
        config_filename = default_config

    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    job_subdir = jira_id + run_options['tag']

    run_options['job_subdir'] = job_subdir

    # Resolve the pipeline directory inside the local results storage registered in Tantalus
    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)
    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, analysis_name))

    start_automation(
        analysis_name,
        jira_id,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
    )
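
# A minimal invocation sketch for the entry point above, not taken from the real
# pipeline: every value below is a placeholder, and only the run_options keys the
# body actually reads ('tag', 'clean', 'sisyphus_interactive') are supplied.
main(
    'align',                      # analysis_name (assumed)
    'SC-1234',                    # jira_id; must match templates.JIRA_ID_RE (assumed format)
    'v0.0.1',                     # version (assumed)
    {'library_id': 'A12345A'},    # args forwarded to start_automation (assumed contents)
    tag='',                       # appended to the JIRA ID to form job_subdir
    clean=False,                  # forwarded to log_utils.init_pl_dir
    sisyphus_interactive=False,   # forwarded to log_utils.setup_sentinel
)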
Example #2
def main(
        analysis_id,
        config_filename=None,
        reset_status=False,
        **run_options
    ):

    if config_filename is None:
        config_filename = default_config

    analysis = workflows.analysis.base.Analysis.get_by_id(tantalus_api, analysis_id)

    if reset_status:
        analysis.set_error_status()

    if analysis.status == 'complete':
        raise Exception(f'analysis {analysis_id} already complete')

    if analysis.status == 'running':
        raise Exception(f'analysis {analysis_id} already running')

    jira_id = analysis.jira
    analysis_name = analysis.name

    if not templates.JIRA_ID_RE.match(jira_id):
        raise Exception(f'Invalid SC ID: {jira_id}')

    config = file_utils.load_json(config_filename)

    pipeline_dir = os.path.join(config['analysis_directory'], jira_id, analysis_name)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', jira_id)
    tmp_dir = os.path.join('singlecelltemp', 'temp', jira_id)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'], os.path.join(pipeline_dir, analysis_name))

    storages = config['storages']

    start = time.time()

    if storages["working_inputs"] != storages["remote_inputs"]:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(storages["remote_inputs"], storages["working_inputs"]),
            transfer_inputs,
            analysis.get_input_datasets(),
            analysis.get_input_results(),
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    if run_options['inputs_yaml'] is None:
        inputs_yaml = os.path.join(pipeline_dir, 'inputs.yaml')
        log_utils.sentinel(
            'Generating inputs yaml',
            analysis.generate_inputs_yaml,
            storages,
            inputs_yaml,
        )
    else:
        inputs_yaml = run_options['inputs_yaml']

    try:
        analysis.set_run_status()

        dirs = [
            pipeline_dir,
            config['docker_path'],
            config['docker_sock_path'],
        ]
        # Pass all server storages to docker
        for storage_name in storages.values():
            storage = tantalus_api.get('storage', name=storage_name)
            if storage['storage_type'] == 'server':
                dirs.append(storage['storage_directory'])

        if run_options['saltant']:
            context_config_file = config['context_config_file']['saltant']
        else:
            context_config_file = config['context_config_file']['sisyphus']

        log_utils.sentinel(
            f'Running single_cell {analysis_name}',
            analysis.run_pipeline,
            scpipeline_dir=scpipeline_dir,
            tmp_dir=tmp_dir,
            inputs_yaml=inputs_yaml,
            context_config_file=context_config_file,
            docker_env_file=config['docker_env_file'],
            docker_server=config['docker_server'],
            dirs=dirs,
            storages=storages,
            run_options=run_options,
        )

    except Exception as e:
        # Mark the analysis as failed before propagating the error
        analysis.set_error_status()
        raise Exception("pipeline failed") from e

    output_dataset_ids = log_utils.sentinel(
        'Creating {} output datasets'.format(analysis_name),
        analysis.create_output_datasets,
        storages,
        update=run_options['update'],
    )

    output_results_ids = log_utils.sentinel(
        'Creating {} output results'.format(analysis_name),
        analysis.create_output_results,
        storages,
        update=run_options['update'],
        skip_missing=run_options['skip_missing'],
    )

    if storages["working_inputs"] != storages["remote_inputs"] and output_dataset_ids != []:
        log_utils.sentinel(
            'Transferring input datasets from {} to {}'.format(storages["working_inputs"], storages["remote_inputs"]),
            transfer_inputs,
            output_dataset_ids,
            output_results_ids,
            storages["remote_inputs"],
            storages["working_inputs"],
        )

    log.info("Done!")
    log.info("------ %s hours ------" % ((time.time() - start) / 60 / 60))

    analysis.set_complete_status()

    comment_jira(jira_id, f'finished {analysis_name} analysis')
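
# A hedged sketch of re-running a previously failed analysis with the entry point
# above; the ID and option values are placeholders, and the keyword arguments mirror
# the run_options keys the body actually reads.
main(
    1234,                         # analysis_id (assumed)
    reset_status=True,            # force error status so the 'complete'/'running' guards pass
    clean=False,
    sisyphus_interactive=False,
    inputs_yaml=None,             # None -> inputs.yaml is generated under pipeline_dir
    saltant=False,                # False -> use the 'sisyphus' context config
    update=False,
    skip_missing=False,
)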
Example #3
def main(jira,
         version,
         library_id,
         aligner,
         analysis_type,
         load_only=False,
         gsc_lanes=None,
         brc_flowcell_ids=None,
         config_filename=None,
         **run_options):

    if load_only:
        load_ticket(jira)
        return "complete"

    if config_filename is None:
        config_filename = default_config

    if not templates.JIRA_ID_RE.match(jira):
        raise Exception(f'Invalid SC ID: {jira}')

    # Map the single-letter aligner flag to its full aligner name
    aligner_map = {'A': 'BWA_ALN_0_5_7', 'M': 'BWA_MEM_0_7_6A'}

    aligner = aligner_map[aligner]

    # Get reference genome
    library_info = colossus_api.get("library", pool_id=library_id)
    reference_genome = colossus_utils.get_ref_genome(library_info)

    if gsc_lanes is not None:
        gsc_lanes = gsc_lanes.split(',')

    if brc_flowcell_ids is not None:
        brc_flowcell_ids = brc_flowcell_ids.split(',')

    config = file_utils.load_json(config_filename)

    job_subdir = jira + run_options['tag']

    run_options['job_subdir'] = job_subdir

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)
    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, analysis_type))

    # Create analysis information object on Colossus
    analysis_info = workflow.models.AnalysisInfo(jira)

    log.info('Library ID: {}'.format(library_id))

    if run_options["is_test_run"]:
        library_id += "TEST"

    args = {}
    args['aligner'] = aligner
    args['ref_genome'] = reference_genome
    args['library_id'] = library_id
    args['gsc_lanes'] = gsc_lanes
    args['brc_flowcell_ids'] = brc_flowcell_ids
    args['smoothing'] = run_options['smoothing']

    start_automation(
        jira,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
        analysis_info,
        analysis_type,
    )
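
# A hedged usage sketch for the alignment entry point above; the single-letter
# aligner must be a key of aligner_map ('A' or 'M'), and every other value is a
# placeholder rather than a real library or ticket.
main(
    'SC-1234',                    # jira (assumed)
    'v0.0.1',                     # version (assumed)
    'A12345A',                    # library_id (assumed)
    'M',                          # aligner flag -> 'BWA_MEM_0_7_6A'
    'align',                      # analysis_type (assumed)
    gsc_lanes=None,               # or a comma-separated string of lanes
    tag='',
    clean=False,
    sisyphus_interactive=False,
    is_test_run=False,
    smoothing='modal',            # assumed smoothing option forwarded in args
)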
Example #4
def run(
    analysis_id,
    version,
    jira=None,
    no_download=False,
    config_filename=None,
    data_dir=None,
    runs_dir=None,
    results_dir=None,
    **run_options,
):

    if config_filename is None:
        config_filename = default_config

    config = file_utils.load_json(config_filename)
    storages = config["storages"]

    analysis = tantalus_api.get("analysis", id=analysis_id)

    if analysis["status"] in ("running", "complete"):
        raise Exception(f'analysis {analysis_id} already {analysis["status"]}')

    jira_ticket = analysis["jira_ticket"]
    library_id = analysis["args"]["library_id"]

    # get colossus library
    library = colossus_api.get(
        "tenxlibrary",
        name=library_id,
    )

    log.info("Running {}".format(jira_ticket))
    job_subdir = jira_ticket + run_options['tag']

    # init pipeline dir
    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir,
    )

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, "tenx"))

    # scRNA-seq pipeline working directories
    if data_dir is None:
        data_dir = os.path.join("/datadrive", "data")
    if runs_dir is None:
        runs_dir = os.path.join("/datadrive", "runs", library_id)
    if results_dir is None:
        results_dir = os.path.join("/datadrive", "results", library_id)

    reference_dir = os.path.join("/datadrive", "reference")

    if run_options["testing"]:
        ref_genome = "test"

    elif run_options["ref_genome"]:
        ref_genome = run_options["ref_genome"]
        log.info("Default reference genome being overwritten; using {}".format(
            run_options["ref_genome"]))

    else:
        ref_genome = get_ref_genome(library, is_tenx=True)

    args = {}
    args['library_id'] = library_id
    args['ref_genome'] = ref_genome
    args['version'] = version

    analysis_info = TenXAnalysisInfo(
        jira_ticket,
        config['version'],
        run_options,
        library["id"],
    )

    if not no_download:
        download_data(storages["working_inputs"], data_dir, library_id)

    start_automation(
        jira_ticket,
        config['version'],
        args,
        run_options,
        analysis_info,
        data_dir,
        runs_dir,
        reference_dir,
        results_dir,
        storages,
        library["id"],
        analysis_id,
    )
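
# A hedged sketch of driving the 10x run() entry point above; data_dir, runs_dir and
# results_dir fall back to /datadrive paths when left as None, and all values below
# are illustrative only.
run(
    5678,                         # analysis_id (assumed)
    'v1.0.0',                     # version (assumed)
    no_download=False,            # set True to skip pulling inputs into data_dir
    tag='',
    clean=False,
    sisyphus_interactive=False,
    testing=False,
    ref_genome=None,              # falsy -> derived from the Colossus library record
)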
Example #5
def run_pseudobulk(jira_ticket,
                   version,
                   inputs_tag_name,
                   matched_normal_sample,
                   matched_normal_library,
                   config_filename=None,
                   **run_options):
    if config_filename is None:
        config_filename = default_config

    config = file_utils.load_json(config_filename)

    args = dict(
        inputs_tag_name=inputs_tag_name,
        matched_normal_sample=matched_normal_sample,
        matched_normal_library=matched_normal_library,
    )

    job_subdir = jira_ticket

    run_options['job_subdir'] = job_subdir

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    results_dir = os.path.join('singlecellresults', 'results', job_subdir)

    scpipeline_dir = os.path.join('singlecelllogs', 'pipeline', job_subdir)

    tmp_dir = os.path.join('singlecelltemp', 'temp', job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    # Per-result-type output prefixes under the singlecellresults storage
    storage_result_prefix = tantalus_api.get_storage_client("singlecellresults").prefix
    destruct_output = os.path.join(storage_result_prefix, jira_ticket, "results", "destruct")
    lumpy_output = os.path.join(storage_result_prefix, jira_ticket, "results", "lumpy")
    haps_output = os.path.join(storage_result_prefix, jira_ticket, "results", "haps")
    variants_output = os.path.join(storage_result_prefix, jira_ticket, "results", "variants")

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'], pipeline_dir)

    start_automation(
        jira_ticket,
        version,
        args,
        run_options,
        config,
        pipeline_dir,
        results_dir,
        scpipeline_dir,
        tmp_dir,
        config['storages'],
        job_subdir,
        destruct_output,
        lumpy_output,
        haps_output,
        variants_output,
    )
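
# A hedged invocation sketch for run_pseudobulk above; the tag name and matched
# normal identifiers are placeholders, and only the run_options keys the body reads
# ('clean', 'sisyphus_interactive') are supplied.
run_pseudobulk(
    'SC-1234',                    # jira_ticket (assumed)
    'v0.0.1',                     # version (assumed)
    'SC-1234_inputs',             # inputs_tag_name: tag naming the input datasets (assumed)
    'SA123',                      # matched_normal_sample (assumed)
    'A98765B',                    # matched_normal_library (assumed)
    clean=False,
    sisyphus_interactive=False,
)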
Example #6
def main(version, library_id, config_filename=None, **run_options):

    if config_filename is None:
        config_filename = default_config

    log.info(config_filename)
    config = file_utils.load_json(config_filename)

    storages = config["storages"]

    library = colossus_api.get("tenxlibrary", name=library_id)
    sample = library["sample"]["sample_id"]
    library_ticket = library["jira_ticket"]

    # TODO: Move this to tenx automated scripts
    if len(library["analysis_set"]) == 0:
        jira = create_analysis_jira_ticket(library_id, sample, library_ticket)

    else:
        analysis_id = library["analysis_set"][0]
        analysis_object = colossus_api.get("analysis", id=analysis_id)
        jira = analysis_object["jira_ticket"]

    log.info("Running {}".format(jira))
    job_subdir = jira + run_options['tag']

    pipeline_dir = os.path.join(
        tantalus_api.get(
            "storage",
            name=config["storages"]["local_results"])["storage_directory"],
        job_subdir)

    log_utils.init_pl_dir(pipeline_dir, run_options['clean'])

    log_file = log_utils.init_log_files(pipeline_dir)
    log_utils.setup_sentinel(run_options['sisyphus_interactive'],
                             os.path.join(pipeline_dir, "tenx"))

    # scRNA-seq pipeline working directories
    data_dir = os.path.join("/datadrive", "data", library_id)
    runs_dir = os.path.join("/datadrive", "runs", library_id)
    reference_dir = os.path.join("/datadrive", "reference")
    results_dir = os.path.join("/datadrive", "results", library_id)

    analysis_info = TenXAnalysisInfo(
        jira,
        version,
        library_id,
    )

    if run_options["testing"]:
        ref_genome = "test"

    else:
        ref_genome = get_ref_genome(library, is_tenx=True)

    args = {}
    args['library_id'] = library_id
    args['ref_genome'] = ref_genome
    args['version'] = version

    start_automation(
        jira,
        version,
        args,
        run_options,
        analysis_info,
        data_dir,
        runs_dir,
        reference_dir,
        results_dir,
        storages,
    )