Example 1: dry-run test of the tumor-only workflow
def test_workflow_tumor_only(tumor_only_config):
    # GIVEN a sample config dict and snakefile
    workflow = 'single'
    snakefile = get_snakefile(workflow)
    config_json = tumor_only_config

    # WHEN invoking snakemake module with dryrun option
    # THEN it should return true
    assert snakemake.snakemake(snakefile,
                               configfiles=[config_json],
                               dryrun=True)
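The test depends on a tumor_only_config pytest fixture that supplies the path to a sample config JSON. A minimal sketch of such a fixture, assuming a pre-built config file checked in under a test-data directory (the path below is hypothetical):

import pytest

@pytest.fixture
def tumor_only_config():
    # Hypothetical location; the real fixture may instead build the
    # config on the fly rather than point at a checked-in file.
    return "tests/test_data/config_tumor_only.json"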
Example 2: status sub-command that checks delivery files via a snakemake dry run
def status(context, sample_config, show_only_missing, print_files):
    """
    cli for status sub-command.
    """
    LOG.info(f"BALSAMIC started with log level {context.obj['loglevel']}.")
    LOG.debug("Reading input sample config")
    with open(sample_config, "r") as fn:
        sample_config_dict = json.load(fn)

    result_dir = get_result_dir(sample_config_dict)
    analysis_type = sample_config_dict["analysis"]["analysis_type"]
    sequencing_type = sample_config_dict["analysis"]["sequencing_type"]
    snakefile = get_snakefile(analysis_type, sequencing_type)

    with CaptureStdout() as summary:
        snakemake.snakemake(
            snakefile=snakefile,
            dryrun=True,
            summary=True,
            configfiles=[sample_config],
            quiet=True,
        )
    summary = [i.split("\t") for i in summary]
    summary_dict = [dict(zip(summary[0], value)) for value in summary[1:]]

    if not os.path.isfile(os.path.join(result_dir, "analysis_finish")):
        LOG.warning(
            "analysis_finish file is missing. Analysis might be incomplete or running."
        )

    existing_files = set()
    missing_files = set()

    for entries in summary_dict:
        delivery_file = entries["output_file"]

        file_status_str, file_status = get_file_status_string(delivery_file)
        if file_status and print_files:
            click.echo(file_status_str)

        if not file_status and (show_only_missing or print_files):
            click.echo(file_status_str)

        if file_status:
            existing_files.add(delivery_file)
        else:
            missing_files.add(delivery_file)

    finish_file_count = 'Finished file count: {}'.format(len(existing_files))
    missing_file_count = 'Missing file count: {}'.format(len(missing_files))
    click.echo(Color('{yellow}Final tally:{/yellow}'))
    click.echo(Color('{yellow}\t' + finish_file_count + '{/yellow}'))
    click.echo(Color('{yellow}\t' + missing_file_count + '{/yellow}'))
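For context, a snakemake dry run with summary=True prints a tab-separated table whose first row is a header; the code above captures that table and turns each data row into a dict keyed by the header fields. A standalone illustration of the same transformation (the rows and column names are illustrative, not exact snakemake output):

summary = [
    "output_file\tdate\trule\tstatus",
    "bam/tumor.merged.bam\t-\tmergeBam_tumor\tmissing",
]
rows = [line.split("\t") for line in summary]
summary_dict = [dict(zip(rows[0], row)) for row in rows[1:]]
print(summary_dict[0]["output_file"])  # bam/tumor.merged.bam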
Example 3: dry-run tests of the Sentieon WGS workflows
def test_workflow_sentieon(tumor_normal_wgs_config, tumor_only_wgs_config):
    # GIVEN a sample config dict and snakefile
    workflows = [('single', tumor_only_wgs_config),
                 ('paired', tumor_normal_wgs_config)]
    sequencing_type = "wgs"

    # WHEN invoking snakemake module with dryrun option
    # THEN it should return true
    for analysis_type, config in workflows:
        snakefile = get_snakefile(analysis_type, sequencing_type)
        assert snakemake.snakemake(snakefile,
                                   configfiles=[config],
                                   dryrun=True)
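The loop over (analysis_type, config) pairs could also be expressed with pytest.mark.parametrize, which reports each workflow as its own test case. A sketch under the assumption that the configs remain pytest fixtures and are resolved by name with request.getfixturevalue:

import pytest

@pytest.mark.parametrize("analysis_type, config_fixture", [
    ("single", "tumor_only_wgs_config"),
    ("paired", "tumor_normal_wgs_config"),
])
def test_workflow_sentieon(analysis_type, config_fixture, request):
    # Resolve the fixture by name so each parametrized case gets its config
    config = request.getfixturevalue(config_fixture)
    snakefile = get_snakefile(analysis_type, "wgs")
    assert snakemake.snakemake(snakefile, configfiles=[config], dryrun=True)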
Example 4: deliver sub-command that writes the Housekeeper delivery file
def deliver(context, sample_config, analysis_type, rules_to_deliver,
            delivery_mode):
    """
    cli for deliver sub-command.
    Writes <case_id>.hk in result_directory.
    """
    LOG.info(f"BALSAMIC started with log level {context.obj['loglevel']}.")
    LOG.debug("Reading input sample config")
    with open(sample_config, "r") as fn:
        sample_config_dict = json.load(fn)

    default_rules_to_deliver = [
        "fastp", "multiqc", "vep_somatic", "vep_germline", "vep_stat",
        "ngs_filter_vardict", "mergeBam_tumor", "mergeBam_normal",
        "cnvkit_paired", "cnvkit_single", "sentieon_dedup"
    ]

    if not rules_to_deliver:
        rules_to_deliver = default_rules_to_deliver

    rules_to_deliver = list(rules_to_deliver)
    if delivery_mode == 'a':
        rules_to_deliver.extend(default_rules_to_deliver)

    case_name = sample_config_dict['analysis']['case_id']
    result_dir = get_result_dir(sample_config_dict)
    yaml_write_directory = os.path.join(result_dir, "delivery_report")
    LOG.info("Creating delivery_report directory")
    os.makedirs(yaml_write_directory, exist_ok=True)

    analysis_type = analysis_type if analysis_type else sample_config_dict[
        'analysis']['analysis_type']
    sequencing_type = sample_config_dict["analysis"]["sequencing_type"]
    snakefile = get_snakefile(analysis_type, sequencing_type)

    report_file_name = os.path.join(
        yaml_write_directory,
        sample_config_dict["analysis"]["case_id"] + "_report.html")
    LOG.info("Creating report file {}".format(report_file_name))

    # write report.html file
    report = SnakeMake()
    report.case_name = case_name
    report.working_dir = os.path.join(
        sample_config_dict['analysis']['analysis_dir'],
        sample_config_dict['analysis']['case_id'], 'BALSAMIC_run')
    report.report = report_file_name
    report.configfile = sample_config
    report.snakefile = snakefile
    report.run_mode = 'local'
    report.use_singularity = False
    report.run_analysis = True
    report.sm_opt = ["--quiet"]
    cmd = sys.executable + " -m " + report.build_cmd()
    subprocess.check_output(cmd.split(), shell=False)
    LOG.info(f"Workflow report file {report_file_name}")

    snakemake.snakemake(
        snakefile=snakefile,
        config={
            "delivery": "True",
            "rules_to_deliver": ",".join(rules_to_deliver)
        },
        dryrun=True,
        configfiles=[sample_config],
        quiet=True,
    )

    delivery_file_name = os.path.join(yaml_write_directory, case_name + ".hk")

    delivery_file_ready = os.path.join(
        yaml_write_directory,
        case_name + "_delivery_ready.hk",
    )
    with open(delivery_file_ready, "r") as fn:
        delivery_file_ready_dict = json.load(fn)

    delivery_json = dict()
    delivery_json["files"] = delivery_file_ready_dict

    # Add the Housekeeper report file to the delivery files
    delivery_json["files"].append({
        "path": report_file_name,
        "step": "balsamic_delivery",
        "format": get_file_extension(report_file_name),
        "tag": "balsamic-report",
        "id": case_name,
    })
    # Add the case config (CASE_ID.JSON) to the delivery files
    delivery_json["files"].append({
        "path": Path(sample_config).resolve().as_posix(),
        "step": "case_config",
        "format": get_file_extension(sample_config),
        "tag": "balsamic-config",
        "id": case_name,
    })
    # Add the DAG graph to the delivery files
    delivery_json["files"].append({
        "path": sample_config_dict["analysis"]["dag"],
        "step": "case_config",
        "format": get_file_extension(sample_config_dict["analysis"]["dag"]),
        "tag": "balsamic-dag",
        "id": case_name,
    })

    write_json(delivery_json, delivery_file_name)
    with open(delivery_file_name + ".yaml", "w") as fn:
        yaml.dump(delivery_json, fn, default_flow_style=False)

    LOG.info(f"Housekeeper delivery file {delivery_file_name}")
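The <case_id>.hk file written above is a JSON document with a single files key; every entry carries the same five fields used in the appends. A hypothetical excerpt of its shape (paths and case name invented):

delivery_json = {
    "files": [
        {
            "path": "/analysis/case123/delivery_report/case123_report.html",
            "step": "balsamic_delivery",
            "format": "html",
            "tag": "balsamic-report",
            "id": "case123",
        },
        # ... one entry per delivered file, in the same format
    ]
}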
Example 5: CLI options for the reference-generation workflow (excerpt)
LOG = logging.getLogger(__name__)


@click.command("reference",
               short_help="config workflow for generate reference")
@click.option("-o",
              "--outdir",
              required=True,
              help="output directory for ref files eg: reference")
@click.option("-c",
              "--cosmic-key",
              required=True,
              help="cosmic db authentication key")
@click.option("-s",
              "--snakefile",
              default=get_snakefile('generate_ref'),
              type=click.Path(),
              show_default=True,
              help="snakefile for reference generation")
@click.option("-d",
              "--dagfile",
              default="generate_ref_worflow_graph",
              show_default=True,
              help="DAG file for overview")
@click.option("--singularity",
              type=click.Path(),
              required=True,
              help='Download singularity image for BALSAMIC')
@click.option("-g",
              "--genome-version",
              default="hg19",
Example 6: run-analysis sub-command that builds and executes the snakemake command
def analysis(context, snake_file, sample_config, run_mode, cluster_config,
             run_analysis, force_all, snakemake_opt, mail_type, mail_user,
             account, analysis_type, qos, profile, disable_variant_caller):
    """
    Runs BALSAMIC workflow on the provided sample's config file
    """
    LOG.info(f"BALSAMIC started with log level {context.obj['loglevel']}.")

    if run_mode == 'cluster' and not run_analysis:
        LOG.info('Changing run-mode to local on dry-run')
        run_mode = 'local'

    if run_mode == 'cluster' and not account:
        LOG.error(
            'slurm-account, qsub-account, or account is required for cluster run mode'
        )
        raise click.Abort()

    sample_config_path = os.path.abspath(sample_config)

    with open(sample_config, 'r') as sample_fh:
        sample_config = json.load(sample_fh)

    logpath = sample_config['analysis']['log']
    scriptpath = sample_config['analysis']['script']
    resultpath = sample_config['analysis']['result']
    benchmarkpath = sample_config['analysis']['benchmark']
    case_name = sample_config['analysis']['case_id']
    sequencing_type = sample_config['analysis']['sequencing_type']

    if run_analysis:
        # If this is not a dry run and the log directory already holds files,
        # create fresh (versioned) log/script/benchmark directories
        for dirpath, dirnames, files in os.walk(logpath):
            if files:
                logpath = createDir(logpath, [])
                scriptpath = createDir(scriptpath, [])
                sample_config['analysis']['benchmark'] = createDir(
                    benchmarkpath, [])

    # Create result directory
    os.makedirs(resultpath, exist_ok=True)

    # Create log/script/benchmark directories if missing
    os.makedirs(logpath, exist_ok=True)
    os.makedirs(scriptpath, exist_ok=True)
    os.makedirs(benchmarkpath, exist_ok=True)

    if not analysis_type:
        analysis_type = sample_config['analysis']['analysis_type']

    # Singularity bind path
    bind_path = list()
    bind_path.append(os.path.commonpath(sample_config['reference'].values()))
    if 'panel' in sample_config:
        bind_path.append(sample_config.get('panel').get('capture_kit'))
    bind_path.append(sample_config['analysis']['analysis_dir'])
    bind_path.extend(
        get_fastq_bind_path(sample_config["analysis"]["fastq_path"]))

    # Construct snakemake command to run workflow
    balsamic_run = SnakeMake()
    balsamic_run.case_name = case_name
    balsamic_run.working_dir = Path(sample_config['analysis']['analysis_dir'],
                                    case_name, 'BALSAMIC_run').as_posix() + "/"
    balsamic_run.snakefile = snake_file if snake_file else get_snakefile(
        analysis_type, sequencing_type)
    balsamic_run.configfile = sample_config_path
    balsamic_run.run_mode = run_mode
    balsamic_run.cluster_config = cluster_config
    balsamic_run.scheduler = get_schedulerpy()
    balsamic_run.profile = profile
    balsamic_run.log_path = logpath
    balsamic_run.script_path = scriptpath
    balsamic_run.result_path = resultpath
    balsamic_run.qos = qos
    balsamic_run.account = account
    if mail_type:
        balsamic_run.mail_type = mail_type
    balsamic_run.mail_user = mail_user
    balsamic_run.forceall = force_all
    balsamic_run.run_analysis = run_analysis
    # Always use singularity
    balsamic_run.use_singularity = True
    balsamic_run.singularity_bind = bind_path
    balsamic_run.sm_opt = snakemake_opt

    if disable_variant_caller:
        balsamic_run.disable_variant_caller = disable_variant_caller

    try:
        cmd = sys.executable + " -m " + balsamic_run.build_cmd()
        subprocess.run(cmd, shell=True)
    except Exception as e:
        LOG.error(e)
        raise click.Abort()

    if run_analysis:
        jobid_file = os.path.join(
            logpath, sample_config["analysis"]["case_id"] + ".sacct")
        jobid_dump = os.path.join(resultpath, profile + "_jobids.yaml")
        with open(jobid_file, "r") as jobid_in, open(jobid_dump, "w") as jobid_out:
            jobid_list = jobid_in.read().splitlines()
            yaml.dump({sample_config['analysis']['case_id']: jobid_list},
                      jobid_out)
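The final block reads the scheduler job ids recorded in <case_id>.sacct (one id per line) and re-serializes them as a YAML map keyed by case id. A minimal illustration of that transformation (ids and case name invented):

import yaml

jobid_list = ["1001", "1002", "1003"]  # hypothetical contents of the .sacct file
print(yaml.dump({"case123": jobid_list}))
# case123:
# - '1001'
# - '1002'
# - '1003'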