Example #1
def realign_bam_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow(ctx=helpers.get_default_ctx(docker_image=config.containers('wgs')))

    outdir = args['out_dir']
    meta_yaml = os.path.join(outdir, 'metadata.yaml')
    input_yaml_blob = os.path.join(outdir, 'input.yaml')

    yamldata = yaml.safe_load(open(args['input_yaml']))

    samples = list(yamldata.keys())

    input_bams = {sample: yamldata[sample]['input'] for sample in samples}

    output_bams = os.path.join(outdir, '{sample_id}', '{sample_id}.bam')
    metrics = os.path.join(outdir, '{sample_id}', '{sample_id}.txt')
    metrics_tar = os.path.join(outdir, '{sample_id}', '{sample_id}.tar')

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id'),
        value=samples)

    workflow.subworkflow(
        name="realign",
        func=realign_bams,
        ctx=helpers.get_default_ctx(),
        args=(
            samples,
            mgd.InputFile("input.bam", 'sample_id', fnames=input_bams,
                          extensions=['.bai'], axes_origin=[]),
            mgd.OutputFile("realigned.bam", 'sample_id', template=output_bams,
                           extensions=['.bai', '.tdf'], axes_origin=[]),
            mgd.OutputFile("realigned.txt", 'sample_id', template=metrics,
                           extensions=['.bai'], axes_origin=[]),
            mgd.OutputFile("realigned.tar", 'sample_id', template=metrics_tar,
                           extensions=['.bai'], axes_origin=[]),
            args['refdir'],
        ),
        kwargs={'single_node': args['single_node']}
    )

    outputted_filenames = helpers.expand_list([output_bams, metrics, metrics_tar], samples, 'sample_id')

    workflow.transform(
        name='generate_meta_files_results',
        func='wgs.utils.helpers.generate_and_upload_metadata',
        args=(
            sys.argv[0:],
            args["out_dir"],
            outputted_filenames,
            mgd.OutputFile(meta_yaml)
        ),
        kwargs={
            'input_yaml_data': helpers.load_yaml(args['input_yaml']),
            'input_yaml': mgd.OutputFile(input_yaml_blob),
            'metadata': {'type': 'realignment'}
        }
    )

    pyp.run(workflow)
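The workflow above reads only a handful of keys from args, and the input YAML is expected to map each sample to a single input BAM (see the yamldata[sample]['input'] lookup). A minimal sketch of a matching invocation — every path and sample name below is hypothetical, and a real run would also need pypeliner's own execution options (tmpdir, submit settings, and so on):

# Hypothetical args for realign_bam_workflow; only the keys the function
# body reads directly are shown, and all paths are made up.
args = {
    'out_dir': '/data/realign_out',
    'input_yaml': '/data/input.yaml',   # maps sample -> {'input': <bam path>}
    'refdir': '/refdata/GRCh37',
    'single_node': False,
}

# /data/input.yaml, matching the yamldata[sample]['input'] access above:
# SAMPLE_A:
#   input: /data/bams/SAMPLE_A.bam
# SAMPLE_B:
#   input: /data/bams/SAMPLE_B.bam

realign_bam_workflow(args)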
Example #2
def alignment_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config_file'])
    inputs = helpers.load_yaml(args['input_yaml'])

    fastqs_r1 = helpers.get_values_from_input(inputs, 'fastq1')
    fastqs_r2 = helpers.get_values_from_input(inputs, 'fastq2')
    outputs = helpers.get_values_from_input(inputs, 'bam')

    outdir = args['out_dir']

    workflow.subworkflow(name="align_samples",
                         func=alignment.align_samples,
                         args=(config, fastqs_r1, fastqs_r2, outputs, outdir))

    pyp.run(workflow)
Example #3
def cna_calling_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config_file'])
    inputs = helpers.load_yaml(args['input_yaml'])

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    targets = helpers.get_values_from_input(inputs, 'target_list')
    breakpoints = helpers.get_values_from_input(inputs, 'breakpoints')
    samples = tumours.keys()

    cna_outdir = os.path.join(args['out_dir'], 'copynumber', '{sample_id}')
    remixt_results_filename = os.path.join(cna_outdir, 'remixt', 'results.h5')
    remixt_raw_dir = os.path.join(cna_outdir, 'remixt', 'raw_data')

    titan_raw_dir = os.path.join(cna_outdir, 'titan')
    titan_segments_filename = os.path.join(titan_raw_dir, 'segments.h5')
    titan_markers_filename = os.path.join(titan_raw_dir, 'markers.h5')
    titan_params_filename = os.path.join(titan_raw_dir, 'params.h5')

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id'),
        value=samples)

    workflow.subworkflow(
        name='titan',
        func=titan.create_titan_workflow,
        axes=('sample_id',),
        args=(
            mgd.InputFile("tumour.bam", 'sample_id', fnames=tumours,
                          extensions=['.bai'], axes_origin=[]),
            mgd.InputFile("normal.bam", 'sample_id', fnames=normals,
                          extensions=['.bai'], axes_origin=[]),
            mgd.InputFile("target_list", 'sample_id', fnames=targets,
                          axes_origin=[]),
            mgd.Template(titan_raw_dir, 'sample_id'),
            mgd.OutputFile('titan_segments_filename', 'sample_id',
                           axes_origin=[], template=titan_segments_filename),
            mgd.OutputFile('titan_params_filename', 'sample_id',
                           axes_origin=[], template=titan_params_filename),
            mgd.OutputFile('titan_markers_filename', 'sample_id',
                           axes_origin=[], template=titan_markers_filename),
            config['globals'],
            config['cna_calling'],
            config['cna_calling']['titan_intervals'],
            mgd.InputInstance('sample_id'),
        ),
    )

    workflow.subworkflow(
        name='remixt',
        func=remixt.create_remixt_workflow,
        axes=('sample_id',),
        args=(
            mgd.InputFile('tumour_bam', 'sample_id',
                          fnames=tumours, extensions=['.bai']),
            mgd.InputFile('normal_bam', 'sample_id',
                          fnames=normals, extensions=['.bai']),
            mgd.InputFile('destruct_breakpoints', 'sample_id',
                          axes_origin=[], fnames=breakpoints),
            mgd.InputInstance('sample_id'),
            config['cna_calling']['remixt_refdata'],
            mgd.OutputFile('remixt_results_filename', 'sample_id',
                           axes_origin=[], template=remixt_results_filename),
            mgd.Template(remixt_raw_dir, 'sample_id'),
            config['cna_calling']['min_num_reads']
        ),
    )

    pyp.run(workflow)
Example #4
def sv_calling_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config_file'])
    inputs = helpers.load_yaml(args['input_yaml'])

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    samples = tumours.keys()

    sv_outdir = os.path.join(args['out_dir'], 'breakpoints', '{sample_id}')
    destruct_breakpoints = os.path.join(sv_outdir, 'destruct_breakpoints.csv')
    destruct_library = os.path.join(sv_outdir, 'destruct_library.csv')
    destruct_raw_breakpoints = os.path.join(sv_outdir,
                                            'destruct_raw_breakpoints.csv')
    destruct_raw_library = os.path.join(sv_outdir, 'destruct_raw_library.csv')
    destruct_reads = os.path.join(sv_outdir, 'destruct_reads.csv')
    lumpy_vcf = os.path.join(sv_outdir, 'lumpy.vcf')
    parsed_csv = os.path.join(sv_outdir, 'filtered_consensus_calls.csv')

    workflow.setobj(obj=mgd.OutputChunks('sample_id'), value=samples)

    workflow.subworkflow(name="call_breakpoints",
                         func=call_breakpoints,
                         args=(samples, config,
                               mgd.InputFile("tumour.bam",
                                             'sample_id',
                                             fnames=tumours,
                                             extensions=['.bai'],
                                             axes_origin=[]),
                               mgd.InputFile("normal.bam",
                                             'sample_id',
                                             fnames=normals,
                                             extensions=['.bai'],
                                             axes_origin=[]),
                               mgd.OutputFile(
                                   'destruct_raw_breakpoints',
                                   'sample_id',
                                   template=destruct_raw_breakpoints,
                                   axes_origin=[]),
                               mgd.OutputFile('destruct_raw_library',
                                              'sample_id',
                                              template=destruct_raw_library,
                                              axes_origin=[]),
                               mgd.OutputFile('destruct_breakpoints',
                                              'sample_id',
                                              template=destruct_breakpoints,
                                              axes_origin=[]),
                               mgd.OutputFile('destruct_library',
                                              'sample_id',
                                              template=destruct_library,
                                              axes_origin=[]),
                               mgd.OutputFile('destruct_reads',
                                              'sample_id',
                                              template=destruct_reads,
                                              axes_origin=[]),
                               mgd.OutputFile('lumpy_vcf',
                                              'sample_id',
                                              template=lumpy_vcf,
                                              axes_origin=[]),
                               mgd.OutputFile('parsed_csv',
                                              'sample_id',
                                              template=parsed_csv,
                                              axes_origin=[])))

    pyp.run(workflow)
Example #5
def wgs_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config_file'])
    inputs = helpers.load_yaml(args['input_yaml'])

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    targets = helpers.get_values_from_input(inputs, 'target_list')
    samples = tumours.keys()

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id'),
        value=samples,
    )

    if args['alignment']:
        tumour_fastqs_r1, tumour_fastqs_r2 = get_fastqs(inputs, samples, 'tumour')
        normal_fastqs_r1, normal_fastqs_r2 = get_fastqs(inputs, samples, 'normal')

        normal_alignment_template = os.path.join(
            args['out_dir'], 'alignment', '{norm_sample_id}', '{norm_lane}', 'normal'
        )
        tumour_alignment_template = os.path.join(
            args['out_dir'], 'alignment', '{tum_sample_id}', '{tum_lane}', 'tumour'
        )

        workflow.subworkflow(
            name='wgs_alignment_paired_lanes',
            func=paired_alignment,
            args=(
                config,
                mgd.OutputFile("tumour.bam", 'sample_id', fnames=tumours,
                               extensions=['.bai'], axes_origin=[]),
                mgd.OutputFile("normal.bam", 'sample_id', fnames=normals,
                               extensions=['.bai'], axes_origin=[]),
                samples,
                tumour_fastqs_r1,
                tumour_fastqs_r2,
                normal_fastqs_r1,
                normal_fastqs_r2,
                normal_alignment_template,
                tumour_alignment_template,
            )
        )

    museq_dir = os.path.join(args['out_dir'], 'variants')
    museq_vcf = os.path.join(museq_dir, '{sample_id}', 'museq_paired_annotated.vcf.gz')
    museq_ss_vcf = os.path.join(museq_dir, '{sample_id}', 'museq_single_annotated.vcf.gz')
    strelka_snv_vcf = os.path.join(museq_dir, '{sample_id}', 'strelka_snv_annotated.vcf.gz')
    strelka_indel_vcf = os.path.join(museq_dir, '{sample_id}', 'strelka_indel_annotated.vcf.gz')
    parsed_snv_csv = os.path.join(museq_dir, '{sample_id}', 'allcalls.csv')
    museq_paired_pdf = os.path.join(museq_dir, '{sample_id}', 'paired_museqportrait.pdf')
    museq_single_pdf = os.path.join(museq_dir, '{sample_id}', 'single_museqportrait.pdf')
    workflow.subworkflow(
        name='variant_calling',
        func=call_variants,
        args=(
            samples,
            config,
            mgd.OutputFile('parsed_snv_csv', 'sample_id', template=parsed_snv_csv, axes_origin=[]),
            mgd.InputFile("tumour.bam", 'sample_id', fnames=tumours,
                          extensions=['.bai'], axes_origin=[]),
            mgd.InputFile("normal.bam", 'sample_id', fnames=normals,
                          extensions=['.bai'], axes_origin=[]),
            mgd.OutputFile('museq', 'sample_id', template=museq_vcf, axes_origin=[]),
            mgd.OutputFile('museq_ss', 'sample_id', template=museq_ss_vcf, axes_origin=[]),
            mgd.OutputFile('strelka_snv', 'sample_id', template=strelka_snv_vcf, axes_origin=[]),
            mgd.OutputFile('strelka_indel', 'sample_id', template=strelka_indel_vcf, axes_origin=[]),
            mgd.OutputFile('museq_paired_pdf', 'sample_id', template=museq_paired_pdf, axes_origin=[]),
            mgd.OutputFile('museq_single_pdf', 'sample_id', template=museq_single_pdf, axes_origin=[]),
        )
    )

    sv_outdir = os.path.join(args['out_dir'], 'breakpoints', '{sample_id}')
    destruct_breakpoints = os.path.join(sv_outdir, 'destruct_breakpoints.csv')
    destruct_library = os.path.join(sv_outdir, 'destruct_library.csv')
    destruct_raw_breakpoints = os.path.join(sv_outdir, 'destruct_raw_breakpoints.csv')
    destruct_raw_library = os.path.join(sv_outdir, 'destruct_raw_library.csv')
    destruct_reads = os.path.join(sv_outdir, 'destruct_reads.csv')
    lumpy_vcf = os.path.join(sv_outdir, 'lumpy.vcf')
    parsed_csv = os.path.join(sv_outdir, 'filtered_consensus_calls.csv')
    workflow.subworkflow(
        name="call_breakpoints",
        func=call_breakpoints,
        args=(
            samples,
            config,
            mgd.InputFile("tumour.bam", 'sample_id', fnames=tumours,
                          extensions=['.bai'], axes_origin=[]),
            mgd.InputFile("normal.bam", 'sample_id', fnames=normals,
                          extensions=['.bai'], axes_origin=[]),
            mgd.OutputFile('destruct_raw_breakpoints', 'sample_id', template=destruct_raw_breakpoints, axes_origin=[]),
            mgd.OutputFile('destruct_raw_library', 'sample_id', template=destruct_raw_library, axes_origin=[]),
            mgd.OutputFile('destruct_breakpoints', 'sample_id', template=destruct_breakpoints, axes_origin=[]),
            mgd.OutputFile('destruct_library', 'sample_id', template=destruct_library, axes_origin=[]),
            mgd.OutputFile('destruct_reads', 'sample_id', template=destruct_reads, axes_origin=[]),
            mgd.OutputFile('lumpy_vcf', 'sample_id', template=lumpy_vcf, axes_origin=[]),
            mgd.OutputFile('parsed_csv', 'sample_id', template=parsed_csv, axes_origin=[])
        )
    )

    cna_outdir = os.path.join(args['out_dir'], 'copynumber', '{sample_id}')
    remixt_raw_dir = os.path.join(cna_outdir, 'remixt', 'raw_data')
    titan_raw_dir = os.path.join(cna_outdir, 'titan')
    remixt_results_filename = os.path.join(cna_outdir, 'remixt', 'results.h5')
    titan_segments_filename = os.path.join(titan_raw_dir, 'segments.h5')
    titan_markers_filename = os.path.join(titan_raw_dir, 'markers.h5')
    titan_params_filename = os.path.join(titan_raw_dir, 'params.h5')
    workflow.subworkflow(
        name='titan',
        func=titan.create_titan_workflow,
        axes=('sample_id',),
        args=(
            mgd.InputFile('tumour.bam', 'sample_id', fnames=tumours, extensions=['.bai']),
            mgd.InputFile('normal.bam', 'sample_id', fnames=normals, extensions=['.bai']),
            mgd.InputFile("target_list", 'sample_id', fnames=targets, axes_origin=[]),
            mgd.Template(titan_raw_dir, 'sample_id'),
            mgd.OutputFile('titan_segments_filename', 'sample_id', axes_origin=[], template=titan_segments_filename),
            mgd.OutputFile('titan_params_filename', 'sample_id', axes_origin=[], template=titan_params_filename),
            mgd.OutputFile('titan_markers_filename', 'sample_id', axes_origin=[], template=titan_markers_filename),
            config['globals'],
            config['cna_calling'],
            config['cna_calling']['titan_intervals'],
            mgd.InputInstance('sample_id'),
        ),
    )
    workflow.subworkflow(
        name='remixt',
        func=remixt.create_remixt_workflow,
        axes=('sample_id',),
        args=(
            mgd.InputFile('tumour.bam', 'sample_id', fnames=tumours, extensions=['.bai']),
            mgd.InputFile('normal.bam', 'sample_id', fnames=normals, extensions=['.bai']),
            mgd.InputFile('destruct_breakpoints', 'sample_id', axes_origin=[], template=destruct_breakpoints),
            mgd.InputInstance('sample_id'),
            config['cna_calling']['remixt_refdata'],
            mgd.OutputFile('remixt_results_filename', 'sample_id', axes_origin=[], template=remixt_results_filename),
            mgd.Template(remixt_raw_dir, 'sample_id'),
            config['cna_calling']['min_num_reads']
        ),
    )

    pyp.run(workflow)
Example #6
def somatic_calling_workflow(args):
    inputs = helpers.load_yaml(args['input_yaml'])

    meta_yaml = os.path.join(args['out_dir'], 'metadata.yaml')
    input_yaml_blob = os.path.join(args['out_dir'], 'input.yaml')

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    samples = list(tumours.keys())

    tumour_ids = helpers.get_values_from_input(inputs, 'tumour_id')
    normal_ids = helpers.get_values_from_input(inputs, 'normal_id')

    var_dir = os.path.join(args['out_dir'], 'somatic')
    museq_vcf = os.path.join(var_dir, '{sample_id}',
                             '{sample_id}_museq_paired_annotated.vcf.gz')
    museq_maf = os.path.join(var_dir, '{sample_id}',
                             '{sample_id}_museq_paired_annotated.maf')
    museq_paired_pdf = os.path.join(var_dir, '{sample_id}',
                                    '{sample_id}_paired_museqportrait.pdf')

    strelka_snv_vcf = os.path.join(var_dir, '{sample_id}',
                                   '{sample_id}_strelka_snv_annotated.vcf.gz')
    strelka_snv_maf = os.path.join(var_dir, '{sample_id}',
                                   '{sample_id}_strelka_snv_annotated.maf')
    strelka_indel_vcf = os.path.join(
        var_dir, '{sample_id}', '{sample_id}_strelka_indel_annotated.vcf.gz')
    strelka_indel_maf = os.path.join(
        var_dir, '{sample_id}', '{sample_id}_strelka_indel_annotated.maf')

    mutect_vcf = os.path.join(var_dir, '{sample_id}',
                              '{sample_id}_mutect.vcf.gz')
    mutect_maf = os.path.join(var_dir, '{sample_id}', '{sample_id}_mutect.maf')

    consensus_somatic_maf = os.path.join(var_dir, '{sample_id}',
                                         '{sample_id}_consensus_somatic.maf')

    pyp = pypeliner.app.Pypeline(config=args)

    workflow = pypeliner.workflow.Workflow()

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id'),
        value=samples,
    )

    workflow.subworkflow(name='variant_calling',
                         func=somatic_calling.create_somatic_calling_workflow,
                         args=(
                             samples,
                             mgd.InputFile("tumour.bam",
                                           'sample_id',
                                           fnames=tumours,
                                           extensions=['.bai'],
                                           axes_origin=[]),
                             mgd.InputFile("normal.bam",
                                           'sample_id',
                                           fnames=normals,
                                           extensions=['.bai'],
                                           axes_origin=[]),
                             mgd.OutputFile('museq_vcf',
                                            'sample_id',
                                            template=museq_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('museq_maf',
                                            'sample_id',
                                            template=museq_maf,
                                            axes_origin=[]),
                             mgd.OutputFile('museq_paired_pdf',
                                            'sample_id',
                                            template=museq_paired_pdf,
                                            axes_origin=[]),
                             mgd.OutputFile('strelka_snv_vcf',
                                            'sample_id',
                                            template=strelka_snv_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('strelka_snv_maf',
                                            'sample_id',
                                            template=strelka_snv_maf,
                                            axes_origin=[]),
                             mgd.OutputFile('strelka_indel_vcf',
                                            'sample_id',
                                            template=strelka_indel_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('strelka_indel_maf',
                                            'sample_id',
                                            template=strelka_indel_maf,
                                            axes_origin=[]),
                             mgd.OutputFile('mutect_vcf',
                                            'sample_id',
                                            template=mutect_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('mutect_maf',
                                            'sample_id',
                                            template=mutect_maf,
                                            axes_origin=[]),
                             mgd.OutputFile('consensus_somatic_maf',
                                            'sample_id',
                                            template=consensus_somatic_maf,
                                            axes_origin=[]),
                             args['refdir'],
                             normal_ids,
                             tumour_ids,
                         ),
                         kwargs={
                             'single_node': args['single_node'],
                             'is_exome': args['is_exome'],
                         })

    filenames = [
        museq_vcf, museq_maf, museq_paired_pdf, strelka_snv_vcf,
        strelka_snv_maf, strelka_indel_vcf, strelka_indel_maf, mutect_vcf,
        mutect_maf, consensus_somatic_maf
    ]

    outputted_filenames = helpers.expand_list(filenames, samples, "sample_id")

    workflow.transform(name='generate_meta_files_results',
                       func='wgs.utils.helpers.generate_and_upload_metadata',
                       args=(sys.argv[0:], args['out_dir'],
                             outputted_filenames, mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data':
                           helpers.load_yaml(args['input_yaml']),
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'variant_calling'
                           }
                       })

    pyp.run(workflow)
Example #7

    return new


def delete_keys_from_dict(dict_del, lst_keys):
    """
    Delete the keys present in lst_keys from the dictionary.
    Loops recursively over nested dictionaries.
    """
    dict_foo = dict_del.copy()  # iterate over a copy to avoid 'dictionary changed size during iteration'
    for field in dict_foo.keys():
        if field in lst_keys:
            del dict_del[field]
        elif isinstance(dict_foo[field], dict):
            delete_keys_from_dict(dict_del[field], lst_keys)
    return dict_del
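A quick self-contained check of delete_keys_from_dict on a small nested dict (toy data, not part of the pipeline), showing that matching keys are removed at every nesting level:

# Toy data only.
cfg = {
    'sample_a': {'bam': 'a.bam', 'breakpoint_counts': 'a_counts.csv'},
    'breakpoint_annotation': 'global_annotation.csv',
    'keep_me': 1,
}
cleaned = delete_keys_from_dict(cfg, ['breakpoint_annotation', 'breakpoint_counts'])
print(cleaned)
# -> {'sample_a': {'bam': 'a.bam'}, 'keep_me': 1}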

yam = helpers.load_yaml("pseudobulkqc.yaml")


new = addlumpy(yam)
new = delete_keys_from_dict(new, ["breakpoint_annotation", "breakpoint_counts"])


print(len(new.keys()))
for k, v in new.items():
    print(k, "\n", v, "\n\n\n\n\n")
    out = {k: v}
    print(out, "\n\n\n\n\n")
    with open('yamls/pseudobulkqc_{}.yaml'.format(k), 'w') as outfile:
        yaml.dump(out, outfile, default_flow_style=False)
Example #8
def cohort_qc_workflow(args):
    pypeline = pypeliner.app.Pypeline(config=args)

    workflow = pypeliner.workflow.Workflow()

    inputs = helpers.load_qc_input_yaml_flat(args['input_yaml'])
    out_dir = args["out_dir"]
    api_key = args["API_key"]
    metadata = helpers.load_yaml(os.path.join(args["refdir"], "metadata.yaml"))
    gtf = os.path.join(args["refdir"], metadata["paths"]["gtf"])

    germline_mafs = {
        label: data["germline_maf"]
        for label, data in inputs.items()
    }
    somatic_mafs = {
        label: data["somatic_maf"]
        for label, data in inputs.items()
    }
    remixt_data = {label: data["remixt"] for label, data in inputs.items()}

    report_path = {
        label[0]: os.path.join(out_dir, label[0], "report.html")
        for label, data in inputs.items()
    }
    cna_table = {
        label[0]: os.path.join(out_dir, label[0], "cna_table.tsv")
        for label, data in inputs.items()
    }

    segmental_copynumber = {
        label[0]: os.path.join(out_dir, label[0], "segmental_copynumber.tsv")
        for label, data in inputs.items()
    }

    cohort_maf_oncogenic_filtered = {
        label[0]: os.path.join(out_dir, label[0],
                               "cohort_oncogenic_filtered.maf")
        for label, data in inputs.items()
    }

    workflow.setobj(
        obj=mgd.OutputChunks('cohort_label', 'sample_label'),
        value=list(inputs.keys()),
    )

    workflow.subworkflow(
        name="classifycopynumber",
        func="wgs.workflows.cohort_qc.cna_annotation_workflow",
        axes=("cohort_label", ),
        args=(
            mgd.InputFile('remixt_dict',
                          'cohort_label',
                          'sample_label',
                          fnames=remixt_data,
                          axes_origin=[]),
            mgd.TempOutputFile('cna_maftools_table', 'cohort_label'),
            mgd.OutputFile('segmental_copynumber',
                           'cohort_label',
                           fnames=segmental_copynumber),
            mgd.OutputFile('cna_table_cbio', 'cohort_label', fnames=cna_table),
            gtf,
        ),
    )

    workflow.subworkflow(
        name="maf_annotation_workflow",
        func="wgs.workflows.cohort_qc.preprocess_mafs_workflow",
        axes=("cohort_label", ),
        args=(mgd.InputFile('germline_mafs_dict',
                            'cohort_label',
                            'sample_label',
                            fnames=germline_mafs,
                            axes_origin=[]),
              mgd.InputFile('somatic_mafs_dict',
                            'cohort_label',
                            'sample_label',
                            fnames=somatic_mafs,
                            axes_origin=[]),
              mgd.OutputFile('cohort_maf_oncogenic_filtered',
                             'cohort_label',
                             fnames=cohort_maf_oncogenic_filtered), api_key),
    )

    workflow.subworkflow(
        name="make_plots_and_report",
        func="wgs.workflows.cohort_qc.create_cohort_qc_report",
        axes=("cohort_label", ),
        args=(mgd.InputInstance("cohort_label", ), out_dir,
              mgd.InputFile('cohort_maf_oncogenic_filtered',
                            'cohort_label',
                            fnames=cohort_maf_oncogenic_filtered),
              mgd.TempInputFile('cna_maftools_table', 'cohort_label'),
              mgd.OutputFile('report_path', 'cohort_label',
                             fnames=report_path)),
    )

    meta_yaml = os.path.join(out_dir, 'metadata.yaml')
    input_yaml_blob = os.path.join(out_dir, 'input.yaml')

    cohort_labels = sorted(set([v[0] for v in inputs.keys()]))

    outputted_filenames = helpers.expand_list([
        segmental_copynumber, cna_table, cohort_maf_oncogenic_filtered,
        report_path
    ], cohort_labels, "cohort_label")

    workflow.transform(name='generate_meta_files_results',
                       func='wgs.utils.helpers.generate_and_upload_metadata',
                       args=(sys.argv[0:], args["out_dir"],
                             outputted_filenames, mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data':
                           helpers.load_yaml(args['input_yaml']),
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'sample_qc'
                           }
                       })

    pypeline.run(workflow)
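Judging from the tuple indexing (label[0]) and the two-axis setobj, helpers.load_qc_input_yaml_flat appears to return a dict keyed by (cohort_label, sample_label) pairs. A hypothetical example of that flattened structure — every label and path here is invented:

import os

# Assumed shape of the flattened cohort QC inputs (an assumption, not the
# documented format); all names and paths are made up.
inputs = {
    ('COHORT_1', 'SAMPLE_A'): {
        'germline_maf': '/data/SAMPLE_A_germline.maf',
        'somatic_maf': '/data/SAMPLE_A_somatic.maf',
        'remixt': '/data/SAMPLE_A_remixt.h5',
    },
    ('COHORT_1', 'SAMPLE_B'): {
        'germline_maf': '/data/SAMPLE_B_germline.maf',
        'somatic_maf': '/data/SAMPLE_B_somatic.maf',
        'remixt': '/data/SAMPLE_B_remixt.h5',
    },
}

# Per-cohort outputs are then keyed by label[0], e.g.:
report_path = {label[0]: os.path.join('/out', label[0], 'report.html') for label in inputs}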
Example #9
def single_sample_copynumber_calling_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)

    inputs = helpers.load_yaml(args['input_yaml'])

    outdir = args['out_dir']
    meta_yaml = os.path.join(outdir, 'metadata.yaml')
    input_yaml_blob = os.path.join(outdir, 'input.yaml')

    bams = helpers.get_values_from_input(inputs, 'bam')
    samples = list(bams.keys())

    cna_outdir = os.path.join(args['out_dir'], 'copynumber', '{sample_id}')

    hmmcopy_raw_dir = os.path.join(cna_outdir, 'hmmcopy')
    bias_pdf = os.path.join(hmmcopy_raw_dir, 'plots', '{sample_id}_bias.pdf')
    correction_pdf = os.path.join(hmmcopy_raw_dir, 'plots',
                                  '{sample_id}_correction.pdf')
    hmmcopy_pdf = os.path.join(hmmcopy_raw_dir, 'plots',
                               '{sample_id}_hmmcopy.pdf')
    correction_table = os.path.join(hmmcopy_raw_dir,
                                    '{sample_id}_correctreads_with_state.txt')
    pygenes = os.path.join(hmmcopy_raw_dir, '{sample_id}_hmmcopy.seg.pygenes')

    refdir_paths = config.refdir_data(args['refdir'])['paths']
    chromosomes = config.refdir_data(args['refdir'])['params']['chromosomes']

    workflow = pypeliner.workflow.Workflow()

    workflow.setobj(obj=mgd.OutputChunks('sample_id'), value=samples)

    workflow.subworkflow(
        name='hmmcopy',
        func=hmmcopy.create_hmmcopy_workflow,
        axes=('sample_id', ),
        args=(mgd.InputFile("sample.bam",
                            'sample_id',
                            fnames=bams,
                            extensions=['.bai'
                                        ]), mgd.InputInstance('sample_id'),
              mgd.OutputFile('bias', 'sample_id', template=bias_pdf),
              mgd.OutputFile('correction',
                             'sample_id',
                             template=correction_pdf),
              mgd.OutputFile('hmmcopy', 'sample_id', template=hmmcopy_pdf),
              mgd.OutputFile('correction_table',
                             'sample_id',
                             template=correction_table),
              mgd.OutputFile('pygenes', 'sample_id', template=pygenes),
              chromosomes, refdir_paths['map_wig'], refdir_paths['gc_wig'],
              refdir_paths['gtf']),
    )

    filenames = [
        bias_pdf,
        correction_pdf,
        hmmcopy_pdf,
        correction_table,
        pygenes,
    ]

    outputted_filenames = helpers.expand_list(filenames, samples, "sample_id")

    workflow.transform(name='generate_meta_files_results',
                       func='wgs.utils.helpers.generate_and_upload_metadata',
                       args=(sys.argv[0:], args["out_dir"],
                             outputted_filenames, mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data':
                           helpers.load_yaml(args['input_yaml']),
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'single_sample_copynumber_calling'
                           }
                       })

    pyp.run(workflow)
Example #10
def sample_qc_workflow(args):
    inputs = helpers.load_yaml(args['input_yaml'])
    normal_only = args['normal_only']
    samples = list(inputs.keys())

    # inputs
    chromosomes = config.refdir_data(args['refdir'])['params']['chromosomes']
    files = make_inputs(inputs, normal_only=normal_only)

    # outputs
    out_dir = args['out_dir']
    normal_coverage = os.path.join(out_dir, '{sample_id}',
                                   '{sample_id}_normal_coverage.tsv')
    genome_wide_plot = os.path.join(out_dir, '{sample_id}',
                                    '{sample_id}_genome_wide.pdf')

    if not normal_only:
        circos_plot_remixt = os.path.join(out_dir, '{sample_id}',
                                          '{sample_id}_circos_remixt.pdf')
        circos_plot_titan = os.path.join(out_dir, '{sample_id}',
                                         '{sample_id}_circos_titan.pdf')
        tumour_coverage = os.path.join(out_dir, '{sample_id}',
                                       '{sample_id}_tumour_coverage.tsv')

    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    workflow.setobj(obj=mgd.OutputChunks('sample_id'), value=samples)

    if normal_only:
        workflow.subworkflow(
            name="normal_sample_qc",
            func=sample_qc.create_sample_qc_workflow_normal_only,
            ctx=helpers.get_default_ctx(),
            axes=('sample_id', ),
            args=(mgd.InputInstance('sample_id'), args["refdir"],
                  mgd.InputFile('normal.bam',
                                'sample_id',
                                fnames=files["normal"]),
                  mgd.InputFile('roh', 'sample_id', fnames=files["roh"]),
                  mgd.InputFile('germline_calls',
                                'sample_id',
                                fnames=files["germline"]),
                  mgd.OutputFile('genome_wide_plot.pdf',
                                 'sample_id',
                                 template=genome_wide_plot),
                  mgd.OutputFile('normcov',
                                 'sample_id',
                                 template=normal_coverage), chromosomes,
                  args['bins'], args['mapping_qual_threshold']),
            # kwargs={'single_node': args['single_node']}
        )
        outputted_filenames = helpers.expand_list(
            [normal_coverage, genome_wide_plot], samples, "sample_id")
    else:
        workflow.subworkflow(
            name="sample_qc",
            func=sample_qc.create_sample_qc_workflow,
            ctx=helpers.get_default_ctx(),
            axes=('sample_id', ),
            args=(mgd.InputInstance('sample_id'), args["refdir"],
                  mgd.InputFile('normal.bam',
                                'sample_id',
                                fnames=files["normal"]),
                  mgd.InputFile('tumour.bam',
                                'sample_id',
                                fnames=files["tumor"]),
                  mgd.InputFile('titan', 'sample_id', fnames=files["titan"]),
                  mgd.InputFile('remixt', 'sample_id', fnames=files["remixt"]),
                  mgd.InputFile('breakpoints_consensus',
                                'sample_id',
                                fnames=files["breakpoints"]),
                  mgd.InputFile('roh', 'sample_id', fnames=files["roh"]),
                  mgd.InputFile('germline_calls',
                                'sample_id',
                                fnames=files["germline"]),
                  mgd.InputFile('somatic_calls',
                                'sample_id',
                                fnames=files["somatic"]),
                  mgd.OutputFile('genome_wide_plot.pdf',
                                 'sample_id',
                                 template=genome_wide_plot),
                  mgd.OutputFile('normcov',
                                 'sample_id',
                                 template=normal_coverage),
                  mgd.OutputFile('tumcov',
                                 'sample_id',
                                 template=tumour_coverage), chromosomes,
                  args['bins'], args['mapping_qual_threshold']),
            kwargs={'single_node': args['single_node']})
        workflow.subworkflow(
            name='generate_circos_plot',
            ctx=helpers.get_default_ctx(memory=10, walltime='24:00', disk=400),
            axes=('sample_id', ),
            func=sample_qc.circos_plot,
            args=(
                mgd.InputFile('titan', 'sample_id', fnames=files["titan"]),
                mgd.InputFile('remixt', 'sample_id', fnames=files["remixt"]),
                mgd.InputInstance("sample_id"),
                mgd.InputFile('breakpoints_consensus',
                              'sample_id',
                              fnames=files["breakpoints"]),
                mgd.OutputFile('circos_remixt',
                               'sample_id',
                               template=circos_plot_remixt),
                mgd.OutputFile('circos_titan',
                               'sample_id',
                               template=circos_plot_titan),
            ),
        )
        outputted_filenames = helpers.expand_list([
            circos_plot_remixt, circos_plot_titan, normal_coverage,
            tumour_coverage, genome_wide_plot
        ], samples, "sample_id")

    meta_yaml = os.path.join(out_dir, 'metadata.yaml')
    input_yaml_blob = os.path.join(out_dir, 'input.yaml')

    workflow.transform(name='generate_meta_files_results',
                       func='wgs.utils.helpers.generate_and_upload_metadata',
                       args=(sys.argv[0:], args["out_dir"],
                             outputted_filenames, mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data':
                           helpers.load_yaml(args['input_yaml']),
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'sample_qc'
                           }
                       })

    pyp.run(workflow)
Example #11
def copynumber_calling_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)

    run_hmmcopy = args['hmmcopy']
    run_titan = args['titan']
    run_remixt = args['remixt']

    if not run_hmmcopy and not run_titan and not run_remixt:
        run_hmmcopy = True
        run_titan = True
        run_remixt = True

    inputs = helpers.load_yaml(args['input_yaml'])

    outdir = args['out_dir']
    meta_yaml = os.path.join(outdir, 'metadata.yaml')
    input_yaml_blob = os.path.join(outdir, 'input.yaml')

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    targets = helpers.get_values_from_input(inputs, 'target_list')
    breakpoints = helpers.get_values_from_input(inputs, 'breakpoints')
    samples = list(tumours.keys())

    cna_outdir = os.path.join(args['out_dir'], 'copynumber', '{sample_id}')

    titan_raw_dir = os.path.join(cna_outdir, 'titan')

    titan_outfile = os.path.join(titan_raw_dir,
                                 '{sample_id}_titan_markers.csv.gz')
    titan_params = os.path.join(titan_raw_dir,
                                '{sample_id}_titan_params.csv.gz')
    titan_segs = os.path.join(titan_raw_dir, '{sample_id}_titan_segs.csv.gz')
    titan_igv_segs = os.path.join(titan_raw_dir,
                                  '{sample_id}_titan_igv_segs.seg')
    titan_parsed = os.path.join(titan_raw_dir,
                                '{sample_id}_titan_parsed.csv.gz')
    titan_plots = os.path.join(titan_raw_dir, '{sample_id}_titan_plots.pdf')
    titan_tar_outputs = os.path.join(titan_raw_dir,
                                     '{sample_id}_data_all_parameters.tar.gz')
    museq_vcf = os.path.join(titan_raw_dir, '{sample_id}_museq.vcf')

    hmmcopy_normal_raw_dir = os.path.join(cna_outdir, 'hmmcopy_normal')
    normal_bias_pdf = os.path.join(hmmcopy_normal_raw_dir, 'plots',
                                   '{sample_id}_bias.pdf')
    normal_correction_pdf = os.path.join(hmmcopy_normal_raw_dir, 'plots',
                                         '{sample_id}_correction.pdf')
    normal_hmmcopy_pdf = os.path.join(hmmcopy_normal_raw_dir, 'plots',
                                      '{sample_id}_hmmcopy.pdf')
    normal_correction_table = os.path.join(
        hmmcopy_normal_raw_dir, '{sample_id}_correctreads_with_state.txt')
    normal_pygenes = os.path.join(hmmcopy_normal_raw_dir,
                                  '{sample_id}_hmmcopy.seg.pygenes')

    hmmcopy_tumour_raw_dir = os.path.join(cna_outdir, 'hmmcopy_tumour')
    tumour_bias_pdf = os.path.join(hmmcopy_tumour_raw_dir, 'plots',
                                   '{sample_id}_bias.pdf')
    tumour_correction_pdf = os.path.join(hmmcopy_tumour_raw_dir, 'plots',
                                         '{sample_id}_correction.pdf')
    tumour_hmmcopy_pdf = os.path.join(hmmcopy_tumour_raw_dir, 'plots',
                                      '{sample_id}_hmmcopy.pdf')
    tumour_correction_table = os.path.join(
        hmmcopy_tumour_raw_dir, '{sample_id}_correctreads_with_state.txt')
    tumour_pygenes = os.path.join(hmmcopy_tumour_raw_dir,
                                  '{sample_id}_hmmcopy.seg.pygenes')

    remixt_outdir = os.path.join(args['out_dir'], 'remixt', '{sample_id}')
    remixt_outfile = os.path.join(remixt_outdir, '{sample_id}_remixt.h5')

    remixt_brk_cn_csv = os.path.join(remixt_outdir,
                                     '{sample_id}_remixt_brk_cn.csv.gz')
    remixt_cn_csv = os.path.join(remixt_outdir, '{sample_id}_remixt_cn.csv.gz')
    remixt_minor_modes_csv = os.path.join(
        remixt_outdir, '{sample_id}_remixt_minor_modes.csv.gz')
    remixt_mix_csv = os.path.join(remixt_outdir,
                                  '{sample_id}_remixt_mix.csv.gz')
    remixt_read_depth_csv = os.path.join(
        remixt_outdir, '{sample_id}_remixt_read_depth.csv.gz')
    remixt_stats_csv = os.path.join(remixt_outdir,
                                    '{sample_id}_remixt_stats.csv.gz')

    refdir_paths = config.refdir_data(args['refdir'])['paths']
    chromosomes = config.refdir_data(args['refdir'])['params']['chromosomes']

    workflow = pypeliner.workflow.Workflow(ctx=helpers.get_default_ctx(
        docker_image=config.containers('wgs')))

    workflow.setobj(obj=mgd.OutputChunks('sample_id'), value=samples)

    if run_remixt:
        workflow.subworkflow(
            name='remixt',
            func=remixt.create_remixt_workflow,
            axes=('sample_id', ),
            args=(
                mgd.InputFile("tumour.bam",
                              'sample_id',
                              fnames=tumours,
                              extensions=['.bai']),
                mgd.InputFile("normal.bam",
                              'sample_id',
                              fnames=normals,
                              extensions=['.bai']),
                mgd.InputFile("breakpoints", 'sample_id', fnames=breakpoints),
                mgd.InputInstance('sample_id'),
                mgd.OutputFile('remixt.h5',
                               'sample_id',
                               template=remixt_outfile),
                mgd.OutputFile('remixt_brk_cn.csv',
                               'sample_id',
                               template=remixt_brk_cn_csv),
                mgd.OutputFile('remixt_cn.csv',
                               'sample_id',
                               template=remixt_cn_csv),
                mgd.OutputFile('remixt_minor_modes.csv',
                               'sample_id',
                               template=remixt_minor_modes_csv),
                mgd.OutputFile('remixt_mix.csv',
                               'sample_id',
                               template=remixt_mix_csv),
                mgd.OutputFile('remixt_read_depth.csv',
                               'sample_id',
                               template=remixt_read_depth_csv),
                mgd.OutputFile('remixt_stats.csv',
                               'sample_id',
                               template=remixt_stats_csv),
                refdir_paths['refdata_remixt'],
                refdir_paths['reference'],
            ),
            kwargs={'single_node': args['single_node']})

    if run_titan:
        workflow.subworkflow(name='titan',
                             func=titan.create_titan_workflow,
                             axes=('sample_id', ),
                             args=(
                                 mgd.InputFile("tumour.bam",
                                               'sample_id',
                                               fnames=tumours,
                                               extensions=['.bai']),
                                 mgd.InputFile("normal.bam",
                                               'sample_id',
                                               fnames=normals,
                                               extensions=['.bai']),
                                 mgd.InputFile("target_list",
                                               'sample_id',
                                               fnames=targets),
                                 mgd.OutputFile('outfile',
                                                'sample_id',
                                                template=titan_outfile),
                                 mgd.OutputFile('params',
                                                'sample_id',
                                                template=titan_params),
                                 mgd.OutputFile('segs',
                                                'sample_id',
                                                template=titan_segs),
                                 mgd.OutputFile('igv_segs',
                                                'sample_id',
                                                template=titan_igv_segs),
                                 mgd.OutputFile('parsed',
                                                'sample_id',
                                                template=titan_parsed),
                                 mgd.OutputFile('plots',
                                                'sample_id',
                                                template=titan_plots),
                                 mgd.OutputFile('tar_outputs',
                                                'sample_id',
                                                template=titan_tar_outputs),
                                 mgd.OutputFile('museq.vcf',
                                                'sample_id',
                                                template=museq_vcf),
                                 mgd.InputInstance('sample_id'),
                                 refdir_paths['reference'],
                                 chromosomes,
                                 refdir_paths['het_positions_titan'],
                                 refdir_paths['map_wig'],
                                 refdir_paths['gc_wig'],
                                 refdir_paths['gtf'],
                             ),
                             kwargs={'single_node': args['single_node']})

    if run_hmmcopy:
        workflow.subworkflow(
            name='hmmcopy_normal',
            func=hmmcopy.create_hmmcopy_workflow,
            axes=('sample_id', ),
            args=(mgd.InputFile("normal.bam",
                                'sample_id',
                                fnames=normals,
                                extensions=['.bai']),
                  mgd.InputInstance('sample_id'),
                  mgd.OutputFile('normal_bias',
                                 'sample_id',
                                 template=normal_bias_pdf),
                  mgd.OutputFile('normal_correction',
                                 'sample_id',
                                 template=normal_correction_pdf),
                  mgd.OutputFile('normal_hmmcopy',
                                 'sample_id',
                                 template=normal_hmmcopy_pdf),
                  mgd.OutputFile('normal_correction_table',
                                 'sample_id',
                                 template=normal_correction_table),
                  mgd.OutputFile('normal_pygenes',
                                 'sample_id',
                                 template=normal_pygenes), chromosomes,
                  refdir_paths['map_wig'], refdir_paths['gc_wig'],
                  refdir_paths['gtf']),
        )

        workflow.subworkflow(
            name='hmmcopy_tumour',
            func=hmmcopy.create_hmmcopy_workflow,
            axes=('sample_id', ),
            args=(mgd.InputFile("tumour.bam",
                                'sample_id',
                                fnames=tumours,
                                extensions=['.bai']),
                  mgd.InputInstance('sample_id'),
                  mgd.OutputFile('tumour_bias',
                                 'sample_id',
                                 template=tumour_bias_pdf),
                  mgd.OutputFile('tumour_correction',
                                 'sample_id',
                                 template=tumour_correction_pdf),
                  mgd.OutputFile('tumour_hmmcopy',
                                 'sample_id',
                                 template=tumour_hmmcopy_pdf),
                  mgd.OutputFile('tumour_correction_table',
                                 'sample_id',
                                 template=tumour_correction_table),
                  mgd.OutputFile('tumour_pygenes',
                                 'sample_id',
                                 template=tumour_pygenes), chromosomes,
                  refdir_paths['map_wig'], refdir_paths['gc_wig'],
                  refdir_paths['gtf']),
        )

    filenames = []
    if run_titan:
        filenames += [
            titan_outfile,
            titan_params,
            titan_segs,
            titan_igv_segs,
            titan_parsed,
            titan_plots,
            titan_tar_outputs,
            museq_vcf,
        ]
    if run_hmmcopy:
        filenames += [
            normal_bias_pdf, normal_correction_pdf, normal_hmmcopy_pdf,
            normal_correction_table, normal_pygenes, tumour_bias_pdf,
            tumour_correction_pdf, tumour_hmmcopy_pdf, tumour_correction_table,
            tumour_pygenes
        ]

    outputted_filenames = helpers.expand_list(filenames, samples, "sample_id")

    workflow.transform(name='generate_meta_files_results',
                       func='wgs.utils.helpers.generate_and_upload_metadata',
                       args=(sys.argv[0:], args["out_dir"],
                             outputted_filenames, mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data':
                           helpers.load_yaml(args['input_yaml']),
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'copynumber_calling'
                           }
                       })

    pyp.run(workflow)
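The three run flags fall back to "run everything" when none of them is set, so selecting a single caller only takes one truthy flag. A hedged sketch of a TITAN-only call — all paths are invented, and a real args dict would also carry pypeliner's execution options:

# Hypothetical args for copynumber_calling_workflow; only the keys the
# function reads are shown.
args = {
    'hmmcopy': False,
    'titan': True,      # if hmmcopy, titan and remixt were all falsy, all three would run
    'remixt': False,
    'input_yaml': '/data/cna_input.yaml',
    'out_dir': '/data/cna_out',
    'refdir': '/refdata/GRCh37',
    'single_node': False,
}
copynumber_calling_workflow(args)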
Example #12
def alignment_workflow(args):
    inputs = helpers.load_yaml(args['input_yaml'])
    outdir = args['out_dir']
    meta_yaml = os.path.join(outdir, 'metadata.yaml')
    input_yaml_blob = os.path.join(outdir, 'input.yaml')

    outputs = os.path.join(outdir, '{sample_id}', '{sample_id}.bam')
    outputs_tdf = os.path.join(outdir, '{sample_id}', '{sample_id}.bam.tdf')
    metrics_output = os.path.join(outdir, '{sample_id}',
                                  '{sample_id}_metrics.csv')
    metrics_tar = os.path.join(outdir, '{sample_id}',
                               '{sample_id}_metrics.tar.gz')

    samples = list(inputs.keys())
    fastqs_r1, fastqs_r2 = helpers.get_fastqs(inputs, samples, None)

    sample_info = helpers.get_sample_info(inputs)

    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id', 'lane_id'),
        value=list(fastqs_r1.keys()),
    )

    workflow.subworkflow(name="align_samples",
                         func=alignment.align_samples,
                         args=(mgd.InputFile('input.r1.fastq.gz',
                                             'sample_id',
                                             'lane_id',
                                             fnames=fastqs_r1),
                               mgd.InputFile('input.r2.fastq.gz',
                                             'sample_id',
                                             'lane_id',
                                             fnames=fastqs_r2),
                               mgd.Template('output.bam',
                                            'sample_id',
                                            template=outputs),
                               mgd.Template('metrics.txt',
                                            'sample_id',
                                            template=metrics_output),
                               mgd.Template('metrics.tar',
                                            'sample_id',
                                            template=metrics_tar),
                               mgd.Template('output.bam.tdf',
                                            'sample_id',
                                            template=outputs_tdf), sample_info,
                               args['refdir']),
                         kwargs={
                             'single_node': args['single_node'],
                             'picard_mem': args['picard_mem']
                         })

    outputted_filenames = helpers.expand_list([
        outputs,
        outputs_tdf,
        metrics_output,
        metrics_tar,
    ], samples, "sample_id")
    workflow.transform(name='generate_meta_files_results',
                       func='wgs.utils.helpers.generate_and_upload_metadata',
                       args=(sys.argv[0:], outdir, outputted_filenames,
                             mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data': inputs,
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'alignment'
                           }
                       })

    pyp.run(workflow)
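The chunk object here is built from fastqs_r1.keys() over the two axes ('sample_id', 'lane_id'), so helpers.get_fastqs evidently returns dicts keyed by (sample_id, lane_id) tuples. A hypothetical pair of such dicts — sample names, lane ids and paths are all made up:

# Assumed return shape of helpers.get_fastqs (inferred from the setobj call above).
fastqs_r1 = {
    ('SAMPLE_A', 'L001'): '/data/fastq/SAMPLE_A_L001_R1.fastq.gz',
    ('SAMPLE_A', 'L002'): '/data/fastq/SAMPLE_A_L002_R1.fastq.gz',
}
fastqs_r2 = {
    ('SAMPLE_A', 'L001'): '/data/fastq/SAMPLE_A_L001_R2.fastq.gz',
    ('SAMPLE_A', 'L002'): '/data/fastq/SAMPLE_A_L002_R2.fastq.gz',
}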
Example #13
def germline_calling_workflow(args):
    inputs = helpers.load_yaml(args['input_yaml'])

    meta_yaml = os.path.join(args['out_dir'], 'metadata.yaml')
    input_yaml_blob = os.path.join(args['out_dir'], 'input.yaml')

    normals = helpers.get_values_from_input(inputs, 'normal')
    samples = list(normals.keys())

    normal_ids = helpers.get_values_from_input(inputs, 'normal_id')

    var_dir = os.path.join(args['out_dir'], 'germline')

    museq_ss_vcf = os.path.join(var_dir, '{sample_id}', '{sample_id}_museq_single_annotated.vcf.gz')
    museq_ss_maf = os.path.join(var_dir, '{sample_id}', '{sample_id}_museq_single_annotated.maf')
    museq_single_pdf = os.path.join(var_dir, '{sample_id}', '{sample_id}_single_museqportrait.pdf')

    samtools_germline_vcf = os.path.join(var_dir, '{sample_id}', '{sample_id}_samtools_germline.vcf.gz')
    samtools_germline_maf = os.path.join(var_dir, '{sample_id}', '{sample_id}_samtools_germline.maf')
    samtools_roh = os.path.join(var_dir, '{sample_id}', '{sample_id}_roh.csv.gz')

    freebayes_germline_vcf = os.path.join(var_dir, '{sample_id}', '{sample_id}_freebayes_germline.vcf.gz')
    freebayes_germline_maf = os.path.join(var_dir, '{sample_id}', '{sample_id}_freebayes_germline.maf')

    rtg_germline_vcf = os.path.join(var_dir, '{sample_id}', '{sample_id}_rtg_germline.vcf.gz')
    rtg_germline_maf = os.path.join(var_dir, '{sample_id}', '{sample_id}_rtg_germline.maf')

    consensus_germline_maf = os.path.join(var_dir, '{sample_id}', '{sample_id}_consensus_germline.maf')

    pyp = pypeliner.app.Pypeline(config=args)

    workflow = pypeliner.workflow.Workflow(
        ctx=helpers.get_default_ctx()
    )

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id'),
        value=samples,
    )

    workflow.subworkflow(
        name='germline_calling',
        func=germline_calling.create_germline_calling_workflow,
        args=(
            samples,
            mgd.InputFile("normal.bam", 'sample_id', fnames=normals,
                          extensions=['.bai'], axes_origin=[]),
            mgd.OutputFile('museq_ss_vcf', 'sample_id', template=museq_ss_vcf, axes_origin=[]),
            mgd.OutputFile('museq_ss_maf', 'sample_id', template=museq_ss_maf, axes_origin=[]),
            mgd.OutputFile('museq_single_pdf', 'sample_id', template=museq_single_pdf, axes_origin=[]),
            mgd.OutputFile('samtools_germline_vcf', 'sample_id', template=samtools_germline_vcf, axes_origin=[]),
            mgd.OutputFile('samtools_germline_maf', 'sample_id', template=samtools_germline_maf, axes_origin=[]),
            mgd.OutputFile('samtools_roh', 'sample_id', template=samtools_roh, axes_origin=[]),
            mgd.OutputFile('freebayes_germline_vcf', 'sample_id', template=freebayes_germline_vcf, axes_origin=[]),
            mgd.OutputFile('freebayes_germline_maf', 'sample_id', template=freebayes_germline_maf, axes_origin=[]),
            mgd.OutputFile('rtg_germline_vcf', 'sample_id', template=rtg_germline_vcf, axes_origin=[]),
            mgd.OutputFile('rtg_germline_maf', 'sample_id', template=rtg_germline_maf, axes_origin=[]),
            mgd.OutputFile('consensus_germline_maf', 'sample_id', template=consensus_germline_maf, axes_origin=[]),
            args['refdir'],
            normal_ids
        ),
        kwargs={
            'single_node': args['single_node'],
        }
    )

    filenames = [
        museq_ss_vcf,
        museq_ss_maf,
        museq_single_pdf,
        samtools_germline_vcf,
        samtools_germline_maf,
        samtools_roh,
        freebayes_germline_vcf,
        freebayes_germline_maf,
        rtg_germline_vcf,
        rtg_germline_maf,
        consensus_germline_maf
    ]

    outputted_filenames = helpers.expand_list(filenames, samples, "sample_id")

    workflow.transform(
        name='generate_meta_files_results',
        func='wgs.utils.helpers.generate_and_upload_metadata',
        args=(
            sys.argv[0:],
            args['out_dir'],
            outputted_filenames,
            mgd.OutputFile(meta_yaml)
        ),
        kwargs={
            'input_yaml_data': helpers.load_yaml(args['input_yaml']),
            'input_yaml': mgd.OutputFile(input_yaml_blob),
            'metadata': {'type': 'germline_calling'}
        }
    )

    pyp.run(workflow)
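
A hypothetical example of the parsed input_yaml this germline workflow reads; the keys are taken from the get_values_from_input calls above ('normal', 'normal_id'), while the sample id and paths are placeholders.

# Hypothetical parsed input_yaml for germline calling (placeholder values).
inputs = {
    'SAMPLE_001': {
        'normal': '/data/SAMPLE_001/normal.bam',
        'normal_id': 'SAMPLE_001_N',
    },
}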
Example #14
def variant_calling_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow()

    config = helpers.load_yaml(args['config_file'])
    inputs = helpers.load_yaml(args['input_yaml'])

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    samples = list(tumours.keys())

    museq_dir = os.path.join(args['out_dir'], 'variants')
    museq_vcf = os.path.join(museq_dir, '{sample_id}',
                             'museq_paired_annotated.vcf.gz')
    museq_ss_vcf = os.path.join(museq_dir, '{sample_id}',
                                'museq_single_annotated.vcf.gz')
    strelka_snv_vcf = os.path.join(museq_dir, '{sample_id}',
                                   'strelka_snv_annotated.vcf.gz')
    strelka_indel_vcf = os.path.join(museq_dir, '{sample_id}',
                                     'strelka_indel_annotated.vcf.gz')
    parsed_snv_csv = os.path.join(museq_dir, '{sample_id}', 'allcalls.csv')
    museq_paired_pdf = os.path.join(museq_dir, '{sample_id}',
                                    'paired_museqportrait.pdf')
    museq_paired_pdf_txt = os.path.join(museq_dir, '{sample_id}',
                                        'paired_museqportrait.txt')
    museq_single_pdf = os.path.join(museq_dir, '{sample_id}',
                                    'single_museqportrait.pdf')
    museq_single_pdf_txt = os.path.join(museq_dir, '{sample_id}',
                                        'single_museqportrait.txt')

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id'),
        value=samples,
    )

    workflow.subworkflow(name='variant_calling',
                         func=call_variants,
                         args=(
                             samples,
                             museq_dir,
                             config,
                             mgd.OutputFile('parsed_snv_csv',
                                            'sample_id',
                                            template=parsed_snv_csv,
                                            axes_origin=[]),
                             mgd.InputFile("tumour.bam",
                                           'sample_id',
                                           fnames=tumours,
                                           extensions=['.bai'],
                                           axes_origin=[]),
                             mgd.InputFile("normal.bam",
                                           'sample_id',
                                           fnames=normals,
                                           extensions=['.bai'],
                                           axes_origin=[]),
                             mgd.OutputFile('museq',
                                            'sample_id',
                                            template=museq_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('museq_ss',
                                            'sample_id',
                                            template=museq_ss_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('strelka_snv',
                                            'sample_id',
                                            template=strelka_snv_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('strelka_indel',
                                            'sample_id',
                                            template=strelka_indel_vcf,
                                            axes_origin=[]),
                             mgd.OutputFile('museq_paired_pdf',
                                            'sample_id',
                                            template=museq_paired_pdf,
                                            axes_origin=[]),
                             mgd.OutputFile('museq_paired_pdf_txt',
                                            'sample_id',
                                            template=museq_paired_pdf_txt,
                                            axes_origin=[]),
                             mgd.OutputFile('museq_single_pdf',
                                            'sample_id',
                                            template=museq_single_pdf,
                                            axes_origin=[]),
                             mgd.OutputFile('museq_single_pdf_txt',
                                            'sample_id',
                                            template=museq_single_pdf_txt,
                                            axes_origin=[]),
                         ))

    pyp.run(workflow)
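
Several of these workflows use helpers.get_values_from_input to pull a field out of the per-sample input mapping. Below is a minimal sketch of the assumed behaviour for simple per-sample fields such as 'tumour' and 'normal'; it is an illustration only, and the real helper in wgs.utils.helpers clearly does more (for example, it also resolves per-lane fastq entries in the alignment workflow).

def get_values_from_input(inputs, key):
    # Return a dict keyed by sample id, e.g. {'SAMPLE_001': '/data/SAMPLE_001/tumour.bam'}.
    return {sample_id: sample_data.get(key) for sample_id, sample_data in inputs.items()}

Under this assumption, tumours and normals are plain dicts keyed by sample id, which is why they can be passed directly as fnames to mgd.InputFile.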
Example #15
def variant_calling_workflow(args):
    inputs = helpers.load_yaml(args['input_yaml'])

    meta_yaml = os.path.join(args['out_dir'], 'metadata.yaml')
    input_yaml_blob = os.path.join(args['out_dir'], 'input.yaml')

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    samples = list(tumours.keys())

    var_dir = os.path.join(args['out_dir'], 'variants')
    museq_vcf = os.path.join(var_dir, '{sample_id}',
                             '{sample_id}_museq_paired_annotated.vcf.gz')
    museq_ss_vcf = os.path.join(var_dir, '{sample_id}',
                                '{sample_id}_museq_single_annotated.vcf.gz')

    samtools_germline_vcf = os.path.join(
        var_dir, '{sample_id}', '{sample_id}_samtools_germline.vcf.gz')
    samtools_roh = os.path.join(var_dir, '{sample_id}', '{sample_id}_roh.csv')

    strelka_snv_vcf = os.path.join(var_dir, '{sample_id}',
                                   '{sample_id}_strelka_snv_annotated.vcf.gz')
    strelka_indel_vcf = os.path.join(
        var_dir, '{sample_id}', '{sample_id}_strelka_indel_annotated.vcf.gz')
    museq_paired_pdf = os.path.join(var_dir, '{sample_id}',
                                    '{sample_id}_paired_museqportrait.pdf')
    museq_single_pdf = os.path.join(var_dir, '{sample_id}',
                                    '{sample_id}_single_museqportrait.pdf')

    somatic_csv = os.path.join(var_dir, '{sample_id}',
                               '{sample_id}_consensus_somatic.csv.gz')
    somatic_snpeff = os.path.join(
        var_dir, '{sample_id}', '{sample_id}_consensus_somatic_snpeff.csv.gz')
    somatic_ma = os.path.join(var_dir, '{sample_id}',
                              '{sample_id}_consensus_somatic_ma.csv.gz')
    somatic_ids = os.path.join(var_dir, '{sample_id}',
                               '{sample_id}_consensus_somatic_ids.csv.gz')

    indel_csv = os.path.join(var_dir, '{sample_id}',
                             '{sample_id}_indel.csv.gz')
    indel_snpeff = os.path.join(var_dir, '{sample_id}',
                                '{sample_id}_indel_snpeff.csv.gz')
    indel_ma = os.path.join(var_dir, '{sample_id}',
                            '{sample_id}_indel_ma.csv.gz')
    indel_ids = os.path.join(var_dir, '{sample_id}',
                             '{sample_id}_indel_ids.csv.gz')

    germline_csv = os.path.join(var_dir, '{sample_id}',
                                '{sample_id}_germline.csv.gz')
    germline_snpeff = os.path.join(var_dir, '{sample_id}',
                                   '{sample_id}_germline_snpeff.csv.gz')
    germline_ma = os.path.join(var_dir, '{sample_id}',
                               '{sample_id}_germline_ma.csv.gz')
    germline_ids = os.path.join(var_dir, '{sample_id}',
                                '{sample_id}_germline_ids.csv.gz')

    pyp = pypeliner.app.Pypeline(config=args)

    workflow = pypeliner.workflow.Workflow(ctx=helpers.get_default_ctx(
        docker_image=config.containers('wgs')))

    workflow.setobj(
        obj=mgd.OutputChunks('sample_id'),
        value=samples,
    )

    # If any sample is missing a tumour BAM, fall back to germline-only calling for the whole run.
    if not all(tumours.values()):
        workflow.subworkflow(
            name='variant_calling',
            func=call_germlines_only,
            args=(samples,
                  mgd.InputFile("normal.bam",
                                'sample_id',
                                fnames=normals,
                                extensions=['.bai'],
                                axes_origin=[]),
                  mgd.OutputFile('museq_ss',
                                 'sample_id',
                                 template=museq_ss_vcf,
                                 axes_origin=[]),
                  mgd.OutputFile('samtools_germline',
                                 'sample_id',
                                 template=samtools_germline_vcf,
                                 axes_origin=[]),
                  mgd.OutputFile('samtools_roh',
                                 'sample_id',
                                 template=samtools_roh,
                                 axes_origin=[]),
                  mgd.OutputFile('museq_single_pdf',
                                 'sample_id',
                                 template=museq_single_pdf,
                                 axes_origin=[]), args['refdir']),
            kwargs={'single_node': args['single_node']})
    else:
        workflow.subworkflow(name='variant_calling',
                             func=call_variants,
                             args=(
                                 samples,
                                 mgd.OutputFile('somatic_csv',
                                                'sample_id',
                                                template=somatic_csv,
                                                axes_origin=[]),
                                 mgd.OutputFile('somatic_snpeff',
                                                'sample_id',
                                                template=somatic_snpeff,
                                                axes_origin=[]),
                                 mgd.OutputFile('somatic_ma',
                                                'sample_id',
                                                template=somatic_ma,
                                                axes_origin=[]),
                                 mgd.OutputFile('somatic_ids',
                                                'sample_id',
                                                template=somatic_ids,
                                                axes_origin=[]),
                                 mgd.OutputFile('indel_csv',
                                                'sample_id',
                                                template=indel_csv,
                                                axes_origin=[]),
                                 mgd.OutputFile('indel_snpeff',
                                                'sample_id',
                                                template=indel_snpeff,
                                                axes_origin=[]),
                                 mgd.OutputFile('indel_ma',
                                                'sample_id',
                                                template=indel_ma,
                                                axes_origin=[]),
                                 mgd.OutputFile('indel_ids',
                                                'sample_id',
                                                template=indel_ids,
                                                axes_origin=[]),
                                 mgd.OutputFile('germline_csv',
                                                'sample_id',
                                                template=germline_csv,
                                                axes_origin=[]),
                                 mgd.OutputFile('germline_snpeff',
                                                'sample_id',
                                                template=germline_snpeff,
                                                axes_origin=[]),
                                 mgd.OutputFile('germline_ma',
                                                'sample_id',
                                                template=germline_ma,
                                                axes_origin=[]),
                                 mgd.OutputFile('germline_ids',
                                                'sample_id',
                                                template=germline_ids,
                                                axes_origin=[]),
                                 mgd.InputFile("tumour.bam",
                                               'sample_id',
                                               fnames=tumours,
                                               extensions=['.bai'],
                                               axes_origin=[]),
                                 mgd.InputFile("normal.bam",
                                               'sample_id',
                                               fnames=normals,
                                               extensions=['.bai'],
                                               axes_origin=[]),
                                 mgd.OutputFile('museq',
                                                'sample_id',
                                                template=museq_vcf,
                                                axes_origin=[]),
                                 mgd.OutputFile('museq_ss',
                                                'sample_id',
                                                template=museq_ss_vcf,
                                                axes_origin=[]),
                                 mgd.OutputFile('samtools_germline',
                                                'sample_id',
                                                template=samtools_germline_vcf,
                                                axes_origin=[]),
                                 mgd.OutputFile('roh_calls',
                                                'sample_id',
                                                template=samtools_roh,
                                                axes_origin=[]),
                                 mgd.OutputFile('strelka_snv',
                                                'sample_id',
                                                template=strelka_snv_vcf,
                                                axes_origin=[]),
                                 mgd.OutputFile('strelka_indel',
                                                'sample_id',
                                                template=strelka_indel_vcf,
                                                axes_origin=[]),
                                 mgd.OutputFile('museq_paired_pdf',
                                                'sample_id',
                                                template=museq_paired_pdf,
                                                axes_origin=[]),
                                 mgd.OutputFile('museq_single_pdf',
                                                'sample_id',
                                                template=museq_single_pdf,
                                                axes_origin=[]),
                                 args['refdir'],
                             ),
                             kwargs={
                                 'single_node': args['single_node'],
                                 'is_exome': args['is_exome'],
                             })

        filenames = [
            somatic_csv, somatic_snpeff, somatic_ma, somatic_ids, indel_csv,
            indel_snpeff, indel_ma, indel_ids, germline_csv, germline_snpeff,
            germline_ma, germline_ids, museq_vcf, museq_ss_vcf,
            strelka_snv_vcf, strelka_indel_vcf, museq_paired_pdf,
            museq_single_pdf
        ]

        outputted_filenames = helpers.expand_list(filenames, samples,
                                                  "sample_id")

        workflow.transform(
            name='generate_meta_files_results',
            func='wgs.utils.helpers.generate_and_upload_metadata',
            args=(sys.argv[0:], args['out_dir'], outputted_filenames,
                  mgd.OutputFile(meta_yaml)),
            kwargs={
                'input_yaml_data': helpers.load_yaml(args['input_yaml']),
                'input_yaml': mgd.OutputFile(input_yaml_blob),
                'metadata': {
                    'type': 'variant_calling'
                }
            })

    pyp.run(workflow)
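
The branch above falls back to germline-only calling whenever at least one sample has no tumour BAM. A small worked example of that check, using placeholder data:

# Placeholder data: SAMPLE_002 has no tumour BAM.
tumours = {'SAMPLE_001': '/data/SAMPLE_001/tumour.bam', 'SAMPLE_002': None}
all(tumours.values())        # False, because of the None entry
not all(tumours.values())    # True, so call_germlines_only is used for every sample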
Example #16
def breakpoint_calling_workflow(args):
    pyp = pypeliner.app.Pypeline(config=args)

    inputs = helpers.load_yaml(args['input_yaml'])

    meta_yaml = os.path.join(args["out_dir"], 'metadata.yaml')
    input_yaml_blob = os.path.join(args["out_dir"], 'input.yaml')

    tumours = helpers.get_values_from_input(inputs, 'tumour')
    normals = helpers.get_values_from_input(inputs, 'normal')
    samples = list(tumours.keys())

    sv_outdir = os.path.join(args['out_dir'], 'breakpoints', '{sample_id}')
    destruct_breakpoints = os.path.join(
        sv_outdir, '{sample_id}_destruct_breakpoints.csv.gz')
    destruct_library = os.path.join(sv_outdir,
                                    '{sample_id}_destruct_library.csv.gz')
    destruct_raw_breakpoints = os.path.join(
        sv_outdir, '{sample_id}_destruct_raw_breakpoints.csv.gz')
    destruct_raw_library = os.path.join(
        sv_outdir, '{sample_id}_destruct_raw_library.csv.gz')
    destruct_reads = os.path.join(sv_outdir,
                                  '{sample_id}_destruct_reads.csv.gz')
    lumpy_vcf = os.path.join(sv_outdir, '{sample_id}_lumpy.vcf')
    parsed_csv = os.path.join(sv_outdir,
                              '{sample_id}_filtered_consensus_calls.csv.gz')

    svaba_vcf = os.path.join(sv_outdir, '{sample_id}_svaba.vcf')

    single_node = args['single_node']

    refdir_paths = config.refdir_data(args['refdir'])['paths']
    chromosomes = config.refdir_data(args['refdir'])['params']['chromosomes']

    workflow = pypeliner.workflow.Workflow()

    workflow.setobj(obj=mgd.OutputChunks('sample_id'), value=samples)

    workflow.subworkflow(
        name='destruct',
        func=destruct_wgs.create_destruct_wgs_workflow,
        axes=('sample_id', ),
        args=(mgd.InputFile("tumour.bam",
                            'sample_id',
                            fnames=tumours,
                            extensions=['.bai'],
                            axes_origin=[]),
              mgd.InputFile("normal.bam",
                            'sample_id',
                            fnames=normals,
                            extensions=['.bai'],
                            axes_origin=[]),
              mgd.OutputFile('destruct_raw_breakpoints',
                             'sample_id',
                             template=destruct_raw_breakpoints),
              mgd.OutputFile('destruct_raw_library',
                             'sample_id',
                             template=destruct_raw_library),
              mgd.OutputFile('destruct_breakpoints',
                             'sample_id',
                             template=destruct_breakpoints),
              mgd.OutputFile('destruct_library',
                             'sample_id',
                             template=destruct_library),
              mgd.OutputFile('destruct_reads',
                             'sample_id',
                             template=destruct_reads),
              mgd.InputInstance('sample_id'), refdir_paths['reference'],
              refdir_paths['refdata_destruct'], refdir_paths['gtf'],
              refdir_paths['blacklist_destruct']),
        kwargs={'single_node': single_node})

    workflow.subworkflow(
        name='lumpy',
        func=lumpy.create_lumpy_workflow,
        axes=('sample_id', ),
        args=(mgd.OutputFile('lumpy_vcf', 'sample_id', template=lumpy_vcf), ),
        kwargs={
            'tumour_bam':
            mgd.InputFile("tumour.bam",
                          'sample_id',
                          fnames=tumours,
                          extensions=['.bai'],
                          axes_origin=[]),
            'normal_bam':
            mgd.InputFile("normal.bam",
                          'sample_id',
                          fnames=normals,
                          extensions=['.bai'],
                          axes_origin=[]),
            'single_node':
            single_node
        },
    )

    # SvABA calling is optional and only runs when args['svaba'] is set.
    if args['svaba']:
        workflow.subworkflow(
            name='svaba',
            func=svaba.create_svaba_workflow,
            axes=('sample_id', ),
            args=(
                mgd.InputFile("tumour.bam",
                              'sample_id',
                              fnames=tumours,
                              extensions=['.bai'],
                              axes_origin=[]),
                mgd.InputFile("normal.bam",
                              'sample_id',
                              fnames=normals,
                              extensions=['.bai'],
                              axes_origin=[]),
                mgd.OutputFile('svaba_vcf', 'sample_id', template=svaba_vcf),
                refdir_paths['reference'],
            ),
        )

    workflow.subworkflow(
        name="consensus_calling",
        func=breakpoint_calling_consensus.create_consensus_workflow,
        axes=('sample_id', ),
        args=(mgd.InputFile('destruct_breakpoints',
                            'sample_id',
                            template=destruct_breakpoints),
              mgd.InputFile('lumpy_vcf', 'sample_id', template=lumpy_vcf),
              mgd.OutputFile('consensus_calls',
                             'sample_id',
                             template=parsed_csv,
                             extensions=['.yaml']), chromosomes),
    )

    filenames = [
        destruct_breakpoints, destruct_library, destruct_raw_breakpoints,
        destruct_raw_library, destruct_reads, lumpy_vcf, parsed_csv
    ]

    if args['svaba']:
        filenames.append(svaba_vcf)

    outputted_filenames = helpers.expand_list(filenames, samples, "sample_id")

    workflow.transform(name='generate_meta_files_results',
                       func=helpers.generate_and_upload_metadata,
                       args=(sys.argv[0:], args["out_dir"],
                             outputted_filenames, mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data':
                           helpers.load_yaml(args['input_yaml']),
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'breakpoint_calling'
                           }
                       })

    pyp.run(workflow)
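
helpers.expand_list is used throughout these workflows to turn the '{sample_id}' path templates into concrete per-sample paths for the metadata step. A minimal sketch of the assumed behaviour; this is an illustration, not the wgs implementation.

def expand_list(templates, samples, axis_name):
    # Fill the axis placeholder (here '{sample_id}') in every template for every sample.
    return [
        template.format(**{axis_name: sample})
        for sample in samples
        for template in templates
    ]

expand_list(['/out/{sample_id}/{sample_id}_lumpy.vcf'], ['S1', 'S2'], 'sample_id')
# -> ['/out/S1/S1_lumpy.vcf', '/out/S2/S2_lumpy.vcf']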
Example #17
def postprocessing_workflow(args):

    yamldata = yaml.safe_load(open(args['input_yaml']))

    samples = list(yamldata.keys())

    normals = {sample: yamldata[sample]['normal_bam'] for sample in samples}
    tumours = {sample: yamldata[sample]['tumour_bam'] for sample in samples}

    titan = {sample: yamldata[sample]['titan'] for sample in samples}
    remixt = {sample: yamldata[sample]['remixt'] for sample in samples}
    breakpoints_consensus = {
        sample: yamldata[sample]['breakpoints_consensus']
        for sample in samples
    }
    roh = {sample: yamldata[sample]['roh'] for sample in samples}
    germline_calls = {
        sample: yamldata[sample]['germline_calls']
        for sample in samples
    }
    somatic_calls = {
        sample: yamldata[sample]['somatic_calls']
        for sample in samples
    }

    out_dir = args['out_dir']

    meta_yaml = os.path.join(out_dir, 'pipeline_metadata.yaml')
    input_yaml_blob = os.path.join(out_dir, 'input.yaml')

    circos_plot_remixt = os.path.join(out_dir, '{sample_id}',
                                      '{sample_id}_circos_remixt.pdf')
    circos_plot_titan = os.path.join(out_dir, '{sample_id}',
                                     '{sample_id}_circos_titan.pdf')

    genome_wide_plot = os.path.join(out_dir, '{sample_id}',
                                    '{sample_id}_genome_wide.pdf')

    pyp = pypeliner.app.Pypeline(config=args)
    workflow = pypeliner.workflow.Workflow(ctx=helpers.get_default_ctx(
        docker_image=config.containers('wgs')))

    workflow.setobj(obj=mgd.OutputChunks('sample_id'), value=samples)

    workflow.subworkflow(name="postprocessing",
                         func=postprocessing.create_postprocessing_workflow,
                         ctx=helpers.get_default_ctx(),
                         axes=('sample_id', ),
                         args=(
                             mgd.InputFile('normal.bam',
                                           'sample_id',
                                           fnames=normals),
                             mgd.InputFile('tumour.bam',
                                           'sample_id',
                                           fnames=tumours),
                             titan,
                             remixt,
                             breakpoints_consensus,
                             roh,
                             germline_calls,
                             somatic_calls,
                             mgd.OutputFile('circos_plot_remixt.pdf',
                                            'sample_id',
                                            template=circos_plot_remixt),
                             mgd.OutputFile('circos_plot_titan.pdf',
                                            'sample_id',
                                            template=circos_plot_titan),
                             mgd.OutputFile('genome_wide_plot.pdf',
                                            'sample_id',
                                            template=genome_wide_plot),
                             args['refdir'],
                             mgd.InputInstance('sample_id'),
                         ),
                         kwargs={'single_node': args['single_node']})

    outputted_filenames = helpers.expand_list(
        [circos_plot_remixt, circos_plot_titan, genome_wide_plot], samples,
        "sample_id")

    workflow.transform(name='generate_meta_files_results',
                       func='wgs.utils.helpers.generate_and_upload_metadata',
                       args=(sys.argv[0:], args["out_dir"],
                             outputted_filenames, mgd.OutputFile(meta_yaml)),
                       kwargs={
                           'input_yaml_data':
                           helpers.load_yaml(args['input_yaml']),
                           'input_yaml': mgd.OutputFile(input_yaml_blob),
                           'metadata': {
                               'type': 'postprocessing'
                           }
                       })

    pyp.run(workflow)
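
For reference, a hypothetical parsed input_yaml matching the keys this postprocessing workflow reads (normal_bam, tumour_bam, titan, remixt, breakpoints_consensus, roh, germline_calls, somatic_calls); all sample ids and paths are placeholders.

# Hypothetical postprocessing input (placeholder values).
yamldata = {
    'SAMPLE_001': {
        'normal_bam': '/data/SAMPLE_001/normal.bam',
        'tumour_bam': '/data/SAMPLE_001/tumour.bam',
        'titan': '/results/SAMPLE_001/titan.csv.gz',
        'remixt': '/results/SAMPLE_001/remixt.h5',
        'breakpoints_consensus': '/results/SAMPLE_001/breakpoints_consensus.csv.gz',
        'roh': '/results/SAMPLE_001/roh.csv.gz',
        'germline_calls': '/results/SAMPLE_001/germline.csv.gz',
        'somatic_calls': '/results/SAMPLE_001/somatic.csv.gz',
    },
}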