Esempio n. 1
0
def produce_fastqc_report(fastq_filename, output_html, output_plots, temp_dir,
                          **kwargs):
    """Run FastQC on a fastq file and move its report outputs into place.

    :param fastq_filename: path to the input fastq (.fastq/.fq, optionally .gz)
    :param output_html: destination path for the FastQC html report
    :param output_plots: destination path for the FastQC zip archive of plots
    :param temp_dir: scratch directory where FastQC writes its outputs
    :param kwargs: extra keyword args forwarded to pypeliner (e.g. docker_image)
    :raises Exception: if the fastq filename has an unrecognized extension
    """
    helpers.makedirs(temp_dir)

    pypeliner.commandline.execute(
        'fastqc',
        '--outdir=' + temp_dir,
        fastq_filename,
        **kwargs)

    # FastQC names its outputs after the input basename with the fastq
    # extension stripped; try the longer '.gz' suffixes first so that
    # 'sample.fastq.gz' is not partially matched.
    fastq_basename = os.path.basename(fastq_filename)
    for suffix in ('.fastq.gz', '.fq.gz', '.fq', '.fastq'):
        if fastq_basename.endswith(suffix):
            fastq_basename = fastq_basename[:-len(suffix)]
            break
    else:
        # include the filename so the failure is actionable
        raise Exception("Unknown file type: {}".format(fastq_filename))

    output_basename = os.path.join(temp_dir, fastq_basename)

    shutil.move(output_basename + '_fastqc.zip', output_plots)
    shutil.move(output_basename + '_fastqc.html', output_html)
Esempio n. 2
0
def bam_collect_wgs_metrics(bam_filename,
                            ref_genome,
                            metrics_filename,
                            config,
                            tempdir,
                            mem="2G",
                            docker_image=None):
    """Run picard CollectWgsMetrics on a bam file.

    :param bam_filename: path to the input bam
    :param ref_genome: path to the reference fasta
    :param metrics_filename: path for the output metrics file
    :param config: dict with 'min_bqual', 'min_mqual' and 'count_unpaired' keys
    :param tempdir: scratch directory for picard temp files
    :param mem: JVM heap size string (both -Xmx and -Xms)
    :param docker_image: optional docker image to run picard in
    """
    helpers.makedirs(tempdir)

    # picard expects 'True'/'False' literals rather than python bools
    count_unpaired = 'True' if config['count_unpaired'] else 'False'

    cmd = [
        'picard',
        '-Xmx' + mem,
        '-Xms' + mem,
        '-XX:ParallelGCThreads=1',
        'CollectWgsMetrics',
        'INPUT=' + bam_filename,
        'OUTPUT=' + metrics_filename,
        'REFERENCE_SEQUENCE=' + ref_genome,
        'MINIMUM_BASE_QUALITY=' + str(config['min_bqual']),
        'MINIMUM_MAPPING_QUALITY=' + str(config['min_mqual']),
        'COVERAGE_CAP=500',
        'VALIDATION_STRINGENCY=LENIENT',
        'COUNT_UNPAIRED=' + count_unpaired,
        'TMP_DIR=' + tempdir,
        'MAX_RECORDS_IN_RAM=150000',
    ]

    pypeliner.commandline.execute(*cmd, docker_image=docker_image)
Esempio n. 3
0
def bam_collect_insert_metrics(bam_filename,
                               flagstat_metrics_filename,
                               metrics_filename,
                               histogram_filename,
                               tempdir,
                               mem="2G",
                               picard_docker=None,
                               samtools_docker=None):
    """Run picard CollectInsertSizeMetrics, guarding against unpaired bams.

    Computes samtools flagstat first; when the bam contains no properly
    paired reads, writes sentinel output files instead of invoking picard
    (which would fail on such input).

    :param bam_filename: path to the input bam
    :param flagstat_metrics_filename: path for the samtools flagstat output
    :param metrics_filename: path for the insert size metrics output
    :param histogram_filename: path for the insert size histogram pdf
    :param tempdir: scratch directory for picard temp files
    :param mem: JVM heap size string
    :param picard_docker: optional docker image for picard
    :param samtools_docker: optional docker image for samtools
    :raises Exception: if the flagstat output has no 'properly paired' line
    """
    bam_flagstat(bam_filename,
                 flagstat_metrics_filename,
                 docker_image=samtools_docker)

    # Parse flagstat output to decide whether any properly paired reads exist.
    has_paired = None
    with open(flagstat_metrics_filename) as flagstat:
        for line in flagstat:
            if 'properly paired' not in line:
                continue
            # flagstat lines start with the read count, e.g. '0 + 0 properly paired'
            has_paired = not line.startswith('0 ')

    if has_paired is None:
        raise Exception(
            'Unable to determine number of properly paired reads from {}'.
            format(flagstat_metrics_filename))

    if not has_paired:
        # Emit sentinel outputs so downstream steps see the expected files.
        with open(metrics_filename, 'w') as metrics:
            metrics.write('## FAILED: No properly paired reads\n')
        with open(histogram_filename, 'w'):
            pass
        return

    helpers.makedirs(tempdir)

    cmd = [
        'picard',
        '-Xmx' + mem,
        '-Xms' + mem,
        '-XX:ParallelGCThreads=1',
        'CollectInsertSizeMetrics',
        'INPUT=' + bam_filename,
        'OUTPUT=' + metrics_filename,
        'HISTOGRAM_FILE=' + histogram_filename,
        'ASSUME_SORTED=True',
        'VALIDATION_STRINGENCY=LENIENT',
        'TMP_DIR=' + tempdir,
        'MAX_RECORDS_IN_RAM=150000',
    ]

    pypeliner.commandline.execute(*cmd, docker_image=picard_docker)
Esempio n. 4
0
def merge_pdfs(infiles, outfile):
    """Merge multiple pdf files into a single pdf.

    :param infiles: iterable of pdf paths, or a dict whose values are paths
    :param outfile: path for the merged output pdf
    """
    if isinstance(infiles, dict):
        infiles = infiles.values()

    merger = PdfFileMerger()
    try:
        for infile in infiles:
            # add it to list if not empty. skip empty files to avoid errors later
            if os.path.getsize(infile):
                # pass the path so the merger owns (and closes) the handle,
                # rather than leaking a file object opened here
                merger.append(infile)

        helpers.makedirs(outfile, isfile=True)

        with open(outfile, 'wb') as fout:
            merger.write(fout)
    finally:
        # release all source file handles held by the merger
        merger.close()
Esempio n. 5
0
def run_museq_one_job(
        tempdir, museq_vcf, reference, intervals, museq_params,
        tumour_bam=None, normal_bam=None, museq_docker_image=None,
        vcftools_docker_image=None, titan_mode=False
):
    '''
    Run museq over each interval in parallel and merge the per-interval
    VCF files into a single output VCF.

    :param tempdir: scratch directory; one numbered subdir is created per interval
    :param museq_vcf: path to the final merged output VCF
    :param reference: path to the reference genome
    :param intervals: list of genomic intervals to run museq over
    :param museq_params: museq parameters passed through to run_museq
    :param tumour_bam: path to the tumour bam (optional)
    :param normal_bam: path to the normal bam (optional)
    :param museq_docker_image: docker image used to run the museq commands
    :param vcftools_docker_image: docker image used for the VCF merge step
    :param titan_mode: passed through to run_museq
    '''

    # Build one museq command per interval; each writes into its own subdir.
    commands = []
    for i, interval in enumerate(intervals):
        ival_temp_dir = os.path.join(tempdir, str(i))
        helpers.makedirs(ival_temp_dir)
        output = os.path.join(ival_temp_dir, 'museq.vcf')
        log = os.path.join(ival_temp_dir, 'museq.log')

        # return_cmd=True: run_museq returns the command line instead of executing it
        command = run_museq(
            output, log, reference, interval, museq_params,
            tumour_bam=tumour_bam, normal_bam=normal_bam,
            return_cmd=True, titan_mode=titan_mode
        )

        commands.append(command)

    # Execute all interval commands concurrently via GNU parallel.
    parallel_temp_dir = os.path.join(tempdir, 'gnu_parallel_temp')
    helpers.run_in_gnu_parallel(commands, parallel_temp_dir, museq_docker_image)

    # Collect per-interval VCFs (same order as the intervals) and merge them.
    vcf_files = [os.path.join(tempdir, str(i), 'museq.vcf') for i in range(len(intervals))]
    merge_tempdir = os.path.join(tempdir, 'museq_merge')
    helpers.makedirs(merge_tempdir)
    merge_vcfs(vcf_files, museq_vcf, merge_tempdir, docker_image=vcftools_docker_image)
Esempio n. 6
0
def generate_submit_config_in_temp(args):
    """Generate an azure batch submit config yaml and record its path in args.

    No-op (returns args unchanged) unless the 'submit' option selects azure
    batch, or when running the 'generate_config' subcommand itself.

    :param args: dict of parsed command line arguments; mutated in place to
        add a 'submit_config' key when a config is generated
    :return: the (possibly updated) args dict
    """
    azure_submit = [
        'azurebatch', 'pypeliner.contrib.azure.batchqueue.AzureJobQueue'
    ]
    # 'x not in y' rather than 'not x in y' (same semantics, clearer intent)
    if args.get("submit", None) not in azure_submit:
        return args

    if args['which'] == 'generate_config':
        return args

    batch_yaml = "batch.yaml"
    tmpdir = args.get("tmpdir", None)
    pipelinedir = args.get("pipelinedir", None)

    # use pypeliner tmpdir to store yaml; fall back to cwd with a warning
    if pipelinedir:
        batch_yaml = os.path.join(pipelinedir, batch_yaml)
    elif tmpdir:
        batch_yaml = os.path.join(tmpdir, batch_yaml)
    else:
        # Logger.warn is deprecated in favour of Logger.warning
        logging.getLogger("wgs.generate_batch_config").warning(
            "no tmpdir specified, generating configs in working dir")
        batch_yaml = os.path.join(os.getcwd(), batch_yaml)

    helpers.makedirs(batch_yaml, isfile=True)

    # avoid clobbering configs from previous runs
    batch_yaml = helpers.get_incrementing_filename(batch_yaml)

    params_override = args.get("config_override", {})

    config_params = get_batch_params(override=params_override)
    config = get_batch_config(config_params, override=params_override)
    write_config(config, batch_yaml)

    args["submit_config"] = batch_yaml

    return args
Esempio n. 7
0
def bam_collect_gc_metrics(bam_filename,
                           ref_genome,
                           metrics_filename,
                           summary_filename,
                           chart_filename,
                           tempdir,
                           mem="2G",
                           docker_image=None):
    """Run picard CollectGcBiasMetrics on a bam file.

    :param bam_filename: path to the input bam
    :param ref_genome: path to the reference fasta
    :param metrics_filename: path for the gc bias metrics output
    :param summary_filename: path for the summary metrics output
    :param chart_filename: path for the gc bias chart pdf
    :param tempdir: scratch directory for picard temp files
    :param mem: JVM heap size string (both -Xmx and -Xms)
    :param docker_image: optional docker image to run picard in
    """
    helpers.makedirs(tempdir)

    cmd = [
        'picard',
        '-Xmx' + mem,
        '-Xms' + mem,
        '-XX:ParallelGCThreads=1',
        'CollectGcBiasMetrics',
        'INPUT=' + bam_filename,
        'OUTPUT=' + metrics_filename,
        'REFERENCE_SEQUENCE=' + ref_genome,
        'S=' + summary_filename,
        'CHART_OUTPUT=' + chart_filename,
        'VALIDATION_STRINGENCY=LENIENT',
        'TMP_DIR=' + tempdir,
        'MAX_RECORDS_IN_RAM=150000',
    ]

    pypeliner.commandline.execute(*cmd, docker_image=docker_image)
Esempio n. 8
0
def merge_vcfs(inputs, outfile, tempdir, docker_image=None):
    """Concatenate input VCFs, then sort the result into the output file.

    :param inputs: iterable of input VCF paths
    :param outfile: path for the sorted merged VCF
    :param tempdir: scratch directory for the intermediate concatenated VCF
    :param docker_image: optional docker image for the sort step
    """
    helpers.makedirs(tempdir)
    # concatenate first into a temp file, then sort into the final output
    concatenated = os.path.join(tempdir, 'merged.vcf')
    vcfutils.concatenate_vcf(inputs, concatenated)
    vcfutils.sort_vcf(concatenated, outfile, docker_image=docker_image)