Example #1
def platypus_single(job, config, name, samples, input_bam):
    """Run Platypus on an unmatched tumour sample and call somatic variants.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['platypus']['bin']`` is read).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param samples: presumably the samples configuration dictionary; not referenced in the visible code.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name (``<name>.platypus.vcf``).
    """

    platypus_vcf = "{}.platypus.vcf".format(name)
    platypus_log = "{}.platypus.log".format(name)
    internal_log = "{}.platypus_internal.log".format(name)

    platypus_command = ["{}".format(config['platypus']['bin']),
                        # NOTE(review): this list literal is never closed in the visible source;
                        # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("Platypus Command: {}\n".format(platypus_command))
    pipeline.run_and_log_command(" ".join(platypus_command), platypus_log)

    return platypus_vcf
Example #2
def scalpel_single(job, config, name, samples, input_bam):
    """Run Scalpel on an unmatched tumour sample and call somatic variants.

    Two commands are run in sequence: the Scalpel command itself and a
    follow-up "fix sample name" command, each with its own log file.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the commands.
    :param config: The configuration dictionary (``config['scalpel']['bin']`` is read).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param samples: presumably the samples configuration dictionary; not referenced in the visible code.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The Scalpel output vcf path (``variants.indel.vcf`` inside the
        ``<name>-scalpel-output`` directory under the current working directory).
    """

    cwd = os.getcwd()
    output_dir = os.path.join(cwd, "{}-scalpel-output".format(name))
    scalpel_vcf = os.path.join(output_dir, "variants.indel.vcf")
    fixed_vcf = "{}.scalpel.vcf".format(name)
    logfile = "{}.scalpel.log".format(name)
    logfile2 = "{}.scalpel_fix.log".format(name)

    scalpel_command = ["{}".format(config['scalpel']['bin']),
                       # NOTE(review): this list literal is never closed in the visible source;
                       # the remaining arguments appear to be missing -- confirm against upstream.
                       # "--covthr",
                       # "3",
                       # "--lowcov",
                       # "1",

    fix_sample_name_command = ["cat",
                               # NOTE(review): list literal also left open in the visible source.

    job.fileStore.logToMaster("Scalpel Command: {}\n".format(scalpel_command))
    pipeline.run_and_log_command(" ".join(scalpel_command), logfile)

    job.fileStore.logToMaster("Scalpel Fix Command: {}\n".format(fix_sample_name_command))
    pipeline.run_and_log_command(" ".join(fix_sample_name_command), logfile2)

    # The existence/size check is made on the *fixed* vcf, but the value returned
    # is the raw Scalpel vcf path.
    # NOTE(review): confirm that returning scalpel_vcf rather than fixed_vcf is intended.
    file_path = os.path.join(cwd, fixed_vcf)
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        return scalpel_vcf
        # NOTE(review): as shown, everything below is unreachable after the return above
        # (an ``else:`` branch appears to be missing), and the JobException is returned
        # rather than raised -- confirm against upstream.
        job.fileStore.logToMaster("Scalpel ran into a problem and no output was generated for file {}. Check logfile"
                                  "{} for details\n".format(scalpel_vcf, logfile))
        return JobException("Scalpel ran into a problem and no output was generated for file {}. Check logfile"
                            "{} for details\n".format(scalpel_vcf, logfile))
Example #3
def freebayes_single(job, config, name, input_bam):
    """Run FreeBayes without a matched normal sample.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['freebayes']['bin']`` is read).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name (``<name>.freebayes.vcf``).
    """

    freebayes_vcf = "{}.freebayes.vcf".format(name)
    logfile = "{}.freebayes.log".format(name)

    command = ["{}".format(config['freebayes']['bin']),
               "--min-repeat-entropy 1",
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("FreeBayes Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return freebayes_vcf
Example #4
def scanindel(job, config, name, samples, input_bam):
    """Run ScanIndel caller for Structural Variant Detection.

    A one-line, tab-separated sample configuration file (``<name>\\t<input_bam>``)
    is written to the current directory before the command is run.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['scanindel']['bin']`` is read).
    :type config: dict.
    :param name: sample name.
    :type name: str.
    :param samples: The samples configuration dictionary (not referenced in the visible code).
    :type samples: dict.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name (``<name>.scanindel.vcf``).
    """

    output_vcf = "{}.scanindel.vcf".format(name)
    logfile = "{}.scanindel.log".format(name)
    sample_config_file = "{}.scanindel_sample_config.txt".format(name)

    # No trailing newline is written after the single record.
    with open(sample_config_file, 'w') as sample_config:
        sample_config.write("{id}\t{file}".format(id=name, file=input_bam))

    command = ("{}".format(config['scanindel']['bin']),
               # NOTE(review): this tuple literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("ScanIndel Configuration Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output_vcf
Example #5
def star_unpaired(job, config, name, samples, flags):
    """Align single-end RNA-Seq data to a reference using STAR.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['star']`` supplies
        ``bin``, ``index`` and ``num_cores``).
    :type config: dict.
    :param name: sample name, used as the key into ``samples`` and as file prefix.
    :type name: str.
    :param samples: The samples info and config dictionary
        (``samples[name]['fastq1']`` is the input read file).
    :type samples: dict.
    :param flags: Extra settings forwarded to ``add_additional_options``.
    :returns:  str -- The coordinate-sorted BAM file name
        (``<name>.star.Aligned.sortedByCoord.out.bam``).
    """

    # STAR derives its output names from the prefix, so the BAM name is built
    # from the same prefix that is passed via --outFileNamePrefix.
    output = "{}.star.".format(name)
    logfile = "{}.star.log".format(name)
    output_file = "{}Aligned.sortedByCoord.out.bam".format(output)

    command = ["{}".format(config['star']['bin']),
               "--genomeDir {}".format(config['star']['index']),
               "--runThreadN {}".format(config['star']['num_cores']),
               "--readFilesIn {}".format(samples[name]['fastq1']),
               "--outFileNamePrefix {}".format(output),
               "--outReadsUnmapped Fastx",
               "--outSAMtype BAM SortedByCoordinate"]

    command = add_additional_options(command, config, flags)

    job.fileStore.logToMaster("STAR Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output_file
Example #6
def sambamba_region_coverage(job, config, name, samples, input_bam):
    """Run SamBamba to calculate the coverage of targeted regions.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['sambamba']['bin']`` is read).
    :type config: dict.
    :param name: sample/library name, used as the prefix of the output and log file names.
    :type name: str.
    :param samples: The samples configuration dictionary (not referenced in the visible code).
    :type samples: dict
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output BED file name (``<name>.sambamba_coverage.bed``).
    """

    output = "{}.sambamba_coverage.bed".format(name)
    logfile = "{}.sambamba_coverage.log".format(name)

    command = ["{}".format(config['sambamba']['bin']),
               "depth region",
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("SamBamba Coverage Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output
Example #7
def rapmap_quasi_paired(job, config, name, samples, flags):
    """Run RapMap Quasi-Mapping procedure on paired-end sequencing data.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['rapmap']`` supplies
        ``bin``, ``num_cores`` and ``index``).
    :type config: dict.
    :param name: sample name, used as the key into ``samples`` and as file prefix.
    :type name: str.
    :param samples: The samples info and config dictionary
        (``samples[name]['fastq1']`` / ``['fastq2']`` are the paired read files).
    :type samples: dict.
    :param flags: Accepted for signature compatibility; not used by this function.
    :returns:  str -- The output SAM file name (``<name>.rapmap.sam``).
    """

    output = "{}.rapmap.sam".format(name)
    logfile = "{}.rapmap_quasi.log".format(name)

    command = ["{} quasimap".format(config['rapmap']['bin']),
               "-t {}".format(config['rapmap']['num_cores']),
               "-i {}".format(config['rapmap']['index']),
               "-1 {}".format(samples[name]['fastq1']),
               "-2 {}".format(samples[name]['fastq2']),
               "-o {}".format(output)]

    job.fileStore.logToMaster("RapMap Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output
Example #8
def cuffquant(job, config, name, samples):
    """Run Cuffquant for the named sample.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['cuffquant']`` supplies
        ``bin`` and ``num_cores``; ``config['reference']`` is passed via ``-b``).
    :type config: dict.
    :param name: sample name, used as the prefix of the output directory and log file.
    :type name: str.
    :param samples: presumably the samples configuration dictionary; not referenced in the visible code.
    :returns:  str -- The directory name for the cuffquant results (``<name>_cuffquant``).
    """

    outdir = "{}_cuffquant".format(name)
    logfile = "{}.cuffquant.log".format(name)

    command = ["{}".format(config['cuffquant']['bin']),
               "-b {}".format(config['reference']),
               "-p {}".format(config['cuffquant']['num_cores']),
               "-o ./{}_cuffquant".format(name),
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("Cuffquant Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return outdir
Example #9
def stringtie_merge(job, config, samples, flags, transcripts_list):
    """Merge transcript assemblies with StringTie.

    Output and log file names are derived from ``config["run_id"]``.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``run_id``, ``transcript_reference``
        and ``config["stringtie"]["num_cores"]`` are read).
    :type config: dict.
    :param samples: The samples info and config dictionary (not referenced in the visible code).
    :type samples: dict.
    :param flags: Flags for extra parameter settings, forwarded to ``add_additional_options``.
    :type flags: list.
    :param transcripts_list: not referenced in the visible code
        -- presumably the list of per-sample GTF files to merge; TODO confirm.
    :returns:  str -- The merged GTF file name (``<run_id>.stringtie.merged.gtf``).
    """

    logfile = "{}.stringtie_merge.log".format(config["run_id"])
    outfile = "{}.stringtie.merged.gtf".format(config["run_id"])

    command = [
        # NOTE(review): no executable is present at the head of the list and the literal is
        # never closed in the visible source; lines appear to be missing -- confirm against upstream.
        "-p {}".format(config["stringtie"]["num_cores"]),
        "-G {}".format(config["transcript_reference"]),
        "-o {}".format(outfile),

    command = add_additional_options(command, config, flags)

    job.fileStore.logToMaster("StringTie Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return outfile
Example #10
def bowtie_paired(job, config, name, samples, flags):
    """Align paired-end RNA-Seq data to a reference using Bowtie2.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['bowtie']`` supplies
        ``bin``, ``index`` and ``num_cores``).
    :type config: dict.
    :param name: sample name, used as the key into ``samples`` and as file prefix.
    :type name: str.
    :param samples: The samples info and config dictionary
        (``samples[name]['fastq1']`` / ``['fastq2']`` are the paired read files).
    :type samples: dict.
    :param flags: Extra settings forwarded to ``add_additional_options``.
    :returns:  str -- The output SAM file name (``<name>.bowtie.sam``).
    """

    output = "{}.bowtie.sam".format(name)
    logfile = "{}.bowtie.log".format(name)

    command = ["{}".format(config['bowtie']['bin']),
               "-x {}".format(config['bowtie']['index']),
               "-p {}".format(config['bowtie']['num_cores']),
               "-1 {}".format(samples[name]['fastq1']),
               "-2 {}".format(samples[name]['fastq2']),
               "-S {}".format(output)]

    command = add_additional_options(command, config, flags)

    job.fileStore.logToMaster("Bowtie Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output
Example #11
def salmonVB_unpaired(job, config, name, samples):
    """Run Salmon Quasi-Mapping quantification on single-end data.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['salmon']`` supplies
        ``bin``, ``index``, ``num_cores`` and ``num_bootstraps``).
    :type config: dict.
    :param name: sample name, used as the key into ``samples`` and as file prefix.
    :type name: str.
    :param samples: The samples info and config dictionary
        (``samples[name]['library_type']`` and ``['fastq1']`` are read).
    :type samples: dict.
    :returns:  str -- The Salmon output directory name (``<name>.salmon.output``).
    """

    output_dir = "{}.salmon.output".format(name)
    logfile = "{}.salmon.log".format(name)

    # NOTE(review): the function name refers to the VB optimization algorithm, but no
    # VB-specific option appears in the command as visible here -- confirm against upstream.
    command = ["{} quant".format(config['salmon']['bin']),
               "-i {}".format(config['salmon']['index']),
               "-l {}".format(samples[name]['library_type']),
               "-p {}".format(config['salmon']['num_cores']),
               "--numBootstraps {}".format(config['salmon']['num_bootstraps']),
               "-r {}".format(samples[name]['fastq1']),
               "-o {}".format(output_dir)]

    job.fileStore.logToMaster("Salmon Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output_dir
Example #12
def cuffmerge(job, config, name, samples, manifest):
    """Merge assembled cufflinks transcriptomes from all samples.

    Side effect: ``config['merged_transcript_reference']`` is set to
    ``merged.gtf`` inside the current working directory.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``run_id``, ``transcript_reference``,
        ``reference`` and ``config['cuffmerge']`` are read; see side effect above).
    :type config: dict.
    :param name: sample name (not referenced in the visible code).
    :type name: str.
    :param samples: Samples config data (not referenced in the visible code).
    :type samples: dict.
    :param manifest: not referenced in the visible code
        -- presumably the file listing the assemblies to merge; TODO confirm.
    :returns:  str -- The stats root name (``<run_id>_cuffmerge_stats``).
    """

    stats_root = "{}_cuffmerge_stats".format(config['run_id'])
    logfile = "{}.cuffmerge.log".format(config['run_id'])

    command = ["{}".format(config['cuffmerge']['bin']),
               "-g {}".format(config['transcript_reference']),
               "-s {}".format(config['reference']),
               "-p {}".format(config['cuffmerge']['num_cores']),
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("Cuffmerge Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    # Record where the merged GTF is expected so later steps can read it from config.
    pwd = os.getcwd()
    config['merged_transcript_reference'] = os.path.join(pwd, "merged.gtf")

    return stats_root
Example #13
def bedtools_coverage_per_site(job, config, name, input_bam):
    """Run BedTools to calculate the per-site coverage of targeted regions.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (not referenced in the visible code).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output BED file name (``<name>.bedtools_coverage_per_site.bed``).
    """

    output = "{}.bedtools_coverage_per_site.bed".format(name)
    logfile = "{}.bedtools_coverage.log".format(name)

    coverage = [
        # NOTE(review): this list literal is empty and never closed in the visible source;
        # the command arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("BedTools Coverage Command: {}\n".format(coverage))
    pipeline.run_and_log_command(" ".join(coverage), logfile)

    return output
Example #14
def run_lowfreq(job, config, name, input_bam):
    """Run LoFreq on an unmatched tumour sample and call somatic variants.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (not referenced in the visible code).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  Nothing is returned in the visible code, although the output vcf
        name (``<name>.lofreq.vcf``) is computed -- NOTE(review): confirm whether a
        ``return vcf`` is missing.
    """

    vcf = "{}.lofreq.vcf".format(name)
    logfile = "{}.lofreq.log".format(name)

    command = [
        # NOTE(review): the two adjacent literals below have no comma between them, so Python
        # concatenates them into the single string "--call-indels-f" -- likely a missing comma.
        # The list literal is also never closed in the visible source.
        "--call-indels" "-f",

    job.fileStore.logToMaster("LoFreq Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)
Example #15
def run_pindel(job, config, name, input_bam):
    """Run Pindel caller for InDel Detection, then convert its output with pindel2vcf.

    A Pindel BAM configuration file containing one line,
    ``<input_bam> <insert_size> <name>``, is written before the commands run.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the commands.
    :param config: The configuration dictionary (``insert_size``,
        ``config['pindel']['bin']`` and ``config['pindel2vcf']['bin']`` are read).
    :type config: dict.
    :param name: sample name, used as the prefix of the generated file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name (``<name>.pindel.vcf``).
    """

    pindel_config = "{}.pindel_config.txt".format(name)
    output_dir = "{}_pindel".format(name)
    output_vcf = "{}.pindel.vcf".format(name)

    logfile = "{}.pindel.log".format(name)
    vcf_logfile = "{}.pindel2vcf.log".format(name)

    with open(pindel_config, 'w') as bam_config:
        bam_config.write("%s %s %s\n" % (input_bam, config['insert_size'], name))

    command = ("{}".format(config['pindel']['bin']),
               # NOTE(review): this tuple literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    pindel2vcf_command = ("{}".format(config['pindel2vcf']['bin']),
                          # NOTE(review): tuple literal also left open in the visible source.

    job.fileStore.logToMaster("Pindel Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    job.fileStore.logToMaster("Pindel2vcf Command: {}\n".format(pindel2vcf_command))
    pipeline.run_and_log_command(" ".join(pindel2vcf_command), vcf_logfile)

    return output_vcf
Example #16
def run_bwa_mem(job, config, name, samples):
    """Run a BWA alignment piped through ``samtools view`` and a sort step.

    The three command lists are joined into one shell pipeline
    (``bwa | view | sort``) and executed as a single command.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log progress.
    :param config: The configuration dictionary (``config["samtools"]["bin"]`` is read
        in the visible code).
    :type config: dict.
    :param name: sample name, used as the prefix of the output, temp and log file names.
    :type name: str.
    :param samples: presumably the samples configuration dictionary supplying the
        input FastQ files; not referenced in the visible code.
    :returns:  str -- Aligned and sorted BAM file name (``<name>.bwa.sorted.bam``).
    """


    job.fileStore.logToMaster("Running BWA for sample {}\n".format(name))

    output_bam = "{}.bwa.sorted.bam".format(name)
    temp = "{}.bwa.sort.temp".format(name)
    logfile = "{}.bwa-align.log".format(name)

    bwa_cmd = [
        # NOTE(review): this list literal is empty and never closed in the visible source;
        # the BWA arguments appear to be missing -- confirm against upstream.

    # "-u" requests uncompressed BAM output and "-" reads from stdin (the BWA output).
    view_cmd = ["{}".format(config["samtools"]["bin"]), "view", "-u", "-"]

    sort_cmd = [
        # NOTE(review): list literal also left open in the visible source.

    command = "{} | {} | {}".format(" ".join(bwa_cmd), " ".join(view_cmd), " ".join(sort_cmd))

    job.fileStore.logToMaster("BWA Command: {}\n".format(command))
    pipeline.run_and_log_command(command, logfile)

    return output_bam
Example #17
def vardict_single(job, config, name, samples, input_bam):
    """Run VarDict on an unmatched tumour sample and call somatic variants.

    The final command is a shell pipeline:
    ``vardict | strandbias | vardict2vcf | sort > <name>.vardict.vcf``.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (the ``bin`` entries of ``vardict``,
        ``vardict_strandbias``, ``vardict2vcf`` and ``vcftools_sort`` are read).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param samples: presumably the samples configuration dictionary; not referenced in the visible code.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name (``<name>.vardict.vcf``).
    """

    vardict_vcf = "{}.vardict.vcf".format(name)
    logfile = "{}.vardict.log".format(name)

    vardict = ["{}".format(config['vardict']['bin']),
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.
               # "-a", the amplicon flag seems to be creating errors
               # "-F 0", Probably don't need this as duplicates aren't marked and ignoring secondary alignment good

    vardict2vcf = ["{}".format(config['vardict2vcf']['bin']),
                   # NOTE(review): list literal also left open in the visible source.

    vcfsort = ["{}".format(config['vcftools_sort']['bin']),
               # NOTE(review): list literal also left open in the visible source.

    command = ("{vardict} | {strandbias} | {vardict2vcf} | "
               "{sort} > {vcf}".format(vardict=" ".join(vardict), strandbias=config['vardict_strandbias']['bin'],
                                       vardict2vcf=" ".join(vardict2vcf), sort=" ".join(vcfsort), vcf=vardict_vcf))

    job.fileStore.logToMaster("VarDict Command: {}\n".format(command))
    pipeline.run_and_log_command(command, logfile)

    return vardict_vcf
Example #18
def hisat_unpaired(job, config, name, samples, flags):
    """Align single-end RNA-Seq data to a reference using HiSat2.

    The HiSat2 output is piped through two samtools commands
    (``hisat | view | sort``) and run as one shell pipeline.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['hisat']`` supplies ``bin``,
        ``num_cores`` and ``index``; ``config['samtools']['bin']`` is also read).
    :type config: dict.
    :param name: sample name, used as the key into ``samples`` and as file prefix.
    :type name: str.
    :param samples: The samples info and config dictionary
        (``samples[name]['fastq1']`` is the input read file).
    :type samples: dict.
    :param flags: Extra settings forwarded to ``add_additional_options``.
    :returns:  str -- The output bam file name (``<name>.hisat.sorted.bam``).
    """

    working_dir = os.getcwd()

    logfile = "{}.hisat.log".format(name)
    output = "{}.hisat.sorted.bam".format(name)
    # Unaligned reads are written to an absolute path in the current working directory.
    unaligned = os.path.join(working_dir, "{}.unaligned.sam".format(name))
    temp = "{}.hisat.sort.temp".format(name)

    hisat_cmd = ["{}".format(config['hisat']['bin']),
                 "-p {}".format(config['hisat']['num_cores']),
                 "-x {}".format(config['hisat']['index']),
                 "-U {}".format(samples[name]['fastq1']),
                 "--un {}".format(unaligned)
                 # NOTE(review): this list literal is never closed in the visible source.

    hisat_cmd = add_additional_options(hisat_cmd, config, flags)

    view_cmd = ["{}".format(config['samtools']['bin']),
                # NOTE(review): list literal left open in the visible source;
                # the samtools arguments appear to be missing -- confirm against upstream.

    sort_cmd = ["{}".format(config['samtools']['bin']),
                # NOTE(review): list literal also left open in the visible source.

    command = "{} | {} | {}".format(" ".join(hisat_cmd), " ".join(view_cmd), " ".join(sort_cmd))

    job.fileStore.logToMaster("HiSat2 Command: {}\n".format(command))
    pipeline.run_and_log_command(command, logfile)

    return output
Example #19
def bcftools_filter_variants_regions(job, config, name, samples, input_vcf):
    """Use bcftools to filter vcf file to only variants found within the specified regions file.

    The input VCF is first passed to ``bgzip_and_tabix_vcf``; a filter command
    and then a sort command are run, each with its own log file.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the commands.
    :param config: The configuration dictionary (not referenced in the visible code).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param samples: presumably the samples configuration dictionary; not referenced in the visible code.
    :param input_vcf: The input_vcf file name to process.
    :type input_vcf: str.
    :returns:  str -- The sorted output vcf file name (``<name>.on_target_sorted.vcf``).
    """

    filtered_vcf = "{}.on_target.vcf".format(name)
    sorted_vcf = "{}.on_target_sorted.vcf".format(name)
    bgzipped_vcf = "{}.gz".format(input_vcf)
    logfile = "{}.on_target_filter.log".format(name)
    sort_logfile = "{}.on_target_sorted.log".format(name)

    bgzip_and_tabix_vcf(job, input_vcf)

    filter_command = [
        # NOTE(review): this list literal is empty and never closed in the visible source;
        # the command arguments appear to be missing -- confirm against upstream.

    sort_command = [
        # NOTE(review): list literal also left open in the visible source.

    job.fileStore.logToMaster("BCFTools isec command for filtering to only target regions: {}\n".format(filter_command))
    pipeline.run_and_log_command(" ".join(filter_command), logfile)

    job.fileStore.logToMaster("VCFTools-sort command for filtering to only target regions: {}\n".format(sort_command))
    pipeline.run_and_log_command(" ".join(sort_command), sort_logfile)

    return sorted_vcf
Example #20
def bgzip_and_tabix_vcf(job, infile):
    """Run BGZip and Tabix on the specified VCF.

    The commands and their log files come from
    ``_bgzip_and_tabix_vcf_instructions``; each returned item is indexed as
    ``[0]`` for the command string and ``[1]`` for the log file.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the commands.
    :param infile: The input_vcf file name to process.
    :type infile: str.
    :returns:  None.
    """

    bgzip_instructions, tabix_instructions = _bgzip_and_tabix_vcf_instructions(infile)

    job.fileStore.logToMaster("BGzip Command: {}\n".format(bgzip_instructions[0]))
    pipeline.run_and_log_command(bgzip_instructions[0], bgzip_instructions[1])

    job.fileStore.logToMaster("Tabix Command: {}\n".format(tabix_instructions[0]))
    pipeline.run_and_log_command(tabix_instructions[0], tabix_instructions[1])
Example #21
def convert2pe(job, row):
    """Convert a BAM file into paired FastQ files with ``bedtools bamtofastq``.

    The BAM file name is split on ``.``; field 2 is used as the lane id and
    field 4 as the sample id when naming the two FastQ outputs.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param row: Sequence whose first element is the BAM file name.
    :returns:  None.
    """
    source_bam = row[0]
    name_parts = source_bam.split('.')
    lane = name_parts[2]
    sample = name_parts[4]

    read1_fastq = "{}.{}.R1.fastq".format(sample, lane)
    read2_fastq = "{}.{}.R2.fastq".format(sample, lane)
    log_name = "convert_{}.log".format(source_bam)

    # Kept as a tuple: the log message below embeds its repr.
    bamtofastq = (
        "bedtools bamtofastq",
        "-i {}".format(source_bam),
        "-fq {}".format(read1_fastq),
        "-fq2 {}".format(read2_fastq),
    )

    message = "Running command {} and logging to {}\n".format(bamtofastq, log_name)
    job.fileStore.logToMaster(message)

    shell_line = " ".join(bamtofastq)
    pipeline.run_and_log_command(shell_line, log_name)
Example #22
def pisces(job, config, name, input_bam):
    """Run Pisces on a single sample.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['pisces']['bin']`` is read).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name (``<name>.pisces.vcf``).
    """

    output_vcf = "{}.pisces.vcf".format(name)
    logfile = "{}.pisces.log".format(name)
    command = ["{}".format(config['pisces']['bin']),
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("Pisces Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return output_vcf
Example #23
def mutect2_single(job, config, name, samples, input_bam):
    """Run MuTect2 on an unmatched tumour sample and call somatic variants.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (not referenced in the visible code).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :param samples: presumably the samples configuration dictionary; not referenced in the visible code.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The output vcf file name (``<name>.mutect2.vcf``).
    """

    mutect_vcf = "{}.mutect2.vcf".format(name)
    mutect_logfile = "{}.mutect2.log".format(name)

    mutect_command = [
        # NOTE(review): no executable is present at the head of the list and the literal is
        # never closed in the visible source; lines appear to be missing -- confirm against upstream.
        "-drf DuplicateRead",
        "-ip 100",

    job.fileStore.logToMaster("MuTect2 Command: {}\n".format(mutect_command))
    pipeline.run_and_log_command(" ".join(mutect_command), mutect_logfile)

    # job.fileStore.logToMaster("Subset Command: {}\n".format(subset_command))
    # pipeline.run_and_log_command(" ".join(subset_command), subset_log)

    return mutect_vcf
Example #24
def stringtie(job, config, name, samples, flags):
    """Perform transcript assembly and quantification with StringTie.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``merged_transcript_reference`` and
        ``config["stringtie"]["num_cores"]`` are read).
    :type config: dict.
    :param name: sample name, used as the prefix of the generated file names.
    :type name: str.
    :param samples: The samples info and config dictionary (not referenced in the visible code).
    :type samples: dict.
    :param flags: Flags for extra parameter settings, forwarded to ``add_additional_options``.
    :type flags: list.
    :returns:  str -- The transcript assembly GTF file name (``<name>.stringtie.gtf``).
        Note that this is the bare file name, while ``-o`` is given the full path
        inside ``<name>_stringtie_final`` under the current working directory.
    """

    logfile = "{}.stringtie.log".format(name)
    outfile = "{}.stringtie.gtf".format(name)
    abundances_file = "{}.gene_abundances.txt".format(name)

    outdir = "{}_stringtie_final".format(name)

    working_dir = os.getcwd()
    full_path_outfile = os.path.join(working_dir, outdir, outfile)

    command = [
        # NOTE(review): no executable is present at the head of the list and the literal is
        # never closed in the visible source; lines appear to be missing -- confirm against upstream.
        "-p {}".format(config["stringtie"]["num_cores"]),
        "-G {}".format(config["merged_transcript_reference"]),
        "-A {}".format(abundances_file),
        "-f 0.05",
        "-m 100",
        "-o {}".format(full_path_outfile),

    command = add_additional_options(command, config, flags)

    job.fileStore.logToMaster("StringTie Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return outfile
Example #25
def run_delly2_single(job, config, name, input_bam):
    """Run delly2 for structural variant detection.

    As delly2 is parallelized on the level of samples, we use a single-threaded
    version. One Delly command is run per mutation type (DEL, DUP, TRA, INV),
    followed by a merge command.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the commands.
    :param config: The configuration dictionary (``config['delly']['bin']`` is read).
    :type config: dict.
    :param name: sample name, used as the prefix of the per-type vcf and log file names.
    :type name: str.
    :param input_bam: The input_bam file name to process.
    :type input_bam: str.
    :returns:  str -- The merged Delly output vcf file name.
    """

    delly_vcfs = list()
    delly_command_core = ("{}".format(config['delly']['bin']),
                          # NOTE(review): this tuple literal is never closed in the visible source;
                          # the remaining arguments appear to be missing -- confirm against upstream.

    for mut_type in ["DEL", "DUP", "TRA", "INV"]:
        output_vcf = "{sample}.{type}.vcf".format(sample=name, type=mut_type)
        logfile = "{sample}.{type}.log".format(sample=name, type=mut_type)


        # NOTE(review): as shown, delly_command is an empty list when it is logged and run,
        # and delly_vcfs is never appended to; the lines that populate them appear to be missing.
        delly_command = list()

        job.fileStore.logToMaster("Running Delly: {}\n".format(delly_command))
        pipeline.run_and_log_command(" ".join(delly_command), logfile)

    # NOTE(review): merge_command, merge_log and merged_vcf are not defined anywhere in the
    # visible code -- their definitions appear to be missing.
    job.fileStore.logToMaster("Merging delly output with command: {}\n".format(merge_command))
    pipeline.run_and_log_command(" ".join(merge_command), merge_log)

    return merged_vcf
Example #26
def cufflinks(job, config, name, samples):
    """Transcriptome assembly with cufflinks.

    The command is only run when no ``transcripts.gtf`` file exists in the
    current working directory.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``transcript_reference``, ``reference``
        and ``config['cufflinks']`` are read).
    :type config: dict.
    :param name: sample name, used as the key into ``samples`` and as file prefix.
    :type name: str.
    :param samples: The samples info and config dictionary
        (``samples[name]['cufflinks_lib']`` is passed as ``--library-type``).
    :returns:  str -- The absolute path of the ``<name>_cufflinks`` output directory.
    """

    outdir = "{}_cufflinks".format(name)
    logfile = "{}.cufflinks.log".format(name)

    working_dir = os.getcwd()
    path = os.path.join(working_dir, outdir)
    # NOTE(review): the next line is indented with no visible enclosing statement; the
    # directory-creation logic around it appears to be missing -- confirm against upstream.
        sys.stderr.write("Directory {} already exists. Not creating...\n".format(path))


    command = ["{}".format(config['cufflinks']['bin']),
               "-g {}".format(config['transcript_reference']),
               "-b {}".format(config['reference']),
               "-p {}".format(config['cufflinks']['num_cores']),
               "--library-type {}".format(samples[name]['cufflinks_lib']),
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    if not os.path.isfile("transcripts.gtf"):
        job.fileStore.logToMaster("Cufflinks Command: {}\n".format(command))
        pipeline.run_and_log_command(" ".join(command), logfile)
        # NOTE(review): as shown, the "already executed" message below is logged right after
        # running the command; an ``else:`` branch appears to be missing.
        job.fileStore.logToMaster("Cufflinks appears to have already executed for {}. Skipping...\n".format(name))


    return path
Example #27
def vt_normalization(job, config, sample, caller, input_vcf):
    """Decompose and left normalize variants.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (not referenced in the visible code).
    :type config: dict.
    :param sample: sample name, first component of the output and log file names.
    :type sample: str.
    :param caller: caller name, second component of the output and log file names.
    :type caller: str.
    :param input_vcf: The input_vcf file name to process.
    :type input_vcf: str.
    :returns:  str -- The output vcf file name (``<sample>.<caller>.normalized.vcf``).
    """

    output_vcf = "{}.{}.normalized.vcf".format(sample, caller)
    logfile = "{}.{}.vt_normalization.log".format(sample, caller)

    normalization = ["zless",
                     # NOTE(review): this list literal is never closed in the visible source;
                     # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("VT Command: {}\n".format(normalization))
    pipeline.run_and_log_command(" ".join(normalization), logfile)

    return output_vcf
Example #28
def run_flt3_itdseek(job, config, name):
    """Run ITDseek without a matched normal sample.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['itdseek']['bin']`` is read).
    :type config: dict.
    :param name: sample name, used as the prefix of the output and log file names.
    :type name: str.
    :returns:  str -- The output vcf file name (``<name>.flt3.itdseek.vcf``).
    """

    itdseek_vcf = "{}.flt3.itdseek.vcf".format(name)
    itdseek_logfile = "{}.flt3.itdseek.log".format(name)

    itdseek_command = ["{}".format(config['itdseek']['bin']),
                       # NOTE(review): this list literal is never closed in the visible source;
                       # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("ITDSeek Command: {}\n".format(itdseek_command))
    pipeline.run_and_log_command(" ".join(itdseek_command), itdseek_logfile)

    return itdseek_vcf
Example #29
def run_fastqc(job, config, samples):
    """Run FastQC on provided FastQ files.

    All FastQ file names are joined into a single space-separated string and
    the run is logged to ``fastqc.log``.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['fastqc']['bin']`` is read).
    :type config: dict.
    :param samples: Samples collection that is iterated over.
    :returns:  None.
    """

    job.fileStore.logToMaster("Running FastQC for all samples\n")
    logfile = "fastqc.log"

    fastq_files_list = list()
    # NOTE(review): the loop below has no body in the visible source; the lines that append
    # each sample's FastQ files to fastq_files_list appear to be missing.
    for sample in samples:

    fastq_files_string = " ".join(fastq_files_list)
    command = ["{}".format(config['fastqc']['bin']),
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments appear to be missing -- confirm against upstream.

    job.fileStore.logToMaster("FastQC Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)
Example #30
def joint_variant_calling(job, config, name, samples):
    """Create a cohort VCF file based on joint calling from gVCF files.

    One ``--variant <sample>.haplotypecaller.g.vcf`` argument is built for
    every entry in ``samples``.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the command.
    :param config: The configuration dictionary (``config['gatk-jointgenotyper']['bin']`` is read).
    :type config: dict.
    :param name: name used as the prefix of the output and log file names.
    :type name: str.
    :param samples: samples configuration dictionary; iterated to build the gVCF arguments.
    :type samples: dict
    :returns:  str -- The output vcf file name (``<name>.haplotypecaller.vcf``).
    """

    vcf = "{}.haplotypecaller.vcf".format(name)
    logfile = "{}.haplotypecaller_gvcf.log".format(name)

    gvcfs = list()
    for sample in samples:
        gvcfs.append("--variant {}.haplotypecaller.g.vcf".format(sample))

    gvcf_string = " ".join(gvcfs)

    command = ["{}".format(config['gatk-jointgenotyper']['bin']),
               # NOTE(review): this list literal is never closed in the visible source;
               # the remaining arguments (including gvcf_string) appear to be missing.

    job.fileStore.logToMaster("GenotypeVCFs Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    return vcf
Example #31
def run_fundi(job, root_name):
    """Run the FunDi perl script and rename its log file.

    ``./FunDi.pl`` is run from the current directory with fixed options, then
    ``FunDi.log`` is moved to ``<root_name>_FunDi.log``. Both commands are
    logged to ``<root_name>.fundi.log``.

    :param job: Job object; ``job.fileStore.logToMaster`` is used to log the commands.
    :param root_name: Prefix used for the log file and the renamed FunDi log.
    :type root_name: str.
    :returns:  str -- The log file name (``<root_name>.fundi.log``).
    """

    logfile = "{}.fundi.log".format(root_name)

    command = ["perl ./FunDi.pl",
               "-m LG+F+G",
               "-P iqtree",
               "-r 4",
               "-N 22"]

    mv_fundi_log = "mv FunDi.log {}_FunDi.log".format(root_name)

    job.fileStore.logToMaster("FunDi Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    # Log the rename command that is actually executed, not the FunDi command again.
    job.fileStore.logToMaster("Rename file Command: {}\n".format(mv_fundi_log))
    pipeline.run_and_log_command(mv_fundi_log, logfile)

    return logfile
Example #32
def subsample_bam(job, addresses, keyspace, auth, name, samples, config, seed,
                  fraction, iteration):
    """Use samtools view to subsample an input file to the specified fraction"""

    library_name = "subsample-{}-{}-{}".format(samples[name]['library_name'],
                                               fraction, iteration)
    sublog = "subsample-{}-{}-{}.log".format(name, fraction, iteration)
    input_bam = "{}.recalibrated.sorted.bam".format(
    subsampled_bam = "subsample-{}-{}-{}.bam".format(
        samples[name]['library_name'], fraction, iteration)
    samcommand = "samtools view -s {seed}.{fraction} -b {input} > {output}".format(
        seed=seed, fraction=fraction, input=input_bam, output=subsampled_bam)

    index_command = "samtools index {}".format(subsampled_bam)
    index_log = "{}.index.log".format(subsampled_bam)

    output = "{}.sambamba_coverage.bed".format(subsampled_bam)
    logfile = "{}.sambamba_coverage.log".format(subsampled_bam)

    command = ("{}".format(config['sambamba']['bin']), "depth region", "-L",
               "{}".format(samples[name]['regions']), "-t",
               "{}".format(config['sambamba']['num_cores']), "-T",
               "{}".format(config['coverage_threshold']), "-T",
               "{}".format(subsampled_bam), ">", "{}".format(output))

    job.fileStore.logToMaster("Samtools ViewCommand: {}\n".format(samcommand))
    pipeline.run_and_log_command(samcommand, sublog)

        "Samtools Index Command: {}\n".format(index_command))
    pipeline.run_and_log_command(index_command, index_log)

        "SamBamba Coverage Command: {}\n".format(command))
    pipeline.run_and_log_command(" ".join(command), logfile)

    connection.setup(addresses, keyspace, auth_provider=auth)

    job.fileStore.logToMaster("Adding coverage data: {}\n".format(samcommand))

    num_libs = (float(samples[name]['num_libraries_in_run']) *
                (1 / (float(fraction) / 100.00)))
    with open(output, 'rb') as coverage:
        reader = csv.reader(coverage, delimiter='\t')
        header = reader.next()
        threshold_indices = list()
        thresholds = list()
        index = 0
        for element in header:
            if element.startswith("percentage"):
                threshold = element.replace('percentage', '')
            index += 1

        for row in reader:
            threshold_data = defaultdict(float)
            index = 0
            for threshold in thresholds:
                threshold_data[threshold] = row[threshold_indices[index]]
                index += 1

            sample_data = SampleCoverage.create(

            amplicon_data = AmpliconCoverage.create(