Ejemplo n.º 1
0
def _get_r_wrapper_lines(phase):
    """
    Return the lines of the shell wrapper that launches the R pipeline for one phase.

    phase must be "cons" or "trees". The wrapper maps its positional shell
    arguments ($1 .. ${10}) onto named variables, defines CMD from the
    module-level SCRIPT and finally calls CMD with those variables.
    """
    assert phase in ("cons", "trees"), "The phase parameter can only be trees or cons"

    # Only the tail of the run command depends on the phase: "cons" passes a
    # literal "1" followed by the number of blocks, "trees" passes the LSF
    # job array index instead.
    if phase == "cons":
        tail = '" ${PARALLEL} ${THREADS} ${BINSIZE} "1" ${NBLOCKS}'
    else:
        tail = '" ${PARALLEL} ${THREADS} ${BINSIZE} $LSB_JOBINDEX'

    head = '${CMD} ${DATASET} ${ITERS} ${BURNIN} ${DATPATH} ${PURITYFILE} "'
    run_cmd = merge_items([head, phase, tail], sep="")

    # Positional shell arguments, in the order the wrapper expects them
    assignments = [
        "DATASET=$1",
        "ITERS=$2",
        "BURNIN=$3",
        "DATPATH=$4",
        "PURITYFILE=$5",
        "ANALYSIS=$6",
        "PARALLEL=$7",
        "THREADS=$8",
        "BINSIZE=$9",
        "NBLOCKS=${10}",
    ]
    cmd_definition = merge_items(["CMD=", SCRIPT], sep="")

    return ["#$LSB_JOBINDEX"] + assignments + [cmd_definition, run_cmd]
Ejemplo n.º 2
0
def createConcatSplitFilesCmd(samplename, infile_list, outfile, haveHeader,
                              run_dir):
    """
    Build the DPP_SCRIPT command line for the concatSplitFiles command.

    The input files are passed as one comma separated --files value; the
    --haveHeader flag is appended only when haveHeader is true.
    """
    joined_inputs = merge_items(infile_list, sep=",")
    header_flag = ["--haveHeader"] if haveHeader else []

    args = [
        DPP_SCRIPT, "-c concatSplitFiles",
        "-s", samplename,
        "--files", joined_inputs,
        "-o", outfile,
        "-r", run_dir,
    ] + header_flag

    return merge_items(args)
Ejemplo n.º 3
0
def createSplitLociCmd(samplename, loci_file, prefix, postfix, fai_file,
                       ignore_file, run_dir):
    """
    Build the DPP_SCRIPT command line for the splitLociFile command.

    The -f and -i options are taken from the fai_file and ignore_file
    arguments. Earlier versions emitted both options a second time with the
    module-level CHROMS_FAI / IGNORE_FILE values, which made the arguments
    supplied by the caller ambiguous (or silently overridden, depending on how
    the receiving script resolves repeated options).
    """
    args = [
        DPP_SCRIPT, "-c splitLociFile",
        "-s", samplename,
        "--loci", loci_file,
        "-f", fai_file,
        "-i", ignore_file,
        "--prefix", prefix,
        "--postfix", postfix,
        "-r", run_dir,
    ]
    return merge_items(args)
Ejemplo n.º 4
0
def generateQcrunScript(dp_master_file, dp_in_dir, qc_dir):
    """
    Creates the qc runscript for easy qc generation.

    Writes an executable runQc.sh into qc_dir. The script first calls SCRIPT
    with dp_master_file, dp_in_dir and qc_dir, and then runs a series of
    `convert` commands that each bundle a group of png files into one pdf.
    """
    scriptfile = path.joinpath(qc_dir, "runQc.sh")

    # One convert call per group of figures
    convert_lines = (
        "convert *alleleFreq*png alleleFrequency.pdf\n",
        "convert *copyNumberAdjustment*png copyNumberAdjustment.pdf\n",
        "convert *depth*png depth.pdf\n",
        "convert *kappa*png kappa.pdf\n",
        "convert *mutation.copy.num*png mutation.copy.number.pdf\n",
        "convert *totalCopy*png totalCopyNumber.pdf\n",
        "convert *_fractionOfCells*png fractionOfCells.pdf\n",
        "convert *subclonalFractionPerChromosome*png subclonalFractionPerChromosome.pdf\n",
        "convert *large.subclonal.fraction.by.chrom*png large.subclonal.fraction.by.chrom.pdf\n",
        "convert *depth.vs.frac.mutCount.png depth.vs.frac.mutCount.pdf\n",
        "convert *_cellularityCorrectedAF.png cellularityCorrectedAF.pdf\n",
    )

    # The context manager closes the file even when a write fails
    with open(scriptfile, "w") as outf:
        outf.write(
            merge_items([SCRIPT, dp_master_file, dp_in_dir, qc_dir]) + "\n")
        outf.writelines(convert_lines)

    # Make executable
    st = os.stat(scriptfile)
    os.chmod(scriptfile, st.st_mode | stat.S_IEXEC)
Ejemplo n.º 5
0
def createDpInputCmd(samplename, loci_file, allele_freq_file, subclone_file,
                     rho_psi_file, mut_mut_phase_file, mut_cn_phase_file,
                     gender, bb_dir, run_dir):
    """
    Build the DPP_SCRIPT command line for the dpInput command.

    The output file name is derived from the sample name:
    <samplename>_allDirichletProcessInfo.txt
    """
    output_name = samplename + "_allDirichletProcessInfo.txt"
    args = [
        DPP_SCRIPT, "-c dpInput",
        "-s", samplename,
        "--loci", loci_file,
        "--all_freq", allele_freq_file,
        "--subclones", subclone_file,
        "--rhopsi", rho_psi_file,
        "--mut_mut", mut_mut_phase_file,
        "--mut_cn", mut_cn_phase_file,
        "-x", gender,
        "-o", output_name,
        "-b", bb_dir,
        "-r", run_dir,
    ]
    return merge_items(args)
Ejemplo n.º 6
0
def createGetAlleleFrequencyCmd(samplename, loci_file_prefix, bam_file,
                                out_file_prefix, run_dir, split_chroms):
    """
    Build the DPP_SCRIPT command line for the getAlleleFrequency command.

    When split_chroms is true the loci and output file names carry the
    ${LSB_JOBINDEX} shell variable before the .txt extension, otherwise only
    .txt is appended to the prefixes.
    """
    filename_suffix = "${LSB_JOBINDEX}.txt" if split_chroms else ".txt"
    loci_file = loci_file_prefix + filename_suffix
    out_file = out_file_prefix + filename_suffix

    args = [
        DPP_SCRIPT, "-c getAlleleFrequency",
        "-s", samplename,
        "--bam", bam_file,
        "--loci", loci_file,
        "-o", out_file,
        "-r", run_dir,
    ]
    return merge_items(args)
Ejemplo n.º 7
0
def generateSamplesheet(samplename_file, tumour_file, tumourid_file,
                        normal_file, normalid_file, gender_file, bb_file,
                        variants_file, output_file):
    """
    Takes various single column files and generates a samplesheet. The i'th row of each of these files will be
    joined together (as if the Unix command line paste was called).

    The samplesheet is tab separated and starts with a header line. bb_file may
    be None, in which case the bb_dir column is filled with the placeholder
    'NA'. One row is written per entry in samplename_file; an IndexError is
    raised when any other input has fewer entries than that.
    """
    # Read in the various files
    samplenames = read_list_of_items(samplename_file)
    tumours = read_list_of_items(tumour_file)
    tumour_ids = read_list_of_items(tumourid_file)
    normals = read_list_of_items(normal_file)
    normal_ids = read_list_of_items(normalid_file)
    genders = read_list_of_items(gender_file)
    # BB is optional (it could be run after creation of this project), use a placeholder in that case
    if bb_file is not None:
        bb = read_list_of_items(bb_file)
    else:
        bb = ['NA'] * len(samplenames)
    variants = read_list_of_items(variants_file)

    header = [
        "#sample", "tumour_id", "tumour", "normal_id", "normal", "bb_dir",
        "gender", "variants"
    ]
    # Same order as the header above
    columns = [
        samplenames, tumour_ids, tumours, normal_ids, normals, bb, genders,
        variants
    ]

    # Write the output, joining line i from all columns together. The context
    # manager closes the file even when a column turns out to be too short.
    with open(output_file, 'w') as outf:
        outf.write(merge_items(header, sep="\t") + "\n")
        for i in range(len(samplenames)):
            row = [column[i] for column in columns]
            outf.write(merge_items(row, sep="\t") + "\n")
Ejemplo n.º 8
0
def createMutMutPhasingCmd(samplename, loci_file_prefix, out_file_prefix,
                           bam_file, bai_file, max_distance, bb_dir, run_dir,
                           split_chroms):
    """
    Build the DPP_SCRIPT command line for the mutMutPhasing command.

    When split_chroms is true the loci and output file names carry the
    ${LSB_JOBINDEX} shell variable before the .txt extension. max_distance is
    converted to a string before it is placed on the command line.
    """
    filename_suffix = "${LSB_JOBINDEX}.txt" if split_chroms else ".txt"
    loci_file = loci_file_prefix + filename_suffix
    out_file = out_file_prefix + filename_suffix

    args = [
        DPP_SCRIPT, "-c mutMutPhasing",
        "-s", samplename,
        "--loci", loci_file,
        "-o", out_file,
        "--bam", bam_file,
        "--bai", bai_file,
        "--max_distance", str(max_distance),
        "-b", bb_dir,
        "-r", run_dir,
    ]
    return merge_items(args)
Ejemplo n.º 9
0
def createMutCnPhasingCmd(samplename, loci_file_prefix, baf_file,
                          hap_info_prefix, hap_info_suffix, outfile_prefix,
                          bam_file, bai_file, max_distance, bb_dir, run_dir,
                          split_chroms):
    """
    Build the DPP_SCRIPT command line for the mutCNPhasing command.

    This step is always run split per chromosome, as the internal R function
    cannot handle all data at once because the impute output doesn't contain
    chromosome info. The split_chroms argument is therefore ignored and the
    ${LSB_JOBINDEX} shell variable is always inserted into the loci, hap_info
    and output file names.

    NOTE(review): max_distance is accepted but not placed on the command line.
    """
    job_index = "${LSB_JOBINDEX}"
    loci_file = loci_file_prefix + job_index + ".txt"
    hap_info_file = hap_info_prefix + job_index + hap_info_suffix
    out_file = outfile_prefix + job_index + ".txt"

    args = [
        DPP_SCRIPT, "-c mutCNPhasing",
        "-s", samplename,
        "--loci", loci_file,
        "--phased_baf", baf_file,
        "--hap_info", hap_info_file,
        "-o", out_file,
        "--bam", bam_file,
        "--bai", bai_file,
        "-b", bb_dir,
        "-r", run_dir,
    ]
    return merge_items(args)
Ejemplo n.º 10
0
def generateBsubCmd(jobname,
                    logdir,
                    cmd,
                    queue="normal",
                    mem=1,
                    depends=None,
                    isArray=False,
                    threads=None):
    '''
    Transforms the cmd into a bsub command with the supplied parameters.

    jobname : value for -J, also used as base name of the log files
    logdir  : directory in which the .out / .err log files are placed
    cmd     : command to submit, it is wrapped in single quotes
    queue   : value for -q
    mem     : memory request; "000" is appended to it before it is used for -M
              and in the select/rusage resource string
              (presumably GB converted to MB - TODO confirm)
    depends : job name, or iterable of job names, that must all be done before
              this job starts; None or an empty collection adds no dependency
    isArray : when True the log file names also contain the array index (%I)
    threads : value for -n, omitted when None

    Returns the complete bsub command line as a single string.
    '''
    bcmd = merge_items(["bsub", "-q", queue, "-J \"" + jobname + "\""])

    # Array jobs need the array index in the log name to keep logs apart
    log_tag = ".%J.%I" if isArray else ".%J"
    bcmd = merge_items([
        bcmd, "-o",
        path.joinpath(logdir, jobname) + log_tag + ".out", "-e",
        path.joinpath(logdir, jobname + log_tag + ".err")
    ])

    mem = str(mem) + "000"
    bcmd = merge_items([
        bcmd, "-M", mem, "-R",
        "'span[hosts=1] select[mem>" + mem + "] rusage[mem=" + mem + "]'"
    ])

    # A bare string would otherwise be iterated character by character
    if isinstance(depends, str):
        depends = [depends]

    # Skip empty dependency lists as well, as -w"" is not a valid expression
    if depends:
        depends_str = "&&".join("done(" + job + ")" for job in depends)
        bcmd = merge_items([bcmd, "-w\"" + depends_str + "\""])

    if threads is not None:
        bcmd = merge_items([bcmd, "-n", str(threads)])

    bcmd = merge_items([bcmd, "'" + cmd + "'"])

    return bcmd
Ejemplo n.º 11
0
def createPlotHaplotypesCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "plothaplotypes" process (-p).

    The standard and thread options are obtained from bb_conf.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.append(bb_conf.getThreadsOption_cgpBB())
    args.extend(["-p", "plothaplotypes"])
    return merge_items(args)
Ejemplo n.º 12
0
def createCleanupPostBafCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "cleanuppostbaf" process (-p).

    The standard and thread options are obtained from bb_conf.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.append(bb_conf.getThreadsOption_cgpBB())
    args.extend(["-p", "cleanuppostbaf"])
    return merge_items(args)
Ejemplo n.º 13
0
def createCombineImputeCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "combineimpute" process (-p).

    The standard and thread options are obtained from bb_conf.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.append(bb_conf.getThreadsOption_cgpBB())
    args.extend(["-p", "combineimpute"])
    return merge_items(args)
Ejemplo n.º 14
0
def createImputeFromBafCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "imputefromaf" process (-p).

    The standard and thread options are obtained from bb_conf.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.append(bb_conf.getThreadsOption_cgpBB())
    args.extend(["-p", "imputefromaf"])
    return merge_items(args)
Ejemplo n.º 15
0
def createBafLogCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "baflog" process (-p).

    The standard and thread options are obtained from bb_conf.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.append(bb_conf.getThreadsOption_cgpBB())
    args.extend(["-p", "baflog"])
    return merge_items(args)
Ejemplo n.º 16
0
def createAlleleCountCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "allelecount" process (-p).

    The standard and thread options are obtained from bb_conf.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.append(bb_conf.getThreadsOption_cgpBB())
    args.extend(["-p", "allelecount"])
    return merge_items(args)
Ejemplo n.º 17
0
#!/usr/bin/env python

import argparse, sys, os, stat
from path import path
from util import merge_items

# Directory that holds the R pipeline code (hard-coded absolute path)
LIBPATH = "/nfs/users/nfs_s/sd11/repo/dirichlet/dp_combined/"
# Command that runs the RunDP_pipeline.R entry point through Rscript.
# NOTE(review): LIBPATH already ends in "/", so the script path contains "//".
SCRIPT = merge_items(["Rscript", LIBPATH + "/RunDP_pipeline.R"])


def _write_script(filename, lines):
    outf = open(filename, "w")
    for line in lines:
        outf.write(line + "\n")
    outf.close()
    # Make executable
    st = os.stat(filename)
    os.chmod(filename, st.st_mode | stat.S_IEXEC)


def _get_r_wrapper_lines(phase):
    assert phase == "cons" or phase == "trees", "The phase parameter can only be trees or cons"

    # Create the run command, which differs per phase
    if phase == "cons":
        run_cmd = merge_items([
            "${CMD} ${DATASET} ${ITERS} ${BURNIN} ${DATPATH} ${PURITYFILE} \"",
            phase, "\" ${PARALLEL} ${THREADS} ${BINSIZE} \"1\" ${NBLOCKS}"
        ],
                              sep="")
    else:
Ejemplo n.º 18
0
def createSubclonesCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "subclones" process (-p).

    Only the standard options are taken from bb_conf, no thread option is added.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.extend(["-p", "subclones"])
    return merge_items(args)
Ejemplo n.º 19
0
def createCombineBafsCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "combinebafs" process (-p).

    Only the standard options are taken from bb_conf, no thread option is added.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.extend(["-p", "combinebafs"])
    return merge_items(args)
Ejemplo n.º 20
0
def generateDPrunScript(run_dir, dp_in_dir, dp_master_file, projectname):
    '''
    This function will create a series of shell scripts that make life easier when running dirichlet clustering pipelines.

    Within the directory that is supplied by the run_dir parameter this will be created:
        - submit.block.sh : for the block parallel tree based method, when run this creates two LSF jobs for the trees and cons phase respectively
        - submit.nd.sh : for running the nD clustering, when run this creates a single LSF job
        - resubmit.block.sh : for resubmitting the cons step of the tree based method, when run this creates a single LSF job for the cons phase

    All three scripts take the sample as their first argument ($1) and write
    their LSF logs to $PWD/logs.

    The block scripts call the R wrapper scripts RunBlockTreeDP_trees.sh and
    RunBlockTreeDP_cons.sh from LIBPATH. These wrappers are not generated by
    this function, so they must already be available there.
    '''
    # Lines shared by both block parallel scripts
    jobname_line = merge_items(["JOBNAME=\"tree_", projectname, "\""], sep="")
    libpath_line = merge_items(["LIBPATH=\"", LIBPATH, "\""], sep="")
    params_line = merge_items([
        "PARAMS=\"${LIBPATH} ${SAMPLE} 200 30", dp_in_dir, dp_master_file,
        "tree_dp true 5 0.05\""
    ])

    # Array job with one element per block for the trees phase
    trees_bsub = (
        "bsub -M ${MEMTREE} "
        "-R \"select[mem>${MEMTREE}] rusage[mem=${MEMTREE}] span[hosts=1]\" "
        "-n 5 -J \"${JOBNAME}_t[1-${NBLOCKS}]\" -q \"${QUEUE}\" "
        "-o $PWD/logs/${JOBNAME}_t.%J.%I.out "
        "-e $PWD/logs/${JOBNAME}_t.%J.%I.err "
        "\"${LIBPATH}/RunBlockTreeDP_trees.sh ${PARAMS}\"")

    # Options of the cons phase job, shared by submit and resubmit
    cons_options = (
        "-M ${MEMCONS} "
        "-R \"select[mem>${MEMCONS}] rusage[mem=${MEMCONS}]\" "
        "-J \"${JOBNAME}_c\" -q \"${QUEUE}\" "
        "-o $PWD/logs/${JOBNAME}_c.%J.out "
        "-e $PWD/logs/${JOBNAME}_c.%J.err "
        "\"${LIBPATH}/RunBlockTreeDP_cons.sh ${PARAMS} ${NBLOCKS}\"")

    # submit.block.sh: the cons job waits until all trees jobs have ended
    lines = [
        "SAMPLE=$1",
        "QUEUE=\"basement\"",
        jobname_line,
        libpath_line,
        params_line,
        "NBLOCKS=\"10\"",
        "MEMTREE=\"15000\"",
        "MEMCONS=\"15000\"",
        trees_bsub,
        "bsub -w\"ended(${JOBNAME}_t[1-${NBLOCKS}])\" " + cons_options,
    ]
    _write_script(path.joinpath(run_dir, "submit.block.sh"), lines)

    # resubmit.block.sh: only reruns the cons phase. No trees job is submitted
    # by this script, so the cons job must not carry a dependency on one.
    lines = [
        "SAMPLE=$1",
        "QUEUE=\"basement\"",
        jobname_line,
        libpath_line,
        params_line,
        "NBLOCKS=\"10\"",
        "MEMCONS=\"15000\"",
        "bsub " + cons_options,
    ]
    _write_script(path.joinpath(run_dir, "resubmit.block.sh"), lines)

    # submit.nd.sh: a single job that runs the R pipeline directly
    lines = [
        "SAMPLE=$1",
        "QUEUE=\"normal\"",
        merge_items(["JOBNAME=\"nd_", projectname, "\""], sep=""),
        merge_items(["CMD=\"", SCRIPT, "\""], sep=""),
        merge_items([
            "PARAMS=\"${SAMPLE} 1250 250", dp_in_dir, dp_master_file,
            "nd_dp false 1 NA 1 1\""
        ]),
        "MEMORY=\"15000\"",
        "bsub -J ${JOBNAME} -q ${QUEUE} -M ${MEMORY} "
        "-R 'span[hosts=1] select[mem>'${MEMORY}'] rusage[mem='${MEMORY}']' "
        "-o $PWD/logs/${JOBNAME}.%J.out -e $PWD/logs/${JOBNAME}.%J.err "
        "\"${CMD} ${PARAMS}\"",
    ]
    _write_script(path.joinpath(run_dir, "submit.nd.sh"), lines)
Ejemplo n.º 21
0
def createGenerateAFLociCmd(samplename, outfile_postfix, vcf_file, run_dir):
    """
    Build the DPP_SCRIPT command line for the generateAFLoci command.

    The output file name is samplename + outfile_postfix. The -f and -i
    options are filled from the module-level CHROMS_FAI and IGNORE_FILE.
    """
    output_name = samplename + outfile_postfix
    args = [
        DPP_SCRIPT, "-c generateAFLoci",
        "-s", samplename,
        "-o", output_name,
        "-v", vcf_file,
        "-f", CHROMS_FAI,
        "-i", IGNORE_FILE,
        "-r", run_dir,
    ]
    return merge_items(args)
Ejemplo n.º 22
0
def createDpIn2VcfCmd(vcf_file, dpIn_file, outfile, fai_file, ignore_file):
    """
    Build the DPPVCF_SCRIPT command line from the supplied input and output files.
    """
    args = [
        DPPVCF_SCRIPT,
        "-v", vcf_file,
        "-i", dpIn_file,
        "-f", fai_file,
        "--ig", ignore_file,
        "-o", outfile,
    ]
    return merge_items(args)
Ejemplo n.º 23
0
def createSegmentPhasedCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "segmentphased" process (-p).

    Only the standard options are taken from bb_conf, no thread option is added.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.extend(["-p", "segmentphased"])
    return merge_items(args)
Ejemplo n.º 24
0
def createDumpCountsSangerCmd(samplename, vcf_file, run_dir):
    """
    Build the DPP_SCRIPT command line for the dumpCountsSanger command.

    The output file name is derived from the sample name:
    <samplename>_alleleFrequency.txt
    """
    output_name = samplename + "_alleleFrequency.txt"
    args = [
        DPP_SCRIPT, "-c dumpCountsSanger",
        "-s", samplename,
        "-v", vcf_file,
        "-r", run_dir,
        "-o", output_name,
    ]
    return merge_items(args)
Ejemplo n.º 25
0
def createFinaliseCmd(bb_conf):
    """
    Build the BBSCRIPT command line for the "finalise" process (-p).

    Only the standard options are taken from bb_conf, no thread option is added.
    """
    args = [BBSCRIPT]
    args.append(bb_conf.getStandardOptions_cgpBB())
    args.extend(["-p", "finalise"])
    return merge_items(args)
Ejemplo n.º 26
0
def generateAlleleCountCommand(bam, loci, outfile):
    """
    Build the ALLELECOUNTER command line.

    The result has the form: <ALLELECOUNTER> -b <bam> -l <loci> -o <outfile>
    """
    args = [ALLELECOUNTER]
    args.extend(["-b", bam])
    args.extend(["-l", loci])
    args.extend(["-o", outfile])
    return merge_items(args)