Beispiel #1
0
def filterVCF(vcf, SAMPLE_NUM):
    """Run the GATK ``VariantFiltration`` tool on ``vcf``.

    The output is written under ``OUT_DIR`` as
    ``filtered.vcf.gz_<SAMPLE_NUM>``; ``REF`` is passed as the reference.

    Returns the path of the output file.
    """
    filtered_vcf = os.path.join(OUT_DIR,
                                "filtered.vcf.gz_{0}".format(SAMPLE_NUM))

    gatk_args = [GATK, "VariantFiltration"]
    gatk_args += ["-R", REF]
    gatk_args += ["-V", vcf]
    gatk_args += ["-O", filtered_vcf]

    # The value returned by runCMD is not needed here.
    runCMD(gatk_args)

    return filtered_vcf
Beispiel #2
0
def splitNCigarReads(bam, SAMPLE_NUM):
    """Run the GATK ``SplitNCigarReads`` tool on ``bam``.

    The output is written under ``OUT_DIR`` as ``splitted.bam_<SAMPLE_NUM>``;
    ``REF`` is passed as the reference.

    Returns the path of the output file.
    """
    split_bam = os.path.join(OUT_DIR, "splitted.bam_{0}".format(SAMPLE_NUM))

    gatk_args = [
        GATK, "SplitNCigarReads",
        "-R", REF,
        "-I", bam,
        "-O", split_bam,
    ]
    runCMD(gatk_args)

    return split_bam
Beispiel #3
0
def haploCaller(bam, SAMPLE_NUM):
    """Run the GATK ``HaplotypeCaller`` tool on ``bam`` in GVCF mode.

    The call is restricted to the intervals ``chr1`` and ``chr2`` and the
    output is written under ``OUT_DIR`` as ``output.g.vcf.gz_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    gvcf_path = os.path.join(OUT_DIR,
                             "output.g.vcf.gz_{0}".format(SAMPLE_NUM))

    gatk_args = [GATK, "HaplotypeCaller", "-R", REF, "-I", bam]
    gatk_args.extend(["-O", gvcf_path])
    gatk_args.extend(["-ERC", "GVCF"])
    # Hard-coded interval restriction.
    gatk_args.extend(["-L", "chr1", "-L", "chr2"])

    runCMD(gatk_args)

    return gvcf_path
Beispiel #4
0
def convertSAMtoFASTQ(usam, SAMPLE_NUM):
    """Run Picard ``SamToFastq`` on ``usam``.

    The FASTQ output is written under ``OUT_DIR`` as
    ``sample.fastq_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    fastq_path = os.path.join(OUT_DIR, "sample.fastq_{0}".format(SAMPLE_NUM))

    picard_cmd = [
        "java", "-jar", PICARD, "SamToFastq",
        "I={0}".format(usam),
        "FASTQ={0}".format(fastq_path),
    ]
    runCMD(picard_cmd)

    return fastq_path
Beispiel #5
0
def bwa_map(fastq, SAMPLE_NUM):
    """Run ``bwa mem`` on ``fastq`` against ``REF`` and capture the output.

    An output file ``mapped.sam_<SAMPLE_NUM>`` is opened under ``OUT_DIR``
    and handed to ``runCMD`` as its second argument.

    Returns the path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "mapped.sam_{0}".format(SAMPLE_NUM))
    cmd = [BWA, "mem", REF, fastq]

    # Use a context manager so the handle is closed even if runCMD raises.
    with open(output_path, "w") as out_file:
        runCMD(cmd, out_file)

    return output_path
Beispiel #6
0
def revertSAM(ubam, SAMPLE_NUM):
    """Run Picard ``RevertSam`` on ``ubam``.

    The output is written under ``OUT_DIR`` as ``reverted.bam_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    reverted_bam = os.path.join(OUT_DIR,
                                "reverted.bam_{0}".format(SAMPLE_NUM))

    picard_cmd = ["java", "-jar", PICARD, "RevertSam"]
    picard_cmd.append("I={0}".format(ubam))
    picard_cmd.append("O={0}".format(reverted_bam))

    runCMD(picard_cmd)

    return reverted_bam
Beispiel #7
0
def convertSAMtoBAM(sam, SAMPLE_NUM):
    """Run ``samtools view -bhS`` on ``sam`` and capture the output.

    An output file ``mapped.bam_<SAMPLE_NUM>`` is opened under ``OUT_DIR``
    and handed to ``runCMD`` as its second argument.

    Returns the path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "mapped.bam_{0}".format(SAMPLE_NUM))
    cmd = [SAMTOOLS, "view", "-bhS", sam]

    # Use a context manager so the handle is closed even if runCMD raises.
    # NOTE(review): the file is opened in text mode although "-b" requests
    # BAM output; presumably runCMD hands the handle to the child process as
    # its stdout, in which case the mode is irrelevant -- confirm in runCMD.
    with open(output_path, "w") as out_file:
        runCMD(cmd, out_file)

    return output_path
Beispiel #8
0
def analyzeCovariates(recal_data, SAMPLE_NUM):
    """Run the GATK ``AnalyzeCovariates`` tool on ``recal_data``.

    ``recal_data`` is passed via ``-bqsr`` and the plots are written under
    ``OUT_DIR`` as ``AnalyzeCovariates.pdf_<SAMPLE_NUM>``.

    Returns the path of the plots file.
    """
    plots_name = "AnalyzeCovariates.pdf_{0}".format(SAMPLE_NUM)
    plots_path = os.path.join(OUT_DIR, plots_name)

    gatk_args = [GATK, "AnalyzeCovariates"]
    gatk_args += ["-bqsr", recal_data]
    gatk_args += ["-plots", plots_path]
    runCMD(gatk_args)

    return plots_path
Beispiel #9
0
def annotateVCF(bam, vcf, SAMPLE_NUM):
    """Run the GATK ``VariantAnnotator`` tool on ``vcf`` using ``bam``.

    The ``Coverage`` annotation is requested and ``DBSNP`` is passed via
    ``--dbsnp``. Output goes under ``OUT_DIR`` as
    ``annotated.vcf_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    annotated_vcf = os.path.join(OUT_DIR,
                                 "annotated.vcf_{0}".format(SAMPLE_NUM))

    gatk_args = [GATK, "VariantAnnotator"]
    gatk_args.extend(["-I", bam])
    gatk_args.extend(["-R", REF])
    gatk_args.extend(["-V", vcf])
    gatk_args.extend(["-O", annotated_vcf])
    gatk_args.extend(["-A", "Coverage"])
    gatk_args.extend(["--dbsnp", DBSNP])

    runCMD(gatk_args)

    return annotated_vcf
Beispiel #10
0
def sort(bam, SAMPLE_NUM):
    """Run Picard ``SortSam`` on ``bam`` with ``SORT_ORDER=queryname``.

    The output is written under ``OUT_DIR`` as ``sorted.bam_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    sorted_bam = os.path.join(OUT_DIR, "sorted.bam_{0}".format(SAMPLE_NUM))

    picard_cmd = ["java", "-jar", PICARD, "SortSam"]
    picard_cmd.append("I={0}".format(bam))
    picard_cmd.append("O={0}".format(sorted_bam))
    picard_cmd.append("SORT_ORDER=queryname")

    runCMD(picard_cmd)

    return sorted_bam
Beispiel #11
0
def applyBQSR(bam, recal_data, SAMPLE_NUM):
    """Run the GATK ``ApplyBQSR`` tool on ``bam`` using ``recal_data``.

    ``recal_data`` is passed via ``--bqsr-recal-file``; the flags
    ``--add-output-sam-program-record`` and ``--use-original-qualities`` are
    always set. Output goes under ``OUT_DIR`` as
    ``recalibrated.bam_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    recalibrated_name = "recalibrated.bam_{0}".format(SAMPLE_NUM)
    recalibrated_bam = os.path.join(OUT_DIR, recalibrated_name)

    gatk_args = [GATK, "ApplyBQSR", "-R", REF, "-I", bam]
    gatk_args.append("--add-output-sam-program-record")
    gatk_args.append("--use-original-qualities")
    gatk_args += ["-O", recalibrated_bam]
    gatk_args += ["--bqsr-recal-file", recal_data]

    runCMD(gatk_args)

    return recalibrated_bam
Beispiel #12
0
def recalibrateBase(bam, SAMPLE_NUM):
    """Run the GATK ``BaseRecalibrator`` tool on ``bam``.

    ``DBSNP`` is the only ``--known-sites`` resource and the run is
    restricted to the intervals ``chr1`` and ``chr2``. The output is written
    under ``OUT_DIR`` as ``recal_data.table_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    table_name = "recal_data.table_{0}".format(SAMPLE_NUM)
    table_path = os.path.join(OUT_DIR, table_name)

    gatk_args = [GATK, "BaseRecalibrator"]
    gatk_args += ["-I", bam]
    gatk_args += ["-R", REF]
    gatk_args += ["--known-sites", DBSNP]
    gatk_args += ["-O", table_path]
    # Hard-coded interval restriction.
    gatk_args += ["-L", "chr1", "-L", "chr2"]

    runCMD(gatk_args)

    return table_path
Beispiel #13
0
def mergeBamAlignment(mapped_bam, unmapped_sam, SAMPLE_NUM):
    """Run Picard ``MergeBamAlignment`` on an aligned and an unmapped file.

    ``mapped_bam`` is passed as ``ALIGNED``, ``unmapped_sam`` as ``UNMAPPED``
    and ``REF`` as ``R``. The output is written under ``OUT_DIR`` as
    ``merged.bam_<SAMPLE_NUM>``.

    Returns the path of the output file.
    """
    merged_bam = os.path.join(OUT_DIR, "merged.bam_{0}".format(SAMPLE_NUM))

    picard_cmd = ["java", "-jar", PICARD, "MergeBamAlignment"]
    picard_cmd.append("ALIGNED={0}".format(mapped_bam))
    picard_cmd.append("UNMAPPED={0}".format(unmapped_sam))
    picard_cmd.append("R={0}".format(REF))
    picard_cmd.append("O={0}".format(merged_bam))

    runCMD(picard_cmd)

    return merged_bam
Beispiel #14
0
def markDuplicates(mapped_bam, SAMPLE_NUM):
    """Run Picard ``MarkDuplicates`` on ``mapped_bam``.

    The output BAM is written under ``OUT_DIR`` as
    ``marked_duplicates.bam_<SAMPLE_NUM>`` and the metrics file as
    ``marked_dup_metrics``.

    Returns a ``(bam_path, metrics_path)`` tuple.
    """
    bam_name = "marked_duplicates.bam_{0}".format(SAMPLE_NUM)
    marked_bam = os.path.join(OUT_DIR, bam_name)
    # NOTE(review): unlike the BAM output, the metrics path carries no
    # SAMPLE_NUM suffix, so every call uses the same metrics file name --
    # confirm this is intended.
    metrics_path = os.path.join(OUT_DIR, "marked_dup_metrics")

    picard_cmd = [
        "java", "-jar", PICARD, "MarkDuplicates",
        "I={0}".format(mapped_bam),
        "O={0}".format(marked_bam),
        "M={0}".format(metrics_path),
    ]
    runCMD(picard_cmd)

    return marked_bam, metrics_path
Beispiel #15
0
 def run(self, buildType):
     """Run the compile command and report whether it succeeded.

     The command from ``self._formatCompileCMD()`` is executed through
     ``runCMD`` in ``self._getRunDir()``, with the log file named by
     ``self._getLogFileName()`` passed as ``pipe``. ``buildType`` is not
     used by this method.

     Returns True when the command's ``ret_code`` is 0. Returns False when
     the command could not be run or exited non-zero; in the latter case
     the captured output is printed through ``self._printNiceFailLog``.
     """
     tCompCmd = self._formatCompileCMD()
     try:
         tCompileLog = self._getLogFileName()
         with open(tCompileLog, "w+") as tFile:
             # Compare strings by value: ``is not "Windows"`` tested object
             # identity against a literal and was effectively always True.
             compileOut = runCMD(tCompCmd, workDir=self._getRunDir(), pipe=tFile,
                                 isShell=platform.system() != "Windows")
     except Exception as tError:
         # Exception (not a bare except) so that KeyboardInterrupt and
         # SystemExit are not swallowed.
         log.error("[Error] Can't compile project: {0}".format(tError))
         return False

     log.info("[Info] Compile log saved to: {0}".format(tCompileLog))
     if compileOut["ret_code"] != 0:
         log.error("[Error] {0} Compiler log {1}".format("=" * 20, "=" * 20))
         self._printNiceFailLog(compileOut["out"])
         log.error("[Error] {0}".format("-" * 51))
         return False
     return True
Beispiel #16
0
def addOrReplaceReadGroups(bam, SAMPLE_NUM):
    """Run Picard ``AddOrReplaceReadGroups`` on ``bam`` and index the result.

    The read-group fields are fixed (``RGID=4``, ``RGLB=lib1``,
    ``RGPL=ILLUMINA``, ``RGPU=unitl``, ``RGSM=20``). The output is written
    under ``OUT_DIR`` as ``output_rg.bam_<SAMPLE_NUM>`` and then passed to
    ``indexBAM``.

    Returns the path of the output BAM.
    """
    output_path = os.path.join(OUT_DIR, "output_rg.bam_{0}".format(SAMPLE_NUM))
    output_arg = "O={0}".format(output_path)
    input_arg = "I={0}".format(bam)
    rgid = "RGID=4"
    rglb = "RGLB=lib1"
    rgpl = "RGPL=ILLUMINA"
    rgpu = "RGPU=unitl"
    rgsm = "RGSM=20"

    cmd = [
        "java", "-jar", PICARD, "AddOrReplaceReadGroups", input_arg,
        output_arg, rgid, rglb, rgpl, rgpu, rgsm
    ]

    runCMD(cmd)

    # Index the file that was just written. The original passed the
    # undefined name ``output_rg`` here, which raised NameError.
    indexBAM(output_path)

    return output_path
Beispiel #17
0
 def run(self, buildType):
     """Run cmake for ``buildType`` and return the build dictionary.

     The command line is ``"cmake -H."`` followed by the strings from
     ``self._getBuildDirCMD()``, ``self._getGenCMD()`` and
     ``self._getDefsCMD()``. It is executed through ``runCMD`` in
     ``self._getRunDir()``, with the file named by ``self._getLogFile()``
     passed as ``pipe``.

     Returns ``self._buildDict`` when the command's ``ret_code`` is 0.
     Returns None when cmake could not be run or exited non-zero; in the
     latter case the captured output is printed through
     ``self._printNiceFailLog``.
     """
     assert self._rootPath is not None # check that root path specified
     self._setUpBuildInfo(buildType)
     cmakeCMD = "cmake -H." + self._getBuildDirCMD() + self._getGenCMD() + self._getDefsCMD()
     with open(self._getLogFile(), "w+") as tFile:
         setUpEnv(self._getEnvVars())
         try:
             # Compare strings by value: ``is not "Windows"`` tested object
             # identity against a literal and was effectively always True.
             tProcessOut = runCMD(cmakeCMD, workDir=self._getRunDir(), pipe=tFile,
                                  isShell=platform.system() != "Windows")
         except Exception as tError:
             # Exception (not a bare except) so that KeyboardInterrupt and
             # SystemExit are not swallowed.
             log.error("[Error] Can't run cmake: {0} ".format(tError))
             return None

         log.info("[Info] Cmake log saved to: {0}".format(self._getLogFile()))
         if tProcessOut["ret_code"] != 0:
             log.error("[Error] {0} Cmake log {1}".format("=" * 18, "=" * 19))
             self._printNiceFailLog(tProcessOut["out"])
             log.error("[Error] {0}".format("-" * 51))
             return None
         return self._buildDict
Beispiel #18
0
 def _checkToolVersion(self, binName, matchInfo):
   """Run ``binName`` and validate the version token found in its output.

   ``matchInfo`` is read for these keys:
     "args"  -- single argument passed to the tool
     "line"  -- 0-based index of the output line holding the version
     "word"  -- 0-based index of the space-separated word on that line
     "match" -- passed to ``self._isValidVersion`` with the extracted word

   On successful extraction the word is also stored in
   ``matchInfo["actual_ver"]``.

   Returns False when the tool exits non-zero or the output has too few
   lines or words; otherwise returns the result of
   ``self._isValidVersion``.
   """
   tCmdArgs = [binName, matchInfo["args"]]
   tProcOut = runCMD(tCmdArgs, isShell=platform.system().lower() == "windows")
   if tProcOut["ret_code"] != 0:
     log.error("[Error] Can't get version of: {0}".format(binName))
     return False
   tLines = tProcOut["out"].split("\n")
   # "line" is used as an index below, so it must be strictly less than the
   # number of lines ("<" let len == index through and raised IndexError).
   if len(tLines) <= matchInfo["line"]:
     log.error("[Error] Invalid number of lines in output of: {0}".format(binName))
     log.error("[Error] Expected count of lines: {0}".format(matchInfo["line"]))
     return False
   tWords = tLines[matchInfo["line"]].split(" ")
   # Same strict bound for the word index.
   if len(tWords) <= matchInfo["word"]:
     log.error("[Error] Invalid  count of word ({0} but need {1}): {2}"
               .format(len(tWords), matchInfo["word"], binName))
     return False
   tPhrase = tWords[matchInfo["word"]]
   matchInfo["actual_ver"] = tPhrase
   return self._isValidVersion(tPhrase, matchInfo["match"])
Beispiel #19
0
def indexBAM(bam):
    """Run ``samtools index`` on ``bam``.

    Nothing is returned; the value returned by ``runCMD`` is discarded.
    """
    index_cmd = [SAMTOOLS, "index"]
    index_cmd.append(bam)

    runCMD(index_cmd)