Example #1
 def testJobReadWriteAndDelete(self):
     jobDir = os.path.join(os.getcwd(), "testJobDir")
     os.mkdir(jobDir) #If the directory already exists, the test will fail
     command = "by your command"
     memory = 2**32 #4 GiB
     cpu = 1
     tryCount = 100
     
     for i in xrange(10):
         startTime = time.time()
         for j in xrange(100):
             j = Job(command, memory, cpu, tryCount, jobDir)
             self.assertEquals(j.remainingRetryCount, tryCount)
             self.assertEquals(j.jobDir, jobDir)
             self.assertEquals(j.children, [])
             self.assertEquals(j.followOnCommands, [ (command, memory, cpu, 0)])
             self.assertEquals(j.messages, [])
             j.write()
             j = Job.read(j.getJobFileName())
             self.assertEquals(j.remainingRetryCount, tryCount)
             self.assertEquals(j.jobDir, jobDir)
             self.assertEquals(j.children, [])
             self.assertEquals(j.followOnCommands, [ (command, memory, cpu, 0)])
             self.assertEquals(j.messages, [])
             self.assertTrue(os.path.exists(j.getJobFileName()))
             j.delete()
             self.assertTrue(not os.path.exists(j.getJobFileName()))
         print "It took %f seconds to load/unload jobs" % (time.time() - startTime) #We've just used it for benchmarking, so far 
         #Would be good to extend this trivial test
         
     system("rm -rf %s" % jobDir)
Example #2
 def testJobUpdate(self):
     jobDir = os.path.join(os.getcwd(), "testJobDir")
     os.mkdir(jobDir) #If the directory already exists, the test will fail
     command = "by your command"
     memory = 2**32 #4 GiB
     cpu = 1
     tryCount = 100
     
     for i in xrange(40):
         startTime = time.time()
         j = Job(command, memory, cpu, tryCount, jobDir)
         childNumber = random.choice(range(20))
         for k in xrange(childNumber):
             j.children.append((command, memory, cpu))
         self.assertEquals(len(j.children), childNumber)
         j.update(tryCount=tryCount, depth=0)
         j = Job.read(j.getJobFileName())
         self.assertEquals(len(j.children) + len(j.followOnCommands), childNumber + 1)
         for childJobFile, memory, cpu in j.children:
             cJ = Job.read(childJobFile)
             self.assertEquals(cJ.remainingRetryCount, tryCount)
             #self.assertEquals(cJ.jobDir, os.path.split(cJ)[0])
             self.assertEquals(cJ.children, [])
             self.assertEquals(cJ.followOnCommands, [ (command, memory, cpu, 0)])
             self.assertEquals(cJ.messages, [])
             self.assertTrue(os.path.exists(cJ.getJobFileName()))
             cJ.delete()
             self.assertTrue(not os.path.exists(cJ.getJobFileName()))
         self.assertEquals(os.listdir(jobDir), [ "job" ])
         j.delete()
         print "It took %f seconds to update jobs" % (time.time() - startTime) #We've just used it for benchmarking, so far 
         
     system("rm -rf %s" % jobDir)
Example #3
 def testCPecanRealignSplitSequences(self):
     """Runs cPecanRealign, splitting indels longer than 100bp, and check
     that the coverage from the results is the same as the coverage from
     realigning with no arguments.."""
     for seqFile1, seqFile2 in seqFilePairGenerator():
         # Drop the lastz command since it's not needed, but it's still
         # convenient to use the same parameters as the other tests.
         realignCommand, _ = getCommands(seqFile1, seqFile2)
         splitRealignCommand = realignCommand + " --splitIndelsLongerThanThis 100"
         realignOutput = getTempFile()
         splitRealignOutput = getTempFile()
         realignCommand += " > %s" % realignOutput
         splitRealignCommand += " > %s" % splitRealignOutput
         system(realignCommand)
         system(splitRealignCommand)
         # Check coverage on seqFile1
         
         #The following will fail until we refactor.
         
         splitRealignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile1, splitRealignOutput))
         realignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile1, realignOutput))
         self.assertTrue(splitRealignCoverage == realignCoverage)
         # Check coverage on seqFile2
         splitRealignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile2, splitRealignOutput))
         realignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile2, realignOutput))
         self.assertTrue(splitRealignCoverage == realignCoverage)
         os.remove(realignOutput)
         os.remove(splitRealignOutput)
Example #4
    def run(self):
        self.logToMaster("Getting recomb. events for clone %s ..." % self.clone)
        max_vdel = len(self.vseq) - 3
        min_vdel = find_min_vdel(self.vseq, self.aaseq)
        max_jdel = len(self.jseq) - 3
        min_jdel = find_min_jdel(self.jseq, self.aaseq)
        self.logToMaster("Vdel: <%d-%d>" % (min_vdel, max_vdel))
        self.logToMaster("Jdel: <%d-%d>" % (min_jdel, max_jdel))

        for d, dseq in self.d2seq.iteritems():
            devents = find_devents(dseq, self.aaseq)
            self.logToMaster("%d number of devents" % (len(devents)))
            # DEBUG
            #numempty = 0
            #for devent in devents:
            #    if devent.cdr3aa_dstart == -1:
            #        numempty += 1
            #self.logToMaster("\t%d empty D, %d non_empty_D\n" % (numempty, len(devents) - numempty))
            # END DEBUG
            for i, devent in enumerate(devents):
                outdir = os.path.join(self.outdir, d, str(i))  #outdir/clone/d/i
                system("mkdir -p %s" % outdir)
                
                if devent.cdr3aa_dstart == -1:
                    dempty_file = os.path.join(outdir, "d_empty")
                    self.addChildTarget(Get_Vjins(self.clone, self.vseq,
                                min_vdel, max_vdel, self.jseq, min_jdel,
                                max_jdel, d, devent, self.aaseq, dempty_file))
                else:
                    self.addChildTarget(Get_Vd_Dj_Ins(self.clone, self.vseq,
                                min_vdel, max_vdel, self.jseq, min_jdel,
                                max_jdel, d, dseq, devent, self.aaseq, outdir))
        self.setFollowOnTarget(CloneEventsAgg(self.outdir))
Example #5
 def run(self):
     infile = os.path.join(self.indir, "copyNumberStats.xml")
     if os.path.exists( infile ):
         cmd = "cnvPlot.py %s --outdir %s " %(infile, self.outdir)
         if self.filteredSamples != "":
             cmd += " --filteredSamples %s" %(self.filteredSamples)
         system(cmd)
Example #6
 def runComparisonOfBlastScriptVsNaiveBlast(self, blastMode):
     """We compare the output with a naive run of the blast program, to check the results are nearly
     equivalent.
     """
     encodeRegions = [ "ENm00" + str(i) for i in xrange(1,2) ] #, 2) ] #Could go to six
     species = ("human", "mouse", "dog")
     #Other species to try "rat", "monodelphis", "macaque", "chimp"
     for encodeRegion in encodeRegions:
         regionPath = os.path.join(self.encodePath, encodeRegion)
         for i in xrange(len(species)):
             species1 = species[i]
             for species2 in species[i+1:]:
                 seqFile1 = os.path.join(regionPath, "%s.%s.fa" % (species1, encodeRegion))
                 seqFile2 = os.path.join(regionPath, "%s.%s.fa" % (species2, encodeRegion))
                 
                 #Run the naive blast
                 runNaiveBlast(seqFile1, seqFile2, self.tempOutputFile)
                 logger.info("Ran the naive blast okay")
                 
                 #Run the blast
                 jobTreeDir = os.path.join(getTempDirectory(self.tempDir), "jobTree")
                 if blastMode == "allAgainstAll":
                     runCactusBlast([ seqFile1, seqFile2 ], self.tempOutputFile2, jobTreeDir,
                                    chunkSize=500000, overlapSize=10000)
                 else:
                     runCactusBlast([ seqFile1 ], self.tempOutputFile2, jobTreeDir,
                                    chunkSize=500000, overlapSize=10000, targetSequenceFiles=[ seqFile2 ])
                 runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
                 system("rm -rf %s " % jobTreeDir)    
                 logger.info("Ran cactus_blast okay")
                 logger.critical("Comparing cactus_blast and naive blast; using mode: %s" % blastMode)
                 compareResultsFile(self.tempOutputFile, self.tempOutputFile2)
Example #7
def trimGenome(sequenceFile, coverageFile, outputFile, complement=False,
               flanking=0, minSize=1, windowSize=10, threshold=1, depth=None):
    system("cactus_trimSequences.py %s %s %s %s %s %s %s %s > %s" % (
        nameValue("complement", complement, valueType=bool),
        nameValue("flanking", flanking), nameValue("minSize", minSize),
        nameValue("windowSize", windowSize), nameValue("threshold", threshold),
        nameValue("depth", depth), sequenceFile, coverageFile, outputFile))
Example #8
 def run(self):
     newmodfile = "%s-modified" %self.modfile
     #modify small branch lengths (rewrite any exponent of the form e-1x as e-08)
     system("sed 's/e-1./e-08/g' %s > %s" %(self.modfile, newmodfile))
     #get conservation bigwig and liftover files:
     cmd = "halTreePhyloP.py %s %s %s --bigWig --numProc %d" %(self.halfile, newmodfile, self.outdir, self.numproc)
     system(cmd)
def extractOutput(workDir, outputHalFile, options):
    if options.outputMaf is not None:
        mcProj = MultiCactusProject()
        mcProj.readXML(
            os.path.join(workDir, ProjectWrapper.alignmentDirName,
                         ProjectWrapper.alignmentDirName + "_project.xml"))
        rootName = mcProj.mcTree.getRootName()
        rootPath = os.path.join(workDir, ProjectWrapper.alignmentDirName,
        rootName, rootName + '.maf')
        cmd = 'mv %s %s' % (rootPath, options.outputMaf)
        system(cmd)
    envFile = getEnvFilePath()
    logFile = os.path.join(workDir, 'cactus.log')
    pjPath = os.path.join(workDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    logHandle = open(logFile, "a")
    logHandle.write("\n\n%s: Beginning HAL Export\n\n" % str(
        datetime.datetime.now()))
    logHandle.close()
    cmd = '. %s && cactus2hal.py %s %s >> %s 2>&1' % (envFile, pjPath,
                                                      outputHalFile, logFile)
    system(cmd)
    logHandle = open(logFile, "a")
    logHandle.write("\n%s: Finished HAL Export \n" % str(
        datetime.datetime.now()))
    logHandle.close()
def runCactus(workDir, jtCommands, jtPath, options):
    envFile = getEnvFilePath()
    pjPath = os.path.join(workDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    logFile = os.path.join(workDir, 'cactus.log')

    if options.overwrite:
        overwriteFlag = '--overwrite'
        system("rm -f %s" % logFile)
    else:
        overwriteFlag = ''

    logHandle = open(logFile, "a")
    logHandle.write("\n%s: Beginning Progressive Cactus Alignment\n\n" % str(
        datetime.datetime.now()))
    logHandle.close()
    cmd = '. %s && cactus_progressive.py %s %s %s >> %s 2>&1' % (envFile,
                                                                 jtCommands,
                                                                 pjPath,
                                                                 overwriteFlag,
                                                                 logFile)
    jtMonitor = JobStatusMonitor(jtPath, pjPath, logFile,
                                 deadlockCallbackFn=abortFunction(jtPath,
                                                                  options))
    if options.database == "kyoto_tycoon":
        jtMonitor.daemon = True
        jtMonitor.start()
        
    system(cmd)
    logHandle = open(logFile, "a")
    logHandle.write("\n%s: Finished Progressive Cactus Alignment\n" % str(
        datetime.datetime.now()))
    logHandle.close()
Example #11
def checkOptions(parser, args, options):
    if not options.indir:
        raise InputOptionError("Input directory is required. None was given.\n")
    if not os.path.exists(options.indir):
        raise InputOptionError("Input directory %s does not exist\n" % options.indir)
    if not os.path.exists(options.outdir):
        system("mkdir -p %s" % options.outdir)
Example #12
    def testKeepingCoverageOnIngroups(self):
        """Tests whether the --ingroupCoverageDir option works as
        advertised."""
        encodeRegion = "ENm001"
        ingroups = ["human", "cow"]
        outgroups = ["macaque", "rabbit", "dog"]
        regionPath = os.path.join(self.encodePath, encodeRegion)
        ingroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), ingroups)
        outgroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), outgroups)
        # Run blast in "ingroup vs outgroups" mode, requesting to keep
        # the bed files that show outgroup coverage on the ingroup.
        toilDir = os.path.join(self.tempDir, "tmp_toil")
        outgroupFragmentPaths = [getTempFile(rootDir=self.tempDir) for outgroup in outgroups]
        ingroupCoveragePaths = [getTempFile(rootDir=self.tempDir) for ingroup in ingroups]
        runCactusBlastIngroupsAndOutgroups(ingroups=ingroupPaths, outgroups=outgroupPaths, alignmentsFile=self.tempOutputFile, outgroupFragmentPaths=outgroupFragmentPaths, ingroupCoveragePaths=ingroupCoveragePaths, toilDir=toilDir)
        for i, ingroupPath in enumerate(ingroupPaths):
            # Get the coverage from the outgroups independently and
            # check that it's the same as the file in
            # ingroupCoverageDir
            otherIngroupPath = ingroupPaths[1] if i == 0 else ingroupPaths[0]
            # To filter out alignments from the other ingroup and
            # self-alignments we need to create a fasta with all the
            # outgroup fragments in it.
            outgroupsCombined = getTempFile(rootDir=self.tempDir)
            for outgroupFragmentPath in outgroupFragmentPaths:
                system("cat %s >> %s" % (outgroupFragmentPath, outgroupsCombined))
            independentCoverageFile = getTempFile(rootDir=self.tempDir)
            calculateCoverage(fromGenome=outgroupsCombined, sequenceFile=ingroupPath, cigarFile=self.tempOutputFile, outputFile=independentCoverageFile)

            # find the coverage file cactus_blast kept (should be
            # named according to the basename of the ingroup path
            # file)
            keptCoverageFile = ingroupCoveragePaths[i]
            self.assertTrue(filecmp.cmp(independentCoverageFile, keptCoverageFile))
Example #13
 def testScriptTree_Example2(self):
     """Tests that the global and local temp dirs of a job behave as expected.
     """
     for test in xrange(self.testNo):
         command = "scriptTreeTest_Wrapper2.py --jobTree %s --logLevel=INFO --retryCount=0" % self.jobTreeDir
         system(command)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Example #14
 def run(self):
     localTempDir = self.getLocalTempDir()
     i = 0
     localfiles = []
     for f in self.files:
         if not os.path.exists(f): #HACK
             continue
         localname = os.path.join(localTempDir, "%s%d.bam" %(os.path.basename(f).split('.')[0], i))
         system("scp -C %s %s" %(f, localname))
         localfiles.append(localname)
         i += 1
     mergeFile = os.path.join(localTempDir, "merge.bam")
     if len(localfiles) == 1:
         system("mv %s %s" %(localfiles[0], mergeFile))
     else:
         bamStr = " ".join(localfiles)
         logger.info("Merging bams...\n")
         mergeCmd = "samtools merge %s %s" %(mergeFile, bamStr)
         system( mergeCmd )
     
     sortPrefix = os.path.join(localTempDir, "mergeSorted")
     sortCmp = "samtools sort %s %s" %( mergeFile, sortPrefix )
     system( sortCmp )
     
     system( "cp %s.bam %s" %(sortPrefix, self.outdir) )
     #Get Snps info:
     self.setFollowOnTarget( Snp(self.outdir, self.options) )
Example #15
    def run(self):
        self.logToMaster("DownSampling\n")
        opts = self.options
        global_dir = self.getGlobalTempDir()
        #sampling_dir = os.path.join(global_dir, "down_sampling")
        sampling_dir = os.path.join(opts.outdir, "down_sampling")
        system("mkdir -p %s" % sampling_dir)

        for sam in os.listdir(self.sampledir):
            samdir = os.path.join(self.sampledir, sam)
            sample = pickle.load(gzip.open(os.path.join(samdir, sam), "rb"))
            out_samdir = os.path.join(sampling_dir, sam) 
            system("mkdir -p %s" % out_samdir)
            if opts.sampling_uniq:  # sampling uniq clones
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                              out_samdir, libsample.sampling,
                                              opts.sampling_uniq, 'uniq'))
            elif opts.sampling_top:  # sampling reads, then report top clones
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                out_samdir, libsample.sampling, opts.sampling,
                                "top", opts.sampling_top))
            else:  # sampling reads
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                out_samdir, libsample.sampling, opts.sampling))
        if opts.normalize:
            self.setFollowOnTarget(Normalize(sampling_dir, opts))
        else:
            self.setFollowOnTarget(Analyses(sampling_dir, opts))
Example #16
 def run(self):
     cmd = "halLiftover --outPSL --tab %s %s %s %s %s" % (self.opts.halfile,
             self.opts.query, self.bedfile, self.opts.target, self.liftfile)
     system(cmd)
     #system("cp %s %s_liftoverpsl" % (self.liftfile, self.opts.outfile))
     status = get_liftover_status(self.bedfile, self.liftfile, self.opts.edge)
     print_status(status, self.statusfile)
Example #17
 def testScriptTree_Example(self):
     """Uses the jobTreeTest code to test the scriptTree Target wrapper.
     """
     for test in xrange(self.testNo):
         command = "scriptTreeTest_Wrapper.py --jobTree %s --logLevel=INFO --retryCount=10" % self.jobTreeDir
         system(command)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Example #18
def getRandomConfigFile():
    tempConfigFile = getTempFile(rootDir="./", suffix=".xml")
    config = ET.parse(os.path.join(cactusRootPath(), "cactus_config.xml")).getroot()
    cafNode = config.find("caf")
    assert len(config.findall("caf")) == 1
    
    annealingRounds = 1 + int(random.random() * 10)
    cafNode.attrib["annealingRounds"] = " ".join([ str(1 + int(random.random() * 10)) for i in xrange(annealingRounds) ])
    deannealingRounds = list(set([ 1 + int(random.random() * 10) for i in xrange(int(random.random() * 10)) ]))
    deannealingRounds.sort()
    cafNode.attrib["deannealingRounds"] = " ".join([ str(i) for i in deannealingRounds ])
    cafNode.attrib["trim"] = " ".join([ str(1 + int(random.random() * 5)) for i in xrange(annealingRounds) ])
    
    cafNode.attrib["alignRepeatsAtLoop"] = str(random.random() * annealingRounds)
    
    cafNode.attrib["minimumTreeCoverage"] = str(random.random())
    cafNode.attrib["blockTrim"] = str(int(random.random() * 5))
    cafNode.attrib["ignoreAllChainsLessThanMinimumTreeCoverage"] = str(random.choice([0, 1]))
    cafNode.attrib["minimumBlockDegree"] = str(random.choice([0, 5]))
    
    checkNode = config.find("check")
    checkNode.attrib["runCheck"] = "1"
    
    checkNode = config.find("normal")
    checkNode.attrib["iterations"] = "2"
    
    #Now print the file..
    fileHandle = open(tempConfigFile, 'w')
    ET.ElementTree(config).write(fileHandle)
    fileHandle.close()
    if getLogLevelString() == "DEBUG":
        system("cat %s" % tempConfigFile)
    return tempConfigFile
 def run(self):
     previousOutputFile = None
     previousOutputFile2 = None
     blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
     for i in xrange(self.options.blanchetteRepeats):
         trueAlignmentMFA = os.path.join(os.path.join(blanchettePath, "%.2i.job" % i), "true.mfa")
         trueAlignmentMAF = os.path.join(self.getLocalTempDir(), "temp.maf")
         treeFile = os.path.join(blanchettePath, "tree.newick")
         system("mfaToMaf --mfaFile %s --outputFile %s --treeFile %s" % (trueAlignmentMFA, trueAlignmentMAF, treeFile))
         
         
         trueRenamedMAF = trueAlignmentMAF + ".renamed"
         expPath = os.path.join(self.outputDir, str(i), "experiment.xml")
         applyNamingToMaf(expPath, trueAlignmentMAF, trueRenamedMAF)
         trueAlignmentMAF = trueRenamedMAF
         if self.params.vanilla == False:            
             predictedAlignmentMaf = os.path.join(self.outputDir, str(i), "progressiveCactusAlignment", "Anc0", "Anc0.maf")
         else:
             predictedAlignmentMaf = os.path.join(self.outputDir, str(i), "cactusVanilla.maf")
         
         outputFile = os.path.join(self.getLocalTempDir(), "temp%i" % i)
         system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s" % (trueAlignmentMAF, predictedAlignmentMaf, outputFile))
         system("cp %s %s" % (outputFile, os.path.join(self.outputDir, str(i), "mafComparison.xml")))
         if previousOutputFile != None:
             system("mergeMafComparatorResults.py --results1 %s --results2 %s --outputFile %s" % (outputFile, previousOutputFile, outputFile))
         previousOutputFile = outputFile
         
     system("mv %s %s" % (previousOutputFile, os.path.join(self.outputDir, "mafComparison.xml")))   
Example #20
    def run(self):
        # filter by size
        starttime = time.time()
        opts = self.opts
        clones = pickle.load(gzip.open(self.samplefile, 'rb'))
        if (opts.mincount > 1 or opts.maxcount > 0 or opts.minfreq > 0 or
            opts.maxfreq > 0):
            clones = filter_by_size(clones, opts.mincount, opts.maxcount,
                                    opts.minfreq, opts.maxfreq)
        msg = ("Filter_by_size for file %s done in %.4f s" %
                                 (self.samplefile, time.time() - starttime))
        logger.info(msg)
        starttime = time.time()

        # filter by status
        pclones = filter_by_status(clones, True)
        npclones = filter_by_status(clones, False)
        
        filename = os.path.basename(self.samplefile)
        if pclones:
            pdir = os.path.join(self.outdir, "productive", self.name)
            system("mkdir -p %s" % pdir)
            pfile = os.path.join(pdir, filename)
            pickle.dump(pclones, gzip.open(pfile, "wb"))
        if npclones:    
            npdir = os.path.join(self.outdir, "non_productive", self.name)
            system("mkdir -p %s" % npdir)
            npfile = os.path.join(npdir, filename)
            pickle.dump(npclones, gzip.open(npfile, "wb"))
        msg = ("Filter_by_status for file %s done in %.4f s" %
                                 (self.samplefile, time.time() - starttime))
        logger.info(msg)
        self.setFollowOnTarget(libcommon.CleanupFile(self.samplefile))
Example #21
def runCactusProgressive(inputDir,
                      jobTreeDir, 
                      logLevel=None, retryCount=0, 
                      batchSystem="single_machine", 
                      rescueJobFrequency=None,
                      skipAlignments=False,
                      buildHal=None,
                      buildFasta=None,
                      buildAvgs=False, 
                      jobTreeStats=False,
                      maxThreads=None,
                      maxCpus=None,
                      defaultMemory=None,
                      recursive=None,
                      logFile=None,
                      event=None,
                      extraJobTreeArgumentsString="",
                      profileFile=None):
    command = ("cactus_progressive.py %s" % inputDir) + " " + _fn(jobTreeDir, 
                      logLevel, retryCount, batchSystem, rescueJobFrequency, skipAlignments,
                      buildAvgs, None,
                      buildHal,
                      buildFasta,
                      jobTreeStats, maxThreads, maxCpus, defaultMemory, logFile, extraJobTreeArgumentsString=extraJobTreeArgumentsString) + \
                      (" %s %s" % (nameValue("recursive", recursive, bool),
                                      nameValue("event", event)))
    if profileFile != None:
        command = "python -m cProfile -o %s %s/bin/%s" % (profileFile, cactusRootPath(), command)
    system(command)                   
    logger.info("Ran the cactus progressive okay")
Example #22
def runCactusBlast(sequenceFiles, outputFile, jobTreeDir,
                   chunkSize=None, overlapSize=None, 
                   logLevel=None, 
                   blastString=None, 
                   selfBlastString=None,
                   compressFiles=None,
                   lastzMemory=None,
                   targetSequenceFiles=None):
    logLevel = getLogLevelString2(logLevel)
    chunkSize = nameValue("chunkSize", chunkSize, int)
    overlapSize = nameValue("overlapSize", overlapSize, int)
    blastString = nameValue("blastString", blastString, str)
    selfBlastString = nameValue("selfBlastString", selfBlastString, str)
    compressFiles = nameValue("compressFiles", compressFiles, bool)
    lastzMemory = nameValue("lastzMemory", lastzMemory, int)
    if targetSequenceFiles != None: 
        targetSequenceFiles = " ".join(targetSequenceFiles)
    targetSequenceFiles = nameValue("targetSequenceFiles", targetSequenceFiles, quotes=True)
    command = "cactus_blast.py %s  --cigars %s %s %s %s %s %s %s %s --jobTree %s --logLevel %s" % \
            (" ".join(sequenceFiles), outputFile,
             chunkSize, overlapSize, blastString, selfBlastString, compressFiles, 
             lastzMemory, targetSequenceFiles, jobTreeDir, logLevel)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the cactus_blast command okay")
def split_fasta(input_fasta, work_dir):
    out_root = os.path.join(work_dir, "out") + '/'
    os.mkdir(out_root)
    system("faSplit byname {input} {out_root}".format(
        input=input_fasta,
        out_root=out_root))
    return glob(os.path.join(work_dir, "out/*"))
Example #24
    def run(self):
        #self.logToMaster("Get_Vjins")
        tempdir = "%s_tempdir" % os.path.splitext(self.outfile)[0]
        system("mkdir -p %s" % tempdir)

        model = pickle.load(gzip.open(self.modelfile, 'rb'))
        items = self.clone.split('_')
        v = items[0]
        j = items[2]
        batchsize = 100000

        for vdel in self.vdels:
            v_cdr3_nt = self.v_nt if vdel == 0 else self.v_nt[: -1 * vdel]
            v_hang = len(v_cdr3_nt) % 3
            for jdel in self.jdels:
                j_cdr3_nt = self.j_nt if jdel == 0 else self.j_nt[jdel: ]
                d_nts = self.devent.left_nts + self.devent.right_nts
                vjins_nts = get_vjins_emptyd(self.v_nt, vdel, self.j_nt, jdel,
                                             d_nts, self.cdr3_aa)
                if vjins_nts is None:
                    continue
                
                #self.logToMaster("Empty D: vdel: %d, jdel: %d, vjins: %d\n" % (vdel, jdel, len(vjins_nts)))
                numbatches = len(vjins_nts) / batchsize
                if len(vjins_nts) % batchsize > 0:
                    numbatches += 1
                for index in xrange(numbatches):
                    outfile = os.path.join(tempdir, "%d_%d_%d" % (vdel, jdel, index))
                    endindex = min(len(vjins_nts), (index + 1) * batchsize)
                    batch_vjins_nts = vjins_nts[index * batchsize: endindex]
                    self.addChildTarget(Get_Vjins_Batch(batch_vjins_nts, v,
                          v_hang, v_cdr3_nt, j, j_cdr3_nt, self.d, d_nts,
                          self.cdr3_aa, vdel, jdel, self.devent.d5del,
                          self.devent.d3del, model, outfile))
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg3(tempdir, self.outfile))
Example #25
 def run(self):
     if os.path.exists(self.outfile):
         system("rm -f" % self.outfile)
     for batch in os.listdir(self.indir):
         batchfile = os.path.join(self.indir, batch)
         clones = pickle.load(gzip.open(batchfile, "rb"))
         write_clones(self.outfile, clones, True)
Example #26
    def run(self):
        #self.logToMaster("Get_Vd_Dj_Ins") 
        model = pickle.load(gzip.open(self.modelfile, "rb"))
        ins_vds = []
        for vd in model.ins_vd.keys():
            if vd >= 0:
                ins_vds.append(vd)
        ins_djs = []
        for dj in model.ins_dj.keys():
            if dj >= 0:
                ins_djs.append(dj)

        vdir = os.path.join(self.outdir, "vdels")
        system("mkdir -p %s" % vdir)
        for vdel in self.vdels:
            voutfile = os.path.join(vdir, str(vdel))
            self.addChildTarget(Get_Ins(get_vdins_events, vdel, self.v_nt,
                             self.devent, self.cdr3_aa, voutfile, ins_vds))
        jdir = os.path.join(self.outdir, 'jdels')
        system("mkdir -p %s" % jdir)
        for jdel in self.jdels:
            joutfile = os.path.join(jdir, str(jdel))
            self.addChildTarget(Get_Ins(get_djins_events, jdel, self.j_nt,
                             self.devent, self.cdr3_aa, joutfile, ins_djs))
        
        outfile = os.path.join(self.outdir, "events")
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg(self.clone, vdir, jdir,
                self.v_nt, self.j_nt, self.d, self.d_nt, self.devent, outfile,
                self.modelfile))
Example #27
    def run(self):
        system("mkdir -p %s" % self.outdir)
        clone2sams = read_clone_file(self.clone_file, True)
        if os.path.isdir(self.model):
            model = rcommon.get_median_model(self.model)
        else:
            model = pickle.load(gzip.open(self.model, "rb"))
        sam2total, group2sams = read_clonesize(self.numclone_file)
        len2llh = read_llh(self.lenllh, intkey=True)
        clone2llh = read_llh(self.clonellh)

        global_dir = self.getGlobalTempDir()
        lencount_dir = os.path.join(global_dir, "sam2len2count")
        system("mkdir -p %s" % lencount_dir)
        for s in os.listdir(self.db_dir):
            samdir = os.path.join(self.db_dir, s)
            lencount_file = os.path.join(lencount_dir, s)
            self.addChildTarget(GetLencount(samdir, lencount_file))
        self.setFollowOnTarget(
            GetLlhs(
                clone2sams,
                self.outdir,
                model,
                lencount_dir,
                group2sams,
                self.ingroup,
                self.outgroup,
                len2llh,
                clone2llh,
            )
        )
Example #28
 def test_bedParsing(self):
     """ mafComparator should parse a bed file and use the intervals for testing
     """
     for maf1, maf2, bed, totalTrue, totalTrueInInterval in self.knownValues:
         if not os.path.exists('tempTestFiles'):
             os.mkdir('tempTestFiles')
         f = open(self.maf1path, 'w')
         f.write('%s%s%s' % (self.header, maf1, self.footer))
         f.close()
         f = open(self.maf2path, 'w')
         f.write('%s%s%s' % (self.header, maf2, self.footer))
         f.close()
         f = open(self.bedpath, 'w')
         f.write('%s' % bed)
         f.close()
         cmd = ['mafComparator']
         cmd.append('--mafFile1=%s' % self.maf1path)
         cmd.append('--mafFile2=%s' % self.maf2path)
         cmd.append('--outputFile=%s' % os.path.join('tempTestFiles', 'output.xml'))
         if bed != '':
             cmd.append('--bedFiles=%s' % os.path.join('tempTestFiles', 'bed.bed'))
         cmd.append('--sampleNumber=1000 --logLevel %s' % getLogLevelString())
         system(" ".join(cmd))
         tree = ET.parse(os.path.join('tempTestFiles', 'output.xml'))
         homTests = tree.findall('homologyTests')
         self.assertAlmostEquals(totalTrue, 
                                 float(homTests[0].find('aggregateResults').find('all').attrib['totalTrue']))
         if totalTrueInInterval is None:
             self.assertEqual(None, homTests[0].find('aggregateResults').find('A'))
         else:
             self.assertAlmostEquals(totalTrueInInterval, 
                                     float(homTests[0].find('aggregateResults').find('A').attrib['totalTrue']))
         shutil.rmtree(os.path.dirname(self.maf1path))
Example #29
 def realignSamFile(self):
     """Chains and then realigns the resulting global alignments.
     """
     tempSamFile = os.path.join(self.getGlobalTempDir(), "temp.sam")
     system("cp %s %s" % (self.outputSamFile, tempSamFile))
     self.addChildTargetFn(realignSamFileTargetFn, args=(tempSamFile, self.outputSamFile, 
                                                         self.readFastqFile, self.referenceFastaFile, self.options))
Example #30
    def run(self):
        #self.logToMaster("Get_Vd_Dj_Ins_Agg")
        tempdir = "%s_tempdir" % os.path.splitext(self.outfile)[0]
        system("mkdir -p %s" % tempdir)
        items = self.clone.split('_')
        v = items[0]
        cdr3_aa = items[1]
        j = items[2]
        if self.devent.d3del == 0:
            d_cdr3_nt = self.d_nt[self.devent.d5del: ]
        else:
            d_cdr3_nt = self.d_nt[self.devent.d5del: -1 * self.devent.d3del]

        for vdelname in os.listdir(self.vdir):
            vfile = os.path.join(self.vdir, vdelname)
            vdel = int(vdelname.split("_batch")[0])
            v_cdr3_nt = self.v_nt if vdel == 0 else self.v_nt[: -1 * vdel]
            for jdelname in os.listdir(self.jdir):
                jfile = os.path.join(self.jdir, jdelname)
                jdel = int(jdelname.split("_batch")[0])
                j_cdr3_nt = self.j_nt if jdel == 0 else self.j_nt[jdel: ]

                outfile = os.path.join(tempdir, "%s_%s" % (vdelname, jdelname))
                self.addChildTarget(Get_Vd_Dj_Ins_Agg2(v, vdel, vfile,
                         v_cdr3_nt, j, jdel, jfile, j_cdr3_nt, self.d,
                         self.devent.d5del, self.devent.d3del, d_cdr3_nt,
                         cdr3_aa, self.modelfile, outfile))
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg3(tempdir, self.outfile))
Example #31
    ##Record time to run
    baseRuntime = runNaiveBlast(seqFile1, seqFile2, tempOutputFile, 
                  lastzOptions="--ambiguous=iupac,100 --ydrop=3000")
    results1 = loadResults(tempOutputFile)
    logger.info("Loaded first results")
    
    for setting in settings:
        #Run the blast
        ##Record time to run
        runtime = runNaiveBlast(seqFile1, seqFile2, tempOutputFile2,
                      lastzOptions=setting)
        
        #Now compare the results
        results2 = loadResults(tempOutputFile2)
        logger.info("Loaded second results")
        
        def fm(f):
            return "%.5f" % float(f)
        
        def fm2(f):
            return str(int(f))
        
        resultsComparator = ResultComparator(results1, results2)
        print((",".join([ species1, species2, "_".join(("_".join(setting.split())).split(",")), fm(distance), fm(resultsComparator.sensitivity),
                         fm(resultsComparator.specificity),
                         fm2(resultsComparator.intersectionSize), fm2(resultsComparator.unionSize),
                         fm2(resultsComparator.trueDifference), fm2(resultsComparator.predictedDifference),
                         fm2(resultsComparator.trueHits), fm2(resultsComparator.predictedHits), fm2(resultsComparator.trueHits -resultsComparator.predictedHits), fm(baseRuntime), fm(runtime) ])))
        
system("rm -rf %s" % tempDir)
Example #32
def runHalCons(halPath, outputPath):
    system("halCons %s > outputPath" % halPath)
Example #33
def runHalGen(preset, seed, hdf5Chunk, hdf5Compression, outPath):
    system("halRandGen --preset %s --seed %d --hdf5Chunk %d\
    --hdf5Compression %d %s" % (preset, seed, hdf5Chunk, hdf5Compression, outPath))
Example #34
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     system("rm -rf %s" % self.tempDir)
     for tempFile in self.tempFiles:
         os.remove(tempFile)
Example #35
    def run(self):
        system("mv %s/%s-all.xml %s" %
               (self.outdir, "genemapHomolog", self.extraInfoDir))
        system("rm -f %s/%s-*.xml" % (self.outdir, "genemapHomolog"))

        system("mv %s/%s-all.txt %s" %
               (self.outdir, "genemapHomolog", self.extraInfoDir))
        system("rm -f %s/%s*.txt" % (self.outdir, "genemapHomolog"))

        system("mv %s/%s-*.xml %s" %
               (self.outdir, "genemapChain", self.extraInfoDir))
        system("mv %s/%s %s" % (self.outdir, "gene2chain", self.extraInfoDir))
Example #36
def checkHalTree(halfile, outdir, options):
    treefile = os.path.join(outdir, "haltree.nw")
    system("halStats --tree %s > %s" % (halfile, treefile))
    tree = Phylo.read(treefile, "newick")
    options.treeFile = treefile
    options.tree = tree
Example #37
 def run(self):
     system("rm %s/*bed" % self.cladedir)
Example #38
def dless(target, split_ss_path, gff_path, model):
    """
    Main function for running dless. Strips all headers out of final gff.
    """
    system('dless {} {} | sed "/^#/ d" > {}'.format(split_ss_path, model,
                                                    gff_path))
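The sed filter in dless just drops header lines beginning with '#'. A rough file-based Python equivalent, shown only to spell out what that step does (dless itself streams through a pipe rather than an intermediate file):

def stripGffHeaders(inPath, outPath):
    #Drop header lines starting with '#', mirroring: sed "/^#/ d"
    with open(inPath) as src:
        with open(outPath, "w") as dst:
            for line in src:
                if not line.startswith("#"):
                    dst.write(line)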
Example #39
def runJobTreeStatusAndFailIfNotComplete(jobTreeDir):
    command = "jobTreeStatus --jobTree %s --failIfNotComplete --verbose" % jobTreeDir
    system(command)
Example #40
def runJobTreeStats(jobTree, outputFile):
    system("jobTreeStats --jobTree %s --outputFile %s" % (jobTree, outputFile))
    logger.info("Ran the job-tree stats command apparently okay")
Example #41
def getChromSizesFromHal(halfile, genome, outfile):
    system("halStats --chromSizes %s %s > %s" % (genome, halfile, outfile))
Example #42
    def run(self):
        #localTempDir = getTempFile(rootDir=self.getGlobalTempDir())
        localTempDir = self.getLocalTempDir()
        config = os.path.join(localTempDir, "cactus_workflow_config.xml")
        system("cp %s %s" %
               (self.config, config))  #Copy the config file to local disk

        #Copy sequences to localTempDir:
        localSeqdir = os.path.join(localTempDir, "data")
        system("mkdir -p %s" % localSeqdir)
        for spc in self.species.split():
            currseqdir = os.path.join(self.seqdir, spc)
            system("cp -r %s %s" % (currseqdir, localSeqdir))

        #Make a dir for this region if it does not already exist
        #system("rm -fR %s" %self.region)
        system("mkdir -p %s" % os.path.join(os.getcwd(), self.region))

        #Write experiment.xml for this region:
        experimentFile = os.path.join(localTempDir, "experiment.xml")
        writeExpCommand = "cactus_writeExperimentXml.py --species \"%s\" --tree \"%s\" --output %s --sequenceDir %s --config %s --databaseString %s"\
                          %(self.species, self.tree, experimentFile, localSeqdir, config, self.dbStr)
        system("%s" % writeExpCommand)
        system("cp %s %s" %
               (experimentFile,
                os.path.join(os.getcwd(), self.region, "experiment.xml")))
        logger.info("Got experiment.xml file for %s with command: %s\n" %
                    (self.region, writeExpCommand))

        #Now ready to runCactus:
        batchSystem = "singleMachine"
        jobTree = os.path.join(localTempDir, "jobTree")
        cactusCommand = "cactus_workflow.py --stats --batchSystem %s --experiment %s --buildReference --setupAndBuildAlignments --logDebug --jobTree %s" \
                        %(batchSystem, experimentFile, jobTree)
        logger.info("Going to run cactus now, the command is %s" %
                    cactusCommand)
        system("%s" % cactusCommand)
        system("cp -r %s %s" %
               (jobTree, os.path.join(os.getcwd(), self.region, "jobTree")))
        logger.info("Done cactusRun for %s\n" % self.region)

        #Run genemapChain:
        self.addChildTarget(
            RunGenemapChain(self.region, self.dbStr, self.options.outdir,
                            self.options.refSpecies, self.genedir))
        self.addChildTarget(
            RunGenemapHomolog(self.region, self.dbStr, self.options.outdir,
                              self.options.refSpecies, self.genedir))
Example #43
def runWorkflow_TestScript(testId,
                           sequences,
                           newickTreeString,
                           outputDir=None,
                           batchSystem="single_machine",
                           buildAvgs=False,
                           buildHal=False,
                           buildFasta=False,
                           configFile=None,
                           buildToilStats=False,
                           constraints=None,
                           progressive=False,
                           cactusWorkflowFunction=runCactusWorkflow,
                           logLevel=None):
    """Runs the workflow and various downstream utilities.
    The testId parameter is used to allocate a unique port so that tests
    can run in parallel.
    """
    logger.info("Running cactus workflow test script")
    logger.info("Got the following sequence dirs/files: %s" %
                " ".join(sequences))
    logger.info("Got the following tree %s" % newickTreeString)

    #Setup the output dir
    assert outputDir != None
    logger.info("Using the output dir: %s" % outputDir)

    #Setup the flower disk.
    experiment = getCactusWorkflowExperimentForTest(testId,
                                                    sequences,
                                                    newickTreeString,
                                                    outputDir=outputDir,
                                                    configFile=configFile,
                                                    constraints=constraints,
                                                    progressive=progressive)
    experimentFile = os.path.join(outputDir, "experiment.xml")
    experiment.writeXML(experimentFile)
    logger.info("The experiment file %s\n" % experimentFile)

    #Setup the job tree dir.
    toilDir = os.path.join(outputDir, "toil")
    logger.info("Got a job tree dir for the test: %s" % toilDir)

    #Run the actual workflow
    cactusWorkflowFunction(experimentFile,
                           toilDir,
                           batchSystem=batchSystem,
                           buildAvgs=buildAvgs,
                           buildHal=buildHal,
                           buildFasta=buildFasta,
                           toilStats=buildToilStats,
                           logLevel=logLevel)
    logger.info("Ran the the workflow")
    #Now run various utilities..
    if buildToilStats:
        toilStatsFile = os.path.join(outputDir, "toilStats.xml")
        runToilStats(toilDir, toilStatsFile)

    #Now remove everything we generate
    system("rm -rf %s %s" % (toilDir, experimentFile))

    #Return so calling function can cleanup
    return experiment
Example #44
 def testSonLibCTests(self):
     """Run m,ost the sonLib CuTests, fail if any of them fail.
     """
     system("sonLibTests %s" % getLogLevelString())
Example #45
 def testMaf(self):
     """Run all the api CuTests, fail if any of them fail.
     """
     system("halMafTests")
Example #46
 def run(self):
     f = open(self.outfile, 'w')
     f.write("#Name\tLength\tMap\tIns\tDels\tOO\tInframe\n")
     f.close()
     system("cat %s/* >> %s" % (self.indir, self.outfile))
Example #47
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     system("rm -rf %s" % self.tempDir)
Example #48
    def run(self):
        regions = getList(self.options.regions)
        genemapChainXmls = []  #list of all genemapChain output Xmls
        genemapHomologXmls = []  #list of all genemapHomology output Xmls
        for r in regions:
            genemapChainXmls.append(
                os.path.join(self.output, "%s-%s.xml" % ("genemapChain", r)))
            genemapHomologXmls.append(
                os.path.join(self.output, "%s-%s.xml" % ("genemapHomolog", r)))

        #Directory with more detailed information, if interested
        extraInfoDir = os.path.join(self.output, "extraInfo")
        system("mkdir -p %s" % extraInfoDir)
        system("chmod ug+xrw %s" % extraInfoDir)

        #Merge homologXmls of all regions:
        allHomologXml = "%s/%s-all.xml" % (self.output, "genemapHomolog")
        mergeXmls(genemapHomologXmls, allHomologXml)

        genemapHomolog = "%s/%s-*.txt" % (self.output, "genemapHomolog")
        allHomolog = "%s/%s-all.txt" % (self.output, "genemapHomolog")
        system("rm -f %s" % allHomolog)
        system("cat %s > %s" % (genemapHomolog, allHomolog))

        #geneToChain = "%s/%s" %(extraInfoDir, "gene2chain")
        geneToChain = "%s/%s" % (self.output, "gene2chain")

        genemapChainCommand = "genemapChain.py -o %s -c \"%s\" -i \"%s\" > %s" %(extraInfoDir, "cat",\
                               " ".join(genemapChainXmls), geneToChain)
        system("%s" % genemapChainCommand)

        chainMergeHomolog = "%s/%s" % (extraInfoDir, "chainMergeHomolog")
        chainMergeHomologTex = "%s/%s" % (self.output, "chainVsDup.tex")
        #chainMergeHomologTex = chainMergeHomolog + ".tex"
        missedGenes = "%s/%s" % (extraInfoDir, "missedGenes")
        genemapMergeCommand = "genemapMerge.py -f c -n %s %s %s %s %s > %s" %(self.options.runName, \
                               allHomolog, geneToChain, chainMergeHomolog, chainMergeHomologTex, missedGenes)
        system("%s" % genemapMergeCommand)

        homologCmp = "%s/%s" % (self.output, "homologCmp")
        homologCmpTex = "%s/%s" % (self.output, "homologCmp.tex")
        homologCmpV = "%s/%s" % (extraInfoDir, "homologCmpV")
        cactusVsMultizCommand = "genemapCactusVsMultiz.py -a %s -d %s %s %s %s > %s" %(extraInfoDir + "/perSpcDiff", \
                        self.options.geneDir + "/all.tx", self.options.multiz, allHomologXml, homologCmp, homologCmpV)
        system("%s" % cactusVsMultizCommand)

        makeLatexTabCommand = "genemapMakeLatexTab.py -s \"%s\" -n %s %s %s" \
                               %(self.species, self.options.runName, homologCmp, homologCmpTex)
        system("%s" % makeLatexTabCommand)

        #Cleanup now...
        self.setFollowOnTarget(Cleanup(self.output, extraInfoDir))
def runEvalMFAToMAF(mfa, maf):
    command = "mfaToMaf -b %s -d %s --logLevel DEBUG" % (mfa, maf)
    system(command)
    logger.info("Converted MFA %s to MAF %s\n" % (mfa, maf))
 def run(self):
     system("rm -rf %s" % self.dir)
     logger.info("Clean up tempDir for next run\n")
def runEvalMAFComparator(mafFile1, mafFile2, outputFile, sampleNumber):
    command = "mafComparator -b %s -c %s -d %s -e %s" % (
        mafFile1, mafFile2, outputFile, sampleNumber)
    system(command)
    logger.info("Compared MAF %s with MAF %s\n" % (mafFile1, mafFile2))
Example #52
 def run(self):
     bedfile = os.path.join(self.cladedir, "%s.bed" %self.target)
     system("halLiftover %s %s %s %s %s" %(self.halfile, self.query, self.queryBed, self.target, bedfile))
     #Convert to big bed:
     bigbedfile = os.path.join(self.cladedir, "%s.bb" %self.target)
     system("bedToBigBed %s %s %s" %(bedfile, self.chrsizefile, bigbedfile))
Example #53
    def run(self):
        options = self.options
        localHalfile = os.path.join(self.outdir,
                                    os.path.basename(self.halfile))
        if os.path.abspath(localHalfile) != os.path.abspath(self.halfile):
            if os.path.exists(localHalfile):
                system("rm %s" % localHalfile)
            if options.cpHal:
                system("cp %s %s" %
                       (os.path.abspath(self.halfile), localHalfile))
            else:
                system("ln -s %s %s" %
                       (os.path.abspath(self.halfile), localHalfile))

        #Create lod files if useLod is specified
        lodtxtfile, loddir = getLod(options, localHalfile, self.outdir)

        #Get the maximum window size to display SNPs
        if lodtxtfile:
            snpwidth = getLodLowestLevel(lodtxtfile) - 1
            if snpwidth > -1:
                options.snpwidth = snpwidth

        genomes = sortByProperName(self.genomes, self.options.properName)

        #Create documentation files:
        docdir = os.path.join(self.outdir, "documentation")
        system("mkdir -p %s" % docdir)
        writeDocFiles(docdir, self.options)

        #Create genomes.txt file
        filename = os.path.join(self.outdir, "genomes.txt")
        f = open(filename, 'w')
        #for genome in self.genomes:
        for genome in genomes:
            genomedir = os.path.join(self.outdir, genome)
            f.write("genome %s\n" % genome)
            f.write("twoBitPath %s/%s.2bit\n" % (genome, genome))

            #create trackDb for the current genome:
            if lodtxtfile == '':
                self.addChildTarget(
                    WriteTrackDbFile(self.genomes,
                                     "../%s" % os.path.basename(self.halfile),
                                     genomedir, options))
            else:
                self.addChildTarget(
                    WriteTrackDbFile(self.genomes,
                                     "../%s" % os.path.basename(lodtxtfile),
                                     genomedir, options))
            f.write("trackDb %s/trackDb.txt\n" % genome)

            #other info
            f.write("groups groups.txt\n")

            writeDescriptionFile(genome, genomedir)
            f.write("htmlPath %s/description.html\n" % genome)
            f.write("description %s\n" %
                    getProperName(genome, self.options.properName))
            f.write("organism %s\n" %
                    getProperName(genome, self.options.properName))
            f.write("orderKey 4800\n")
            f.write("scientificName %s\n" % genome)

            seq2len = self.genome2seq2len[genome]
            (seq, l) = getLongestSeq(seq2len)
            f.write("defaultPos %s:1-%d\n" % (seq, min(l, 1000)))
            f.write("\n")
        f.close()
Example #54
 def tearDown(self):
     for tempFile in self.tempFiles:
         if os.path.exists(tempFile):
             os.remove(tempFile)
     unittest.TestCase.tearDown(self)
     system("rm -rf %s" % self.tempDir)
Example #55
    def run(self):
        #GC content & Alignability
        for genome in self.genomes:
            genomedir = os.path.join(self.outdir, genome)
            if self.options.gcContent:
                self.addChildTarget(GetGCpercent(
                    genomedir, genome))  #genomedir/genome.gc.bw
            if self.options.alignability:
                self.addChildTarget(
                    GetAlignability(
                        genomedir, genome,
                        self.halfile))  #genomedir/genome.alignability.bw

        #Compute conservation track:
        if self.options.conservation:
            #if self.options.conservation or self.options.conservationDir:
            conservationDir = os.path.join(self.outdir, "conservation")
            if not self.options.conservationDir:
                system("mkdir -p %s" % conservationDir)
                self.addChildTarget(
                    GetConservationFiles(self.halfile, conservationDir,
                                         self.options))
            else:
                if os.path.abspath(self.options.conservationDir
                                   ) != os.path.abspath(conservationDir):
                    system("ln -s %s %s" % (os.path.abspath(
                        self.options.conservationDir), conservationDir))
                    #system("cp -r %s %s" %(self.options.conservationDir, conservationDir))

        #Make bed tracks:
        preprocessAnnotationInputs(self.options, self.outdir, "bed")
        self.addChildTarget(
            MakeAnnotationTracks(self.options, self.outdir, self.halfile,
                                 self.genome2seq2len, "bed"))

        #Make bed2 tracks:
        preprocessAnnotationInputs(self.options, self.outdir, "bed2")
        self.addChildTarget(
            MakeAnnotationTracks(self.options, self.outdir, self.halfile,
                                 self.genome2seq2len, "bed2"))

        #Make wig tracks:
        preprocessAnnotationInputs(self.options, self.outdir, "wig")
        self.addChildTarget(
            MakeAnnotationTracks(self.options, self.outdir, self.halfile,
                                 self.genome2seq2len, "wig"))

        #Make clade-exclusive tracks:
        if self.options.tree and self.options.cladeExclusive:
            self.addChildTarget(
                GetCladeExclusiveRegions(
                    self.halfile, self.options.tree,
                    os.path.join(self.outdir, "liftoverbeds"),
                    self.options.maxOut, self.options.minIn))
            self.options.bigbeddirs.append(
                os.path.join(self.outdir, "liftoverbeds", "CladeExclusive"))

        #Get LOD if needed, and Write trackDb files
        self.setFollowOnTarget(
            WriteGenomesFile(self.genomes, self.genome2seq2len, self.halfile,
                             self.options, self.outdir))
Example #56
 def testCuTest(self):
     system("matchingAndOrderingTests %s" % getLogLevelString())
Example #57
 def testMarginStats(self):
     system("%s %s %s %s --readIdentity --alignmentIdentity --mismatchesPerAlignedBase --readCoverage \
     --deletionsPerReadBase --insertionsPerReadBase --printValuePerReadAlignment"                                                                                     % \
     (self.marginStats, self.inputSamFile1, self.readFastqFile1, self.referenceFastaFile1))
Example #58
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     # Clean up
     system("rm -rf %s %s %s %s" % (self.outputSamFile, self.outputHmmFile,
                                    self.outputVcfFile, self.jobTree))
Example #59
def linkTwoBitSeqFile(genome, twobitdir, outdir):
    twobitfile = os.path.join(outdir, "%s.2bit" %genome)
    intwobitfile = os.path.abspath( os.path.join(twobitdir, "%s.2bit" %genome) )
    if not os.path.exists(twobitfile):
        system("ln -s %s %s" %(intwobitfile, twobitfile))
Example #60
    def testProgressiveOutgroupsVsAllOutgroups(self):
        """Tests the difference in outgroup coverage on an ingroup when
        running in "ingroups vs. outgroups" mode and "set against set"
        mode.
        """
        encodeRegion = "ENm001"
        ingroup = "human"
        outgroups = ["macaque", "rabbit", "dog"]
        regionPath = os.path.join(self.encodePath, encodeRegion)
        ingroupPath = os.path.join(regionPath,
                                   ingroup + "." + encodeRegion + ".fa")
        outgroupPaths = map(
            lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"),
            outgroups)
        # Run in "set against set" mode, aligning the entire ingroup
        # vs each outgroup
        runCactusBlast([ingroupPath],
                       alignmentsFile=self.tempOutputFile,
                       toilDir=os.path.join(self.tempDir, "setVsSetToil"),
                       chunkSize=500000,
                       overlapSize=10000,
                       targetSequenceFiles=outgroupPaths)
        # Run in "ingroup vs outgroups" mode, aligning the ingroup vs
        # the outgroups in order, trimming away sequence that's
        # already been aligned.
        runCactusBlastIngroupsAndOutgroups([ingroupPath],
                                           outgroupPaths,
                                           alignmentsFile=self.tempOutputFile2,
                                           toilDir=os.path.join(
                                               self.tempDir, "outgroupToil"))

        # Get the coverage on the ingroup, in bases, from each run.
        coverageSetVsSetUnfiltered = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=self.tempOutputFile,
                          outputFile=coverageSetVsSetUnfiltered)
        coverageSetVsSet = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageSetVsSetUnfiltered))
        coverageIngroupVsOutgroupsUnfiltered = getTempFile(
            rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=self.tempOutputFile2,
                          outputFile=coverageIngroupVsOutgroupsUnfiltered)
        coverageIngroupVsOutgroups = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageIngroupVsOutgroupsUnfiltered))

        print "total coverage on human (set vs set mode, %d outgroups): %d" % (
            len(outgroups), coverageSetVsSet)
        print "total coverage on human (ingroup vs outgroup mode, %d outgroups): %d" % (
            len(outgroups), coverageIngroupVsOutgroups)

        # Make sure we're getting a reasonable fraction of the
        # alignments when using the trimming strategy.
        self.assertTrue(
            float(coverageIngroupVsOutgroups) / coverageSetVsSet >= 0.95)

        # Get the coverage on the ingroup, in bases, from just the
        # last outgroup. Obviously this should be much higher in set
        # vs set mode than in ingroup vs outgroup mode.
        outgroupAlignments = getTempFile(rootDir=self.tempDir)
        system("grep %s %s > %s" %
               (outgroups[-1], self.tempOutputFile, outgroupAlignments))
        coverageFileSetVsSet = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=outgroupAlignments,
                          outputFile=coverageFileSetVsSet)

        coverageFromLastOutgroupSetVsSet = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageFileSetVsSet))

        outgroupAlignments = getTempFile(rootDir=self.tempDir)
        system("grep %s %s > %s" %
               (outgroups[-1], self.tempOutputFile2, outgroupAlignments))
        coverageFileInVsOut = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=outgroupAlignments,
                          outputFile=coverageFileInVsOut)
        coverageFromLastOutgroupInVsOut = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageFileInVsOut))

        print "total coverage on human from last outgroup in set (%s) (set vs set mode): %d" % (
            outgroups[-1], coverageFromLastOutgroupSetVsSet)
        print "total coverage on human from last outgroup in set (%s) (ingroup vs outgroup mode): %d" % (
            outgroups[-1], coverageFromLastOutgroupInVsOut)

        self.assertTrue(
            float(coverageFromLastOutgroupInVsOut) /
            coverageFromLastOutgroupSetVsSet <= 0.10)
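The awk one-liners above simply total interval lengths (column 3 minus column 2) over BED-style coverage files. A minimal pure-Python equivalent, assuming whitespace-separated lines with start and end in the second and third columns:

def totalCoveredBases(bedPath):
    #Sum end - start over a BED-like file, mirroring:
    #  awk '{ total += $3 - $2 } END { print total }'
    total = 0
    with open(bedPath) as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 3:
                total += int(fields[2]) - int(fields[1])
    return total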