Example #1
 def testJobReadWriteAndDelete(self):
     jobDir = os.path.join(os.getcwd(), "testJobDir")
     os.mkdir(jobDir) #If the directory already exists, the test will fail
     command = "by your command"
     memory = 2**32 #4 GiB
     cpu = 1
     tryCount = 100
     
     for i in xrange(10):
         startTime = time.time()
         for j in xrange(100):
             j = Job(command, memory, cpu, tryCount, jobDir)
             self.assertEquals(j.remainingRetryCount, tryCount)
             self.assertEquals(j.jobDir, jobDir)
             self.assertEquals(j.children, [])
             self.assertEquals(j.followOnCommands, [ (command, memory, cpu, 0)])
             self.assertEquals(j.messages, [])
             j.write()
             j = Job.read(j.getJobFileName())
             self.assertEquals(j.remainingRetryCount, tryCount)
             self.assertEquals(j.jobDir, jobDir)
             self.assertEquals(j.children, [])
             self.assertEquals(j.followOnCommands, [ (command, memory, cpu, 0)])
             self.assertEquals(j.messages, [])
             self.assertTrue(os.path.exists(j.getJobFileName()))
             j.delete()
             self.assertTrue(not os.path.exists(j.getJobFileName()))
         print "It took %f seconds to load/unload jobs" % (time.time() - startTime) #We've just used it for benchmarking, so far 
         #Would be good to extend this trivial test
         
     system("rm -rf %s" % jobDir)
Example #2
 def testJobUpdate(self):
     jobDir = os.path.join(os.getcwd(), "testJobDir")
     os.mkdir(jobDir) #If the directory already exists, the test will fail
     command = "by your command"
     memory = 2**32 #4 GiB
     cpu = 1
     tryCount = 100
     
     for i in xrange(40):
         startTime = time.time()
         j = Job(command, memory, cpu, tryCount, jobDir)
         childNumber = random.choice(range(20))
         for k in xrange(childNumber):
             j.children.append((command, memory, cpu))
         self.assertEquals(len(j.children), childNumber)
         j.update(tryCount=tryCount, depth=0)
         j = Job.read(j.getJobFileName())
         self.assertEquals(len(j.children) + len(j.followOnCommands), childNumber + 1)
         for childJobFile, memory, cpu in j.children:
             cJ = Job.read(childJobFile)
             self.assertEquals(cJ.remainingRetryCount, tryCount)
             #self.assertEquals(cJ.jobDir, os.path.split(cJ)[0])
             self.assertEquals(cJ.children, [])
             self.assertEquals(cJ.followOnCommands, [ (command, memory, cpu, 0)])
             self.assertEquals(cJ.messages, [])
             self.assertTrue(os.path.exists(cJ.getJobFileName()))
             cJ.delete()
             self.assertTrue(not os.path.exists(cJ.getJobFileName()))
         self.assertEquals(os.listdir(jobDir), [ "job" ])
         j.delete()
         print "It took %f seconds to update jobs" % (time.time() - startTime) #We've just used it for benchmarking, so far 
         
     system("rm -rf %s" % jobDir)
Example #3
 def testCPecanRealignSplitSequences(self):
     """Runs cPecanRealign, splitting indels longer than 100bp, and check
     that the coverage from the results is the same as the coverage from
     realigning with no arguments.."""
     for seqFile1, seqFile2 in seqFilePairGenerator():
         # Drop the lastz command since it's not needed, but it's still
         # convenient to use the same parameters as the other tests.
         realignCommand, _ = getCommands(seqFile1, seqFile2)
         splitRealignCommand = realignCommand + " --splitIndelsLongerThanThis 100"
         realignOutput = getTempFile()
         splitRealignOutput = getTempFile()
         realignCommand += " > %s" % realignOutput
         splitRealignCommand += " > %s" % splitRealignOutput
         system(realignCommand)
         system(splitRealignCommand)
         # Check coverage on seqFile1
         
         #The following will fail until we refactor.
         
         splitRealignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile1, splitRealignOutput))
         realignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile1, realignOutput))
         self.assertTrue(splitRealignCoverage == realignCoverage)
         # Check coverage on seqFile2
         splitRealignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile2, splitRealignOutput))
         realignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile2, realignOutput))
         self.assertTrue(splitRealignCoverage == realignCoverage)
         os.remove(realignOutput)
         os.remove(splitRealignOutput)
Example #4
    def run(self):
        self.logToMaster("Getting recomb. events for clone %s ..." % self.clone)
        max_vdel = len(self.vseq) - 3
        min_vdel = find_min_vdel(self.vseq, self.aaseq)
        max_jdel = len(self.jseq) - 3
        min_jdel = find_min_jdel(self.jseq, self.aaseq)
        self.logToMaster("Vdel: <%d-%d>" % (min_vdel, max_vdel))
        self.logToMaster("Jdel: <%d-%d>" % (min_jdel, max_jdel))

        for d, dseq in self.d2seq.iteritems():
            devents = find_devents(dseq, self.aaseq)
            self.logToMaster("%d number of devents" % (len(devents)))
            # DEBUG
            #numempty = 0
            #for devent in devents:
            #    if devent.cdr3aa_dstart == -1:
            #        numempty += 1
            #self.logToMaster("\t%d empty D, %d non_empty_D\n" % (numempty, len(devents) - numempty))
            # END DEBUG
            for i, devent in enumerate(devents):
                outdir = os.path.join(self.outdir, d, str(i))  #outdir/clone/d/i
                system("mkdir -p %s" % outdir)
                
                if devent.cdr3aa_dstart == -1:
                    dempty_file = os.path.join(outdir, "d_empty")
                    self.addChildTarget(Get_Vjins(self.clone, self.vseq,
                                min_vdel, max_vdel, self.jseq, min_jdel,
                                max_jdel, d, devent, self.aaseq, dempty_file))
                else:
                    self.addChildTarget(Get_Vd_Dj_Ins(self.clone, self.vseq,
                                min_vdel, max_vdel, self.jseq, min_jdel,
                                max_jdel, d, dseq, devent, self.aaseq, outdir))
        self.setFollowOnTarget(CloneEventsAgg(self.outdir))
Example #5
 def run(self):
     infile = os.path.join(self.indir, "copyNumberStats.xml")
     if os.path.exists( infile ):
         cmd = "cnvPlot.py %s --outdir %s " %(infile, self.outdir)
         if self.filteredSamples != "":
             cmd += " --filteredSamples %s" %(self.filteredSamples)
         system(cmd)
Example #6
 def runComparisonOfBlastScriptVsNaiveBlast(self, blastMode):
     """We compare the output with a naive run of the blast program, to check the results are nearly
     equivalent.
     """
     encodeRegions = [ "ENm00" + str(i) for i in xrange(1,2) ] #, 2) ] #Could go to six
     species = ("human", "mouse", "dog")
     #Other species to try "rat", "monodelphis", "macaque", "chimp"
     for encodeRegion in encodeRegions:
         regionPath = os.path.join(self.encodePath, encodeRegion)
         for i in xrange(len(species)):
             species1 = species[i]
             for species2 in species[i+1:]:
                 seqFile1 = os.path.join(regionPath, "%s.%s.fa" % (species1, encodeRegion))
                 seqFile2 = os.path.join(regionPath, "%s.%s.fa" % (species2, encodeRegion))
                 
                 #Run the naive blast
                 runNaiveBlast(seqFile1, seqFile2, self.tempOutputFile)
                 logger.info("Ran the naive blast okay")
                 
                 #Run the blast
                 jobTreeDir = os.path.join(getTempDirectory(self.tempDir), "jobTree")
                 if blastMode == "allAgainstAll":
                     runCactusBlast([ seqFile1, seqFile2 ], self.tempOutputFile2, jobTreeDir,
                                    chunkSize=500000, overlapSize=10000)
                 else:
                     runCactusBlast([ seqFile1 ], self.tempOutputFile2, jobTreeDir,
                                    chunkSize=500000, overlapSize=10000, targetSequenceFiles=[ seqFile2 ])
                 runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
                 system("rm -rf %s " % jobTreeDir)    
                 logger.info("Ran cactus_blast okay")
                 logger.critical("Comparing cactus_blast and naive blast; using mode: %s" % blastMode)
                 compareResultsFile(self.tempOutputFile, self.tempOutputFile2)
Example #7
def trimGenome(sequenceFile, coverageFile, outputFile, complement=False,
               flanking=0, minSize=1, windowSize=10, threshold=1, depth=None):
    system("cactus_trimSequences.py %s %s %s %s %s %s %s %s > %s" % (
        nameValue("complement", complement, valueType=bool),
        nameValue("flanking", flanking), nameValue("minSize", minSize),
        nameValue("windowSize", windowSize), nameValue("threshold", threshold),
        nameValue("depth", depth), sequenceFile, coverageFile, outputFile))
Example #8
 def run(self):
     newmodfile = "%s-modified" %self.modfile
     #modify small branch lengths (rewrite any exponent of the form e-1x as e-08)
     system("sed 's/e-1./e-08/g' %s > %s" %(self.modfile, newmodfile))
     #get conservation bigwig and liftover files:
     cmd = "halTreePhyloP.py %s %s %s --bigWig --numProc %d" %(self.halfile, newmodfile, self.outdir, self.numproc)
     system(cmd)
def extractOutput(workDir, outputHalFile, options):
    if options.outputMaf is not None:
        mcProj = MultiCactusProject()
        mcProj.readXML(
            os.path.join(workDir, ProjectWrapper.alignmentDirName,
                         ProjectWrapper.alignmentDirName + "_project.xml"))
        rootName = mcProj.mcTree.getRootName()
        rootPath = os.path.join(workDir, ProjectWrapper.alignmentDirName,
        rootName, rootName + '.maf')
        cmd = 'mv %s %s' % (rootPath, options.outputMaf)
        system(cmd)
    envFile = getEnvFilePath()
    logFile = os.path.join(workDir, 'cactus.log')
    pjPath = os.path.join(workDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    logHandle = open(logFile, "a")
    logHandle.write("\n\n%s: Beginning HAL Export\n\n" % str(
        datetime.datetime.now()))
    logHandle.close()
    cmd = '. %s && cactus2hal.py %s %s >> %s 2>&1' % (envFile, pjPath,
                                                      outputHalFile, logFile)
    system(cmd)
    logHandle = open(logFile, "a")
    logHandle.write("\n%s: Finished HAL Export \n" % str(
        datetime.datetime.now()))
    logHandle.close()
def runCactus(workDir, jtCommands, jtPath, options):
    envFile = getEnvFilePath()
    pjPath = os.path.join(workDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    logFile = os.path.join(workDir, 'cactus.log')

    if options.overwrite:
        overwriteFlag = '--overwrite'
        system("rm -f %s" % logFile)
    else:
        overwriteFlag = ''

    logHandle = open(logFile, "a")
    logHandle.write("\n%s: Beginning Progressive Cactus Alignment\n\n" % str(
        datetime.datetime.now()))
    logHandle.close()
    cmd = '. %s && cactus_progressive.py %s %s %s >> %s 2>&1' % (envFile,
                                                                 jtCommands,
                                                                 pjPath,
                                                                 overwriteFlag,
                                                                 logFile)
    jtMonitor = JobStatusMonitor(jtPath, pjPath, logFile,
                                 deadlockCallbackFn=abortFunction(jtPath,
                                                                  options))
    if options.database == "kyoto_tycoon":
        jtMonitor.daemon = True
        jtMonitor.start()
        
    system(cmd)
    logHandle = open(logFile, "a")
    logHandle.write("\n%s: Finished Progressive Cactus Alignment\n" % str(
        datetime.datetime.now()))
    logHandle.close()
Example #11
def checkOptions(parser, args, options):
    if not options.indir:
        raise InputOptionError("Input directory is required. None was given.\n")
    if not os.path.exists(options.indir):
        raise InputOptionError("Input directory %s does not exist\n" % options.indir)
    if not os.path.exists(options.outdir):
        system("mkdir -p %s" % options.outdir)
Example #12
    def testKeepingCoverageOnIngroups(self):
        """Tests whether the --ingroupCoverageDir option works as
        advertised."""
        encodeRegion = "ENm001"
        ingroups = ["human", "cow"]
        outgroups = ["macaque", "rabbit", "dog"]
        regionPath = os.path.join(self.encodePath, encodeRegion)
        ingroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), ingroups)
        outgroupPaths = map(lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"), outgroups)
        # Run blast in "ingroup vs outgroups" mode, requesting to keep
        # the bed files that show outgroup coverage on the ingroup.
        toilDir = os.path.join(self.tempDir, "tmp_toil")
        outgroupFragmentPaths = [getTempFile(rootDir=self.tempDir) for outgroup in outgroups]
        ingroupCoveragePaths = [getTempFile(rootDir=self.tempDir) for ingroup in ingroups]
        runCactusBlastIngroupsAndOutgroups(ingroups=ingroupPaths, outgroups=outgroupPaths, alignmentsFile=self.tempOutputFile, outgroupFragmentPaths=outgroupFragmentPaths, ingroupCoveragePaths=ingroupCoveragePaths, toilDir=toilDir)
        for i, ingroupPath in enumerate(ingroupPaths):
            # Get the coverage from the outgroups independently and
            # check that it's the same as the file in
            # ingroupCoverageDir
            otherIngroupPath = ingroupPaths[1] if i == 0 else ingroupPaths[0]
            # To filter out alignments from the other ingroup and
            # self-alignments we need to create a fasta with all the
            # outgroup fragments in it.
            outgroupsCombined = getTempFile(rootDir=self.tempDir)
            for outgroupFragmentPath in outgroupFragmentPaths:
                system("cat %s >> %s" % (outgroupFragmentPath, outgroupsCombined))
            independentCoverageFile = getTempFile(rootDir=self.tempDir)
            calculateCoverage(fromGenome=outgroupsCombined, sequenceFile=ingroupPath, cigarFile=self.tempOutputFile, outputFile=independentCoverageFile)

            # find the coverage file cactus_blast kept (should be
            # named according to the basename of the ingroup path
            # file)
            keptCoverageFile = ingroupCoveragePaths[i]
            self.assertTrue(filecmp.cmp(independentCoverageFile, keptCoverageFile))
Example #13
 def testScriptTree_Example2(self):
     """Tests that the global and local temp dirs of a job behave as expected.
     """
     for test in xrange(self.testNo):
         command = "scriptTreeTest_Wrapper2.py --jobTree %s --logLevel=INFO --retryCount=0" % self.jobTreeDir
         system(command)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Example #14
 def run(self):
     localTempDir = self.getLocalTempDir()
     i = 0
     localfiles = []
     for f in self.files:
         if not os.path.exists(f): #HACK
             continue
         localname = os.path.join(localTempDir, "%s%d.bam" %(os.path.basename(f).split('.')[0], i))
         system("scp -C %s %s" %(f, localname))
         localfiles.append(localname)
         i += 1
     mergeFile = os.path.join(localTempDir, "merge.bam")
     if len(localfiles) == 1:
         system("mv %s %s" %(localfiles[0], mergeFile))
     else:
         bamStr = " ".join(localfiles)
         logger.info("Merging bams...\n")
         mergeCmd = "samtools merge %s %s" %(mergeFile, bamStr)
         system( mergeCmd )
     
     sortPrefix = os.path.join(localTempDir, "mergeSorted")
     sortCmp = "samtools sort %s %s" %( mergeFile, sortPrefix )
     system( sortCmp )
     
     system( "cp %s.bam %s" %(sortPrefix, self.outdir) )
     #Get Snps info:
     self.setFollowOnTarget( Snp(self.outdir, self.options) )
Example #15
    def run(self):
        self.logToMaster("DownSampling\n")
        opts = self.options
        global_dir = self.getGlobalTempDir()
        #sampling_dir = os.path.join(global_dir, "down_sampling")
        sampling_dir = os.path.join(opts.outdir, "down_sampling")
        system("mkdir -p %s" % sampling_dir)

        for sam in os.listdir(self.sampledir):
            samdir = os.path.join(self.sampledir, sam)
            sample = pickle.load(gzip.open(os.path.join(samdir, sam), "rb"))
            out_samdir = os.path.join(sampling_dir, sam) 
            system("mkdir -p %s" % out_samdir)
            if opts.sampling_uniq:  # sampling uniq clones
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                              out_samdir, libsample.sampling,
                                              opts.sampling_uniq, 'uniq'))
            elif opts.sampling_top:  # sampling reads, then report top clones
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                out_samdir, libsample.sampling, opts.sampling,
                                "top", opts.sampling_top))
            else:  # sampling reads
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                out_samdir, libsample.sampling, opts.sampling))
        if opts.normalize:
            self.setFollowOnTarget(Normalize(sampling_dir, opts))
        else:
            self.setFollowOnTarget(Analyses(sampling_dir, opts))
Example #16
 def run(self):
     cmd = "halLiftover --outPSL --tab %s %s %s %s %s" % (self.opts.halfile,
             self.opts.query, self.bedfile, self.opts.target, self.liftfile)
     system(cmd)
     #system("cp %s %s_liftoverpsl" % (self.liftfile, self.opts.outfile))
     status = get_liftover_status(self.bedfile, self.liftfile, self.opts.edge)
     print_status(status, self.statusfile)
Example #17
 def testScriptTree_Example(self):
     """Uses the jobTreeTest code to test the scriptTree Target wrapper.
     """
     for test in xrange(self.testNo):
         command = "scriptTreeTest_Wrapper.py --jobTree %s --logLevel=INFO --retryCount=10" % self.jobTreeDir
         system(command)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
Example #18
def getRandomConfigFile():
    tempConfigFile = getTempFile(rootDir="./", suffix=".xml")
    config = ET.parse(os.path.join(cactusRootPath(), "cactus_config.xml")).getroot()
    cafNode = config.find("caf")
    assert len(config.findall("caf")) == 1
    
    annealingRounds = 1 + int(random.random() * 10)
    cafNode.attrib["annealingRounds"] = " ".join([ str(1 + int(random.random() * 10)) for i in xrange(annealingRounds) ])
    deannealingRounds = list(set([ 1 + int(random.random() * 10) for i in xrange(int(random.random() * 10)) ]))
    deannealingRounds.sort()
    cafNode.attrib["deannealingRounds"] = " ".join([ str(i) for i in deannealingRounds ])
    cafNode.attrib["trim"] = " ".join([ str(1 + int(random.random() * 5)) for i in xrange(annealingRounds) ])
    
    cafNode.attrib["alignRepeatsAtLoop"] = str(random.random() * annealingRounds)
    
    cafNode.attrib["minimumTreeCoverage"] = str(random.random())
    cafNode.attrib["blockTrim"] = str(int(random.random() * 5))
    cafNode.attrib["ignoreAllChainsLessThanMinimumTreeCoverage"] = str(random.choice([0, 1]))
    cafNode.attrib["minimumBlockDegree"] = str(random.choice([0, 5]))
    
    checkNode = config.find("check")
    checkNode.attrib["runCheck"] = "1"
    
    checkNode = config.find("normal")
    checkNode.attrib["iterations"] = "2"
    
    #Now print the file..
    fileHandle = open(tempConfigFile, 'w')
    ET.ElementTree(config).write(fileHandle)
    fileHandle.close()
    if getLogLevelString() == "DEBUG":
        system("cat %s" % tempConfigFile)
    return tempConfigFile
 def run(self):
     previousOutputFile = None
     previousOutputFile2 = None
     blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
     for i in xrange(self.options.blanchetteRepeats):
         trueAlignmentMFA = os.path.join(os.path.join(blanchettePath, "%.2i.job" % i), "true.mfa")
         trueAlignmentMAF = os.path.join(self.getLocalTempDir(), "temp.maf")
         treeFile = os.path.join(blanchettePath, "tree.newick")
         system("mfaToMaf --mfaFile %s --outputFile %s --treeFile %s" % (trueAlignmentMFA, trueAlignmentMAF, treeFile))
         
         
         trueRenamedMAF = trueAlignmentMAF + ".renamed"
         expPath = os.path.join(self.outputDir, str(i), "experiment.xml")
         applyNamingToMaf(expPath, trueAlignmentMAF, trueRenamedMAF)
         trueAlignmentMAF = trueRenamedMAF
         if self.params.vanilla == False:            
             predictedAlignmentMaf = os.path.join(self.outputDir, str(i), "progressiveCactusAlignment", "Anc0", "Anc0.maf")
         else:
             predictedAlignmentMaf = os.path.join(self.outputDir, str(i), "cactusVanilla.maf")
         
         outputFile = os.path.join(self.getLocalTempDir(), "temp%i" % i)
         system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s" % (trueAlignmentMAF, predictedAlignmentMaf, outputFile))
         system("cp %s %s" % (outputFile, os.path.join(self.outputDir, str(i), "mafComparison.xml")))
         if previousOutputFile != None:
             system("mergeMafComparatorResults.py --results1 %s --results2 %s --outputFile %s" % (outputFile, previousOutputFile, outputFile))
         previousOutputFile = outputFile
         
     system("mv %s %s" % (previousOutputFile, os.path.join(self.outputDir, "mafComparison.xml")))   
Example #20
    def run(self):
        # filter by size
        starttime = time.time()
        opts = self.opts
        clones = pickle.load(gzip.open(self.samplefile, 'rb'))
        if (opts.mincount > 1 or opts.maxcount > 0 or opts.minfreq > 0 or
            opts.maxfreq > 0):
            clones = filter_by_size(clones, opts.mincount, opts.maxcount,
                                    opts.minfreq, opts.maxfreq)
        msg = ("Filter_by_size for file %s done in %.4f s" %
                                 (self.samplefile, time.time() - starttime))
        logger.info(msg)
        starttime = time.time()

        # filter by status
        pclones = filter_by_status(clones, True)
        npclones = filter_by_status(clones, False)
        
        filename = os.path.basename(self.samplefile)
        if pclones:
            pdir = os.path.join(self.outdir, "productive", self.name)
            system("mkdir -p %s" % pdir)
            pfile = os.path.join(pdir, filename)
            pickle.dump(pclones, gzip.open(pfile, "wb"))
        if npclones:    
            npdir = os.path.join(self.outdir, "non_productive", self.name)
            system("mkdir -p %s" % npdir)
            npfile = os.path.join(npdir, filename)
            pickle.dump(npclones, gzip.open(npfile, "wb"))
        msg = ("Filter_by_status for file %s done in %.4f s" %
                                 (self.samplefile, time.time() - starttime))
        logger.info(msg)
        self.setFollowOnTarget(libcommon.CleanupFile(self.samplefile))
Example #21
def runCactusProgressive(inputDir,
                      jobTreeDir, 
                      logLevel=None, retryCount=0, 
                      batchSystem="single_machine", 
                      rescueJobFrequency=None,
                      skipAlignments=False,
                      buildHal=None,
                      buildFasta=None,
                      buildAvgs=False, 
                      jobTreeStats=False,
                      maxThreads=None,
                      maxCpus=None,
                      defaultMemory=None,
                      recursive=None,
                      logFile=None,
                      event=None,
                      extraJobTreeArgumentsString="",
                      profileFile=None):
    command = ("cactus_progressive.py %s" % inputDir) + " " + _fn(jobTreeDir, 
                      logLevel, retryCount, batchSystem, rescueJobFrequency, skipAlignments,
                      buildAvgs, None,
                      buildHal,
                      buildFasta,
                      jobTreeStats, maxThreads, maxCpus, defaultMemory, logFile, extraJobTreeArgumentsString=extraJobTreeArgumentsString) + \
                      (" %s %s" % (nameValue("recursive", recursive, bool),
                                      nameValue("event", event)))
    if profileFile != None:
        command = "python -m cProfile -o %s %s/bin/%s" % (profileFile, cactusRootPath(), command)
    system(command)                   
    logger.info("Ran the cactus progressive okay")
Example #22
def runCactusBlast(sequenceFiles, outputFile, jobTreeDir,
                   chunkSize=None, overlapSize=None, 
                   logLevel=None, 
                   blastString=None, 
                   selfBlastString=None,
                   compressFiles=None,
                   lastzMemory=None,
                   targetSequenceFiles=None):
    logLevel = getLogLevelString2(logLevel)
    chunkSize = nameValue("chunkSize", chunkSize, int)
    overlapSize = nameValue("overlapSize", overlapSize, int)
    blastString = nameValue("blastString", blastString, str)
    selfBlastString = nameValue("selfBlastString", selfBlastString, str)
    compressFiles = nameValue("compressFiles", compressFiles, bool)
    lastzMemory = nameValue("lastzMemory", lastzMemory, int)
    if targetSequenceFiles != None: 
        targetSequenceFiles = " ".join(targetSequenceFiles)
    targetSequenceFiles = nameValue("targetSequenceFiles", targetSequenceFiles, quotes=True)
    command = "cactus_blast.py %s  --cigars %s %s %s %s %s %s %s %s --jobTree %s --logLevel %s" % \
            (" ".join(sequenceFiles), outputFile,
             chunkSize, overlapSize, blastString, selfBlastString, compressFiles, 
             lastzMemory, targetSequenceFiles, jobTreeDir, logLevel)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the cactus_blast command okay")
def split_fasta(input_fasta, work_dir):
    out_root = os.path.join(work_dir, "out") + '/'
    os.mkdir(out_root)
    system("faSplit byname {input} {out_root}".format(
        input=input_fasta,
        out_root=out_root))
    return glob(os.path.join(work_dir, "out/*"))
Example #24
    def run(self):
        #self.logToMaster("Get_Vjins")
        tempdir = "%s_tempdir" % os.path.splitext(self.outfile)[0]
        system("mkdir -p %s" % tempdir)

        model = pickle.load(gzip.open(self.modelfile, 'rb'))
        items = self.clone.split('_')
        v = items[0]
        j = items[2]
        batchsize = 100000

        for vdel in self.vdels:
            v_cdr3_nt = self.v_nt if vdel == 0 else self.v_nt[: -1 * vdel]
            v_hang = len(v_cdr3_nt) % 3
            for jdel in self.jdels:
                j_cdr3_nt = self.j_nt if jdel == 0 else self.j_nt[jdel: ]
                d_nts = self.devent.left_nts + self.devent.right_nts
                vjins_nts = get_vjins_emptyd(self.v_nt, vdel, self.j_nt, jdel,
                                             d_nts, self.cdr3_aa)
                if vjins_nts is None:
                    continue
                
                #self.logToMaster("Empty D: vdel: %d, jdel: %d, vjins: %d\n" % (vdel, jdel, len(vjins_nts)))
                numbatches = len(vjins_nts) / batchsize
                if len(vjins_nts) % batchsize > 0:
                    numbatches += 1
                for index in xrange(numbatches):
                    outfile = os.path.join(tempdir, "%d_%d_%d" % (vdel, jdel, index))
                    endindex = min(len(vjins_nts), (index + 1) * batchsize)
                    batch_vjins_nts = vjins_nts[index * batchsize: endindex]
                    self.addChildTarget(Get_Vjins_Batch(batch_vjins_nts, v,
                          v_hang, v_cdr3_nt, j, j_cdr3_nt, self.d, d_nts,
                          self.cdr3_aa, vdel, jdel, self.devent.d5del,
                          self.devent.d3del, model, outfile))
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg3(tempdir, self.outfile))
Example #25
 def run(self):
     if os.path.exists(self.outfile):
         system("rm -f" % self.outfile)
     for batch in os.listdir(self.indir):
         batchfile = os.path.join(self.indir, batch)
         clones = pickle.load(gzip.open(batchfile, "rb"))
         write_clones(self.outfile, clones, True)
Example #26
    def run(self):
        #self.logToMaster("Get_Vd_Dj_Ins") 
        model = pickle.load(gzip.open(self.modelfile, "rb"))
        ins_vds = []
        for vd in model.ins_vd.keys():
            if vd >= 0:
                ins_vds.append(vd)
        ins_djs = []
        for dj in model.ins_dj.keys():
            if dj >= 0:
                ins_djs.append(dj)

        vdir = os.path.join(self.outdir, "vdels")
        system("mkdir -p %s" % vdir)
        for vdel in self.vdels:
            voutfile = os.path.join(vdir, str(vdel))
            self.addChildTarget(Get_Ins(get_vdins_events, vdel, self.v_nt,
                             self.devent, self.cdr3_aa, voutfile, ins_vds))
        jdir = os.path.join(self.outdir, 'jdels')
        system("mkdir -p %s" % jdir)
        for jdel in self.jdels:
            joutfile = os.path.join(jdir, str(jdel))
            self.addChildTarget(Get_Ins(get_djins_events, jdel, self.j_nt,
                             self.devent, self.cdr3_aa, joutfile, ins_djs))
        
        outfile = os.path.join(self.outdir, "events")
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg(self.clone, vdir, jdir,
                self.v_nt, self.j_nt, self.d, self.d_nt, self.devent, outfile,
                self.modelfile))
Example #27
    def run(self):
        system("mkdir -p %s" % self.outdir)
        clone2sams = read_clone_file(self.clone_file, True)
        if os.path.isdir(self.model):
            model = rcommon.get_median_model(self.model)
        else:
            model = pickle.load(gzip.open(self.model, "rb"))
        sam2total, group2sams = read_clonesize(self.numclone_file)
        len2llh = read_llh(self.lenllh, intkey=True)
        clone2llh = read_llh(self.clonellh)

        global_dir = self.getGlobalTempDir()
        lencount_dir = os.path.join(global_dir, "sam2len2count")
        system("mkdir -p %s" % lencount_dir)
        for s in os.listdir(self.db_dir):
            samdir = os.path.join(self.db_dir, s)
            lencount_file = os.path.join(lencount_dir, s)
            self.addChildTarget(GetLencount(samdir, lencount_file))
        self.setFollowOnTarget(
            GetLlhs(
                clone2sams,
                self.outdir,
                model,
                lencount_dir,
                group2sams,
                self.ingroup,
                self.outgroup,
                len2llh,
                clone2llh,
            )
        )
Example #28
 def test_bedParsing(self):
     """ mafComparator should parse a bed file and use the intervals for testing
     """
     for maf1, maf2, bed, totalTrue, totalTrueInInterval in self.knownValues:
         if not os.path.exists('tempTestFiles'):
             os.mkdir('tempTestFiles')
         f = open(self.maf1path, 'w')
         f.write('%s%s%s' % (self.header, maf1, self.footer))
         f.close()
         f = open(self.maf2path, 'w')
         f.write('%s%s%s' % (self.header, maf2, self.footer))
         f.close()
         f = open(self.bedpath, 'w')
         f.write('%s' % bed)
         f.close()
         cmd = ['mafComparator']
         cmd.append('--mafFile1=%s' % self.maf1path)
         cmd.append('--mafFile2=%s' % self.maf2path)
         cmd.append('--outputFile=%s' % os.path.join('tempTestFiles', 'output.xml'))
         if bed != '':
             cmd.append('--bedFiles=%s' % os.path.join('tempTestFiles', 'bed.bed'))
         cmd.append('--sampleNumber=1000 --logLevel %s' % getLogLevelString())
         system(" ".join(cmd))
         tree = ET.parse(os.path.join('tempTestFiles', 'output.xml'))
         homTests = tree.findall('homologyTests')
         self.assertAlmostEquals(totalTrue, 
                                 float(homTests[0].find('aggregateResults').find('all').attrib['totalTrue']))
         if totalTrueInInterval is None:
             self.assertEqual(None, homTests[0].find('aggregateResults').find('A'))
         else:
             self.assertAlmostEquals(totalTrueInInterval, 
                                     float(homTests[0].find('aggregateResults').find('A').attrib['totalTrue']))
         shutil.rmtree(os.path.dirname(self.maf1path))
Example #29
 def realignSamFile(self):
     """Chains and then realigns the resulting global alignments.
     """
     tempSamFile = os.path.join(self.getGlobalTempDir(), "temp.sam")
     system("cp %s %s" % (self.outputSamFile, tempSamFile))
     self.addChildTargetFn(realignSamFileTargetFn, args=(tempSamFile, self.outputSamFile, 
                                                         self.readFastqFile, self.referenceFastaFile, self.options))
Example #30
    def run(self):
        #self.logToMaster("Get_Vd_Dj_Ins_Agg")
        tempdir = "%s_tempdir" % os.path.splitext(self.outfile)[0]
        system("mkdir -p %s" % tempdir)
        items = self.clone.split('_')
        v = items[0]
        cdr3_aa = items[1]
        j = items[2]
        if self.devent.d3del == 0:
            d_cdr3_nt = self.d_nt[self.devent.d5del: ]
        else:
            d_cdr3_nt = self.d_nt[self.devent.d5del: -1 * self.devent.d3del]

        for vdelname in os.listdir(self.vdir):
            vfile = os.path.join(self.vdir, vdelname)
            vdel = int(vdelname.split("_batch")[0])
            v_cdr3_nt = self.v_nt if vdel == 0 else self.v_nt[: -1 * vdel]
            for jdelname in os.listdir(self.jdir):
                jfile = os.path.join(self.jdir, jdelname)
                jdel = int(jdelname.split("_batch")[0])
                j_cdr3_nt = self.j_nt if jdel == 0 else self.j_nt[jdel: ]

                outfile = os.path.join(tempdir, "%s_%s" % (vdelname, jdelname))
                self.addChildTarget(Get_Vd_Dj_Ins_Agg2(v, vdel, vfile,
                         v_cdr3_nt, j, jdel, jfile, j_cdr3_nt, self.d,
                         self.devent.d5del, self.devent.d3del, d_cdr3_nt,
                         cdr3_aa, self.modelfile, outfile))
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg3(tempdir, self.outfile))
Example #31
    ##Record time to run
    baseRuntime = runNaiveBlast(seqFile1, seqFile2, tempOutputFile, 
                  lastzOptions="--ambiguous=iupac,100 --ydrop=3000")
    results1 = loadResults(tempOutputFile)
    logger.info("Loaded first results")
    
    for setting in settings:
        #Run the blast
        ##Record time to run
        runtime = runNaiveBlast(seqFile1, seqFile2, tempOutputFile2,
                      lastzOptions=setting)
        
        #Now compare the results
        results2 = loadResults(tempOutputFile2)
        logger.info("Loaded second results")
        
        def fm(f):
            return "%.5f" % float(f)
        
        def fm2(f):
            return str(int(f))
        
        resultsComparator = ResultComparator(results1, results2)
        print((",".join([ species1, species2, "_".join(("_".join(setting.split())).split(",")), fm(distance), fm(resultsComparator.sensitivity),
                         fm(resultsComparator.specificity),
                         fm2(resultsComparator.intersectionSize), fm2(resultsComparator.unionSize),
                         fm2(resultsComparator.trueDifference), fm2(resultsComparator.predictedDifference),
                         fm2(resultsComparator.trueHits), fm2(resultsComparator.predictedHits), fm2(resultsComparator.trueHits -resultsComparator.predictedHits), fm(baseRuntime), fm(runtime) ])))
        
system("rm -rf %s" % tempDir)
Example #32
def runHalCons(halPath, outputPath):
    system("halCons %s > outputPath" % halPath)
Example #33
def runHalGen(preset, seed, hdf5Chunk, hdf5Compression, outPath):
    system("halRandGen --preset %s --seed %d --hdf5Chunk %d\
    --hdf5Compression %d %s" % (preset, seed, hdf5Chunk, hdf5Compression, outPath))
Example #34
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     system("rm -rf %s" % self.tempDir)
     for tempFile in self.tempFiles:
         os.remove(tempFile)
Example #35
    def run(self):
        system("mv %s/%s-all.xml %s" %
               (self.outdir, "genemapHomolog", self.extraInfoDir))
        system("rm -f %s/%s-*.xml" % (self.outdir, "genemapHomolog"))

        system("mv %s/%s-all.txt %s" %
               (self.outdir, "genemapHomolog", self.extraInfoDir))
        system("rm -f %s/%s*.txt" % (self.outdir, "genemapHomolog"))

        system("mv %s/%s-*.xml %s" %
               (self.outdir, "genemapChain", self.extraInfoDir))
        system("mv %s/%s %s" % (self.outdir, "gene2chain", self.extraInfoDir))
Example #36
def checkHalTree(halfile, outdir, options):
    treefile = os.path.join(outdir, "haltree.nw")
    system("halStats --tree %s > %s" % (halfile, treefile))
    tree = Phylo.read(treefile, "newick")
    options.treeFile = treefile
    options.tree = tree
Example #37
 def run(self):
     system("rm %s/*bed" % self.cladedir)
Example #38
def dless(target, split_ss_path, gff_path, model):
    """
    Main function for running dless. Strips all headers out of final gff.
    """
    system('dless {} {} | sed "/^#/ d" > {}'.format(split_ss_path, model,
                                                    gff_path))
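The sed filter in dless just drops header lines beginning with '#'. A rough file-based Python equivalent, shown only to spell out what that step does (dless itself streams through a pipe rather than an intermediate file):

def stripGffHeaders(inPath, outPath):
    #Drop header lines starting with '#', mirroring: sed "/^#/ d"
    with open(inPath) as src:
        with open(outPath, "w") as dst:
            for line in src:
                if not line.startswith("#"):
                    dst.write(line)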
Example #39
def runJobTreeStatusAndFailIfNotComplete(jobTreeDir):
    command = "jobTreeStatus --jobTree %s --failIfNotComplete --verbose" % jobTreeDir
    system(command)
Example #40
def runJobTreeStats(jobTree, outputFile):
    system("jobTreeStats --jobTree %s --outputFile %s" % (jobTree, outputFile))
    logger.info("Ran the job-tree stats command apparently okay")
Example #41
def getChromSizesFromHal(halfile, genome, outfile):
    system("halStats --chromSizes %s %s > %s" % (genome, halfile, outfile))
Example #42
    def run(self):
        #localTempDir = getTempFile(rootDir=self.getGlobalTempDir())
        localTempDir = self.getLocalTempDir()
        config = os.path.join(localTempDir, "cactus_workflow_config.xml")
        system("cp %s %s" %
               (self.config, config))  #Copy the config file to local disk

        #Copy sequences to localTempDir:
        localSeqdir = os.path.join(localTempDir, "data")
        system("mkdir -p %s" % localSeqdir)
        for spc in self.species.split():
            currseqdir = os.path.join(self.seqdir, spc)
            system("cp -r %s %s" % (currseqdir, localSeqdir))

        #Make a dir for this region if it does not already exist
        #system("rm -fR %s" %self.region)
        system("mkdir -p %s" % os.path.join(os.getcwd(), self.region))

        #Write experiment.xml for this region:
        experimentFile = os.path.join(localTempDir, "experiment.xml")
        writeExpCommand = "cactus_writeExperimentXml.py --species \"%s\" --tree \"%s\" --output %s --sequenceDir %s --config %s --databaseString %s"\
                          %(self.species, self.tree, experimentFile, localSeqdir, config, self.dbStr)
        system("%s" % writeExpCommand)
        system("cp %s %s" %
               (experimentFile,
                os.path.join(os.getcwd(), self.region, "experiment.xml")))
        logger.info("Got experiment.xml file for %s with command: %s\n" %
                    (self.region, writeExpCommand))

        #Now ready to runCactus:
        batchSystem = "singleMachine"
        jobTree = os.path.join(localTempDir, "jobTree")
        cactusCommand = "cactus_workflow.py --stats --batchSystem %s --experiment %s --buildReference --setupAndBuildAlignments --logDebug --jobTree %s" \
                        %(batchSystem, experimentFile, jobTree)
        logger.info("Going to run cactus now, the command is %s" %
                    cactusCommand)
        system("%s" % cactusCommand)
        system("cp -r %s %s" %
               (jobTree, os.path.join(os.getcwd(), self.region, "jobTree")))
        logger.info("Done cactusRun for %s\n" % self.region)

        #Run genemapChain:
        self.addChildTarget(
            RunGenemapChain(self.region, self.dbStr, self.options.outdir,
                            self.options.refSpecies, self.genedir))
        self.addChildTarget(
            RunGenemapHomolog(self.region, self.dbStr, self.options.outdir,
                              self.options.refSpecies, self.genedir))
Example #43
def runWorkflow_TestScript(testId,
                           sequences,
                           newickTreeString,
                           outputDir=None,
                           batchSystem="single_machine",
                           buildAvgs=False,
                           buildHal=False,
                           buildFasta=False,
                           configFile=None,
                           buildToilStats=False,
                           constraints=None,
                           progressive=False,
                           cactusWorkflowFunction=runCactusWorkflow,
                           logLevel=None):
    """Runs the workflow and various downstream utilities.
    The testId parameter is used to allocate a unique port so that tests
    can run in parallel.
    """
    logger.info("Running cactus workflow test script")
    logger.info("Got the following sequence dirs/files: %s" %
                " ".join(sequences))
    logger.info("Got the following tree %s" % newickTreeString)

    #Setup the output dir
    assert outputDir != None
    logger.info("Using the output dir: %s" % outputDir)

    #Setup the flower disk.
    experiment = getCactusWorkflowExperimentForTest(testId,
                                                    sequences,
                                                    newickTreeString,
                                                    outputDir=outputDir,
                                                    configFile=configFile,
                                                    constraints=constraints,
                                                    progressive=progressive)
    experimentFile = os.path.join(outputDir, "experiment.xml")
    experiment.writeXML(experimentFile)
    logger.info("The experiment file %s\n" % experimentFile)

    #Setup the job tree dir.
    toilDir = os.path.join(outputDir, "toil")
    logger.info("Got a job tree dir for the test: %s" % toilDir)

    #Run the actual workflow
    cactusWorkflowFunction(experimentFile,
                           toilDir,
                           batchSystem=batchSystem,
                           buildAvgs=buildAvgs,
                           buildHal=buildHal,
                           buildFasta=buildFasta,
                           toilStats=buildToilStats,
                           logLevel=logLevel)
    logger.info("Ran the the workflow")
    #Now run various utilities..
    if buildToilStats:
        toilStatsFile = os.path.join(outputDir, "toilStats.xml")
        runToilStats(toilDir, toilStatsFile)

    #Now remove everything we generate
    system("rm -rf %s %s" % (toilDir, experimentFile))

    #Return so calling function can cleanup
    return experiment
Example #44
 def testSonLibCTests(self):
     """Run m,ost the sonLib CuTests, fail if any of them fail.
     """
     system("sonLibTests %s" % getLogLevelString())
Example #45
 def testMaf(self):
     """Run all the api CuTests, fail if any of them fail.
     """
     system("halMafTests")
Example #46
 def run(self):
     f = open(self.outfile, 'w')
     f.write("#Name\tLength\tMap\tIns\tDels\tOO\tInframe\n")
     f.close()
     system("cat %s/* >> %s" % (self.indir, self.outfile))
Example #47
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     system("rm -rf %s" % self.tempDir)
Example #48
    def run(self):
        regions = getList(self.options.regions)
        genemapChainXmls = []  #list of all genemapChain output Xmls
        genemapHomologXmls = []  #list of all genemapHomology output Xmls
        for r in regions:
            genemapChainXmls.append(
                os.path.join(self.output, "%s-%s.xml" % ("genemapChain", r)))
            genemapHomologXmls.append(
                os.path.join(self.output, "%s-%s.xml" % ("genemapHomolog", r)))

        #Directory with more detailed information, if interested
        extraInfoDir = os.path.join(self.output, "extraInfo")
        system("mkdir -p %s" % extraInfoDir)
        system("chmod ug+xrw %s" % extraInfoDir)

        #Merge homologXmls of all regions:
        allHomologXml = "%s/%s-all.xml" % (self.output, "genemapHomolog")
        mergeXmls(genemapHomologXmls, allHomologXml)

        genemapHomolog = "%s/%s-*.txt" % (self.output, "genemapHomolog")
        allHomolog = "%s/%s-all.txt" % (self.output, "genemapHomolog")
        system("rm -f %s" % allHomolog)
        system("cat %s > %s" % (genemapHomolog, allHomolog))

        #geneToChain = "%s/%s" %(extraInfoDir, "gene2chain")
        geneToChain = "%s/%s" % (self.output, "gene2chain")

        genemapChainCommand = "genemapChain.py -o %s -c \"%s\" -i \"%s\" > %s" %(extraInfoDir, "cat",\
                               " ".join(genemapChainXmls), geneToChain)
        system("%s" % genemapChainCommand)

        chainMergeHomolog = "%s/%s" % (extraInfoDir, "chainMergeHomolog")
        chainMergeHomologTex = "%s/%s" % (self.output, "chainVsDup.tex")
        #chainMergeHomologTex = chainMergeHomolog + ".tex"
        missedGenes = "%s/%s" % (extraInfoDir, "missedGenes")
        genemapMergeCommand = "genemapMerge.py -f c -n %s %s %s %s %s > %s" %(self.options.runName, \
                               allHomolog, geneToChain, chainMergeHomolog, chainMergeHomologTex, missedGenes)
        system("%s" % genemapMergeCommand)

        homologCmp = "%s/%s" % (self.output, "homologCmp")
        homologCmpTex = "%s/%s" % (self.output, "homologCmp.tex")
        homologCmpV = "%s/%s" % (extraInfoDir, "homologCmpV")
        cactusVsMultizCommand = "genemapCactusVsMultiz.py -a %s -d %s %s %s %s > %s" %(extraInfoDir + "/perSpcDiff", \
                        self.options.geneDir + "/all.tx", self.options.multiz, allHomologXml, homologCmp, homologCmpV)
        system("%s" % cactusVsMultizCommand)

        makeLatexTabCommand = "genemapMakeLatexTab.py -s \"%s\" -n %s %s %s" \
                               %(self.species, self.options.runName, homologCmp, homologCmpTex)
        system("%s" % makeLatexTabCommand)

        #Cleanup now...
        self.setFollowOnTarget(Cleanup(self.output, extraInfoDir))
def runEvalMFAToMAF(mfa, maf):
    command = "mfaToMaf -b %s -d %s --logLevel DEBUG" % (mfa, maf)
    system(command)
    logger.info("Converted MFA %s to MAF %s\n" % (mfa, maf))
 def run(self):
     system("rm -rf %s" % self.dir)
     logger.info("Clean up tempDir for next run\n")
def runEvalMAFComparator(mafFile1, mafFile2, outputFile, sampleNumber):
    command = "mafComparator -b %s -c %s -d %s -e %s" % (
        mafFile1, mafFile2, outputFile, sampleNumber)
    system(command)
    logger.info("Compared MAF %s with MAF %s\n" % (mafFile1, mafFile2))
Example #52
 def run(self):
     bedfile = os.path.join(self.cladedir, "%s.bed" %self.target)
     system("halLiftover %s %s %s %s %s" %(self.halfile, self.query, self.queryBed, self.target, bedfile))
     #Convert to big bed:
     bigbedfile = os.path.join(self.cladedir, "%s.bb" %self.target)
     system("bedToBigBed %s %s %s" %(bedfile, self.chrsizefile, bigbedfile))
Example #53
    def run(self):
        options = self.options
        localHalfile = os.path.join(self.outdir,
                                    os.path.basename(self.halfile))
        if os.path.abspath(localHalfile) != os.path.abspath(self.halfile):
            if os.path.exists(localHalfile):
                system("rm %s" % localHalfile)
            if options.cpHal:
                system("cp %s %s" %
                       (os.path.abspath(self.halfile), localHalfile))
            else:
                system("ln -s %s %s" %
                       (os.path.abspath(self.halfile), localHalfile))

        #Create lod files if useLod is specified
        lodtxtfile, loddir = getLod(options, localHalfile, self.outdir)

        #Get the maximum window size to display SNPs
        if lodtxtfile:
            snpwidth = getLodLowestLevel(lodtxtfile) - 1
            if snpwidth > -1:
                options.snpwidth = snpwidth

        genomes = sortByProperName(self.genomes, self.options.properName)

        #Create documentation files:
        docdir = os.path.join(self.outdir, "documentation")
        system("mkdir -p %s" % docdir)
        writeDocFiles(docdir, self.options)

        #Create genomes.txt file
        filename = os.path.join(self.outdir, "genomes.txt")
        f = open(filename, 'w')
        #for genome in self.genomes:
        for genome in genomes:
            genomedir = os.path.join(self.outdir, genome)
            f.write("genome %s\n" % genome)
            f.write("twoBitPath %s/%s.2bit\n" % (genome, genome))

            #create trackDb for the current genome:
            if lodtxtfile == '':
                self.addChildTarget(
                    WriteTrackDbFile(self.genomes,
                                     "../%s" % os.path.basename(self.halfile),
                                     genomedir, options))
            else:
                self.addChildTarget(
                    WriteTrackDbFile(self.genomes,
                                     "../%s" % os.path.basename(lodtxtfile),
                                     genomedir, options))
            f.write("trackDb %s/trackDb.txt\n" % genome)

            #other info
            f.write("groups groups.txt\n")

            writeDescriptionFile(genome, genomedir)
            f.write("htmlPath %s/description.html\n" % genome)
            f.write("description %s\n" %
                    getProperName(genome, self.options.properName))
            f.write("organism %s\n" %
                    getProperName(genome, self.options.properName))
            f.write("orderKey 4800\n")
            f.write("scientificName %s\n" % genome)

            seq2len = self.genome2seq2len[genome]
            (seq, l) = getLongestSeq(seq2len)
            f.write("defaultPos %s:1-%d\n" % (seq, min(l, 1000)))
            f.write("\n")
        f.close()
Example #54
 def tearDown(self):
     for tempFile in self.tempFiles:
         if os.path.exists(tempFile):
             os.remove(tempFile)
     unittest.TestCase.tearDown(self)
     system("rm -rf %s" % self.tempDir)
Example #55
    def run(self):
        #GC content & Alignability
        for genome in self.genomes:
            genomedir = os.path.join(self.outdir, genome)
            if self.options.gcContent:
                self.addChildTarget(GetGCpercent(
                    genomedir, genome))  #genomedir/genome.gc.bw
            if self.options.alignability:
                self.addChildTarget(
                    GetAlignability(
                        genomedir, genome,
                        self.halfile))  #genomedir/genome.alignability.bw

        #Compute conservation track:
        if self.options.conservation:
            #if self.options.conservation or self.options.conservationDir:
            conservationDir = os.path.join(self.outdir, "conservation")
            if not self.options.conservationDir:
                system("mkdir -p %s" % conservationDir)
                self.addChildTarget(
                    GetConservationFiles(self.halfile, conservationDir,
                                         self.options))
            else:
                if os.path.abspath(self.options.conservationDir
                                   ) != os.path.abspath(conservationDir):
                    system("ln -s %s %s" % (os.path.abspath(
                        self.options.conservationDir), conservationDir))
                    #system("cp -r %s %s" %(self.options.conservationDir, conservationDir))

        #Make bed tracks:
        preprocessAnnotationInputs(self.options, self.outdir, "bed")
        self.addChildTarget(
            MakeAnnotationTracks(self.options, self.outdir, self.halfile,
                                 self.genome2seq2len, "bed"))

        #Make bed2 tracks:
        preprocessAnnotationInputs(self.options, self.outdir, "bed2")
        self.addChildTarget(
            MakeAnnotationTracks(self.options, self.outdir, self.halfile,
                                 self.genome2seq2len, "bed2"))

        #Make wig tracks:
        preprocessAnnotationInputs(self.options, self.outdir, "wig")
        self.addChildTarget(
            MakeAnnotationTracks(self.options, self.outdir, self.halfile,
                                 self.genome2seq2len, "wig"))

        #Make clade-exclusive tracks:
        if self.options.tree and self.options.cladeExclusive:
            self.addChildTarget(
                GetCladeExclusiveRegions(
                    self.halfile, self.options.tree,
                    os.path.join(self.outdir, "liftoverbeds"),
                    self.options.maxOut, self.options.minIn))
            self.options.bigbeddirs.append(
                os.path.join(self.outdir, "liftoverbeds", "CladeExclusive"))

        #Get LOD if needed, and Write trackDb files
        self.setFollowOnTarget(
            WriteGenomesFile(self.genomes, self.genome2seq2len, self.halfile,
                             self.options, self.outdir))
Example #56
 def testCuTest(self):
     system("matchingAndOrderingTests %s" % getLogLevelString())
Example #57
 def testMarginStats(self):
     system("%s %s %s %s --readIdentity --alignmentIdentity --mismatchesPerAlignedBase --readCoverage \
     --deletionsPerReadBase --insertionsPerReadBase --printValuePerReadAlignment"                                                                                     % \
     (self.marginStats, self.inputSamFile1, self.readFastqFile1, self.referenceFastaFile1))
Example #58
 def tearDown(self):
     unittest.TestCase.tearDown(self)
     # Clean up
     system("rm -rf %s %s %s %s" % (self.outputSamFile, self.outputHmmFile,
                                    self.outputVcfFile, self.jobTree))
Example #59
def linkTwoBitSeqFile(genome, twobitdir, outdir):
    twobitfile = os.path.join(outdir, "%s.2bit" %genome)
    intwobitfile = os.path.abspath( os.path.join(twobitdir, "%s.2bit" %genome) )
    if not os.path.exists(twobitfile):
        system("ln -s %s %s" %(intwobitfile, twobitfile))
Example #60
    def testProgressiveOutgroupsVsAllOutgroups(self):
        """Tests the difference in outgroup coverage on an ingroup when
        running in "ingroups vs. outgroups" mode and "set against set"
        mode.
        """
        encodeRegion = "ENm001"
        ingroup = "human"
        outgroups = ["macaque", "rabbit", "dog"]
        regionPath = os.path.join(self.encodePath, encodeRegion)
        ingroupPath = os.path.join(regionPath,
                                   ingroup + "." + encodeRegion + ".fa")
        outgroupPaths = map(
            lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"),
            outgroups)
        # Run in "set against set" mode, aligning the entire ingroup
        # vs each outgroup
        runCactusBlast([ingroupPath],
                       alignmentsFile=self.tempOutputFile,
                       toilDir=os.path.join(self.tempDir, "setVsSetToil"),
                       chunkSize=500000,
                       overlapSize=10000,
                       targetSequenceFiles=outgroupPaths)
        # Run in "ingroup vs outgroups" mode, aligning the ingroup vs
        # the outgroups in order, trimming away sequence that's
        # already been aligned.
        runCactusBlastIngroupsAndOutgroups([ingroupPath],
                                           outgroupPaths,
                                           alignmentsFile=self.tempOutputFile2,
                                           toilDir=os.path.join(
                                               self.tempDir, "outgroupToil"))

        # Get the coverage on the ingroup, in bases, from each run.
        coverageSetVsSetUnfiltered = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=self.tempOutputFile,
                          outputFile=coverageSetVsSetUnfiltered)
        coverageSetVsSet = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageSetVsSetUnfiltered))
        coverageIngroupVsOutgroupsUnfiltered = getTempFile(
            rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=self.tempOutputFile2,
                          outputFile=coverageIngroupVsOutgroupsUnfiltered)
        coverageIngroupVsOutgroups = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageIngroupVsOutgroupsUnfiltered))

        print "total coverage on human (set vs set mode, %d outgroups): %d" % (
            len(outgroups), coverageSetVsSet)
        print "total coverage on human (ingroup vs outgroup mode, %d outgroups): %d" % (
            len(outgroups), coverageIngroupVsOutgroups)

        # Make sure we're getting a reasonable fraction of the
        # alignments when using the trimming strategy.
        self.assertTrue(
            float(coverageIngroupVsOutgroups) / coverageSetVsSet >= 0.95)

        # Get the coverage on the ingroup, in bases, from just the
        # last outgroup. Obviously this should be much higher in set
        # vs set mode than in ingroup vs outgroup mode.
        outgroupAlignments = getTempFile(rootDir=self.tempDir)
        system("grep %s %s > %s" %
               (outgroups[-1], self.tempOutputFile, outgroupAlignments))
        coverageFileSetVsSet = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=outgroupAlignments,
                          outputFile=coverageFileSetVsSet)

        coverageFromLastOutgroupSetVsSet = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageFileSetVsSet))

        outgroupAlignments = getTempFile(rootDir=self.tempDir)
        system("grep %s %s > %s" %
               (outgroups[-1], self.tempOutputFile2, outgroupAlignments))
        coverageFileInVsOut = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=outgroupAlignments,
                          outputFile=coverageFileInVsOut)
        coverageFromLastOutgroupInVsOut = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageFileInVsOut))

        print "total coverage on human from last outgroup in set (%s) (set vs set mode): %d" % (
            outgroups[-1], coverageFromLastOutgroupSetVsSet)
        print "total coverage on human from last outgroup in set (%s) (ingroup vs outgroup mode): %d" % (
            outgroups[-1], coverageFromLastOutgroupInVsOut)

        self.assertTrue(
            float(coverageFromLastOutgroupInVsOut) /
            coverageFromLastOutgroupSetVsSet <= 0.10)
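The awk one-liners above simply total interval lengths (column 3 minus column 2) over BED-style coverage files. A minimal pure-Python equivalent, assuming whitespace-separated lines with start and end in the second and third columns:

def totalCoveredBases(bedPath):
    #Sum end - start over a BED-like file, mirroring:
    #  awk '{ total += $3 - $2 } END { print total }'
    total = 0
    with open(bedPath) as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 3:
                total += int(fields[2]) - int(fields[1])
    return total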