コード例 #1
0
ファイル: jobTest.py プロジェクト: ArtRand/jobTree
 def testJobReadWriteAndDelete(self):
     """Checks that a Job survives a write/read round trip and can be deleted.

     A scratch directory "testJobDir" is created under the current working
     directory; the test fails at os.mkdir if it already exists. The time
     taken by each batch of 100 jobs is printed as a crude benchmark.
     """
     jobDir = os.path.join(os.getcwd(), "testJobDir")
     os.mkdir(jobDir) #If directory already exists then the test will fail
     command = "by your command"
     memory = 2**32 #Previously "2^32", which is bitwise XOR in Python (== 34)
     cpu = 1
     tryCount = 100

     def checkPristine(job):
         #The same expectations hold for a freshly built job and for one
         #that has just been read back from disk.
         self.assertEqual(job.remainingRetryCount, tryCount)
         self.assertEqual(job.jobDir, jobDir)
         self.assertEqual(job.children, [])
         self.assertEqual(job.followOnCommands, [ (command, memory, cpu, 0)])
         self.assertEqual(job.messages, [])

     try:
         for i in xrange(10):
             startTime = time.time()
             for k in xrange(100):
                 job = Job(command, memory, cpu, tryCount, jobDir)
                 checkPristine(job)
                 job.write()
                 job = Job.read(job.getJobFileName())
                 checkPristine(job)
                 self.assertTrue(os.path.exists(job.getJobFileName()))
                 job.delete()
                 self.assertTrue(not os.path.exists(job.getJobFileName()))
             #We've just used it for benchmarking, so far
             print("It took %f seconds to load/unload jobs" % (time.time() - startTime))
             #Would be good to extend this trivial test
     finally:
         #Remove the scratch directory even when an assertion fails, so that
         #the next run does not trip over the os.mkdir above.
         system("rm -rf %s" % jobDir)
コード例 #2
0
ファイル: jobTest.py プロジェクト: ArtRand/jobTree
 def testJobUpdate(self):
     """Checks that Job.update turns queued children into readable child jobs.

     Forty times over, a job is given a random number (0-19) of children,
     updated, re-read from disk, and each child job file is read, checked
     and deleted. A scratch directory "testJobDir" is created under the
     current working directory; the test fails at os.mkdir if it exists.
     """
     jobDir = os.path.join(os.getcwd(), "testJobDir")
     os.mkdir(jobDir) #If directory already exists then the test will fail
     command = "by your command"
     memory = 2**32 #Previously "2^32", which is bitwise XOR in Python (== 34)
     cpu = 1
     tryCount = 100

     try:
         for i in xrange(40):
             startTime = time.time()
             j = Job(command, memory, cpu, tryCount, jobDir)
             childNumber = random.randrange(20)
             for k in xrange(childNumber):
                 j.children.append((command, memory, cpu))
             self.assertEqual(len(j.children), childNumber)
             j.update(tryCount=tryCount, depth=0)
             j = Job.read(j.getJobFileName())
             self.assertEqual(len(j.children) + len(j.followOnCommands), childNumber + 1)
             #Distinct names so the values read back for a child do not
             #overwrite the memory/cpu fixtures used to build the next job.
             for childJobFile, childMemory, childCpu in j.children:
                 cJ = Job.read(childJobFile)
                 self.assertEqual(cJ.remainingRetryCount, tryCount)
                 self.assertEqual(cJ.children, [])
                 self.assertEqual(cJ.followOnCommands, [ (command, childMemory, childCpu, 0)])
                 self.assertEqual(cJ.messages, [])
                 self.assertTrue(os.path.exists(cJ.getJobFileName()))
                 cJ.delete()
                 self.assertTrue(not os.path.exists(cJ.getJobFileName()))
             #Once every child is deleted only the parent's entry should remain
             self.assertEqual(os.listdir(jobDir), [ "job" ])
             j.delete()
             #We've just used it for benchmarking, so far
             print("It took %f seconds to update jobs" % (time.time() - startTime))
     finally:
         #Remove the scratch directory even when an assertion fails, so that
         #the next run does not trip over the os.mkdir above.
         system("rm -rf %s" % jobDir)
コード例 #3
0
ファイル: cPecanRealignTest.py プロジェクト: ArtRand/cPecan
 def testCPecanRealignSplitSequences(self):
     """Runs cPecanRealign, splitting indels longer than 100bp, and check
     that the coverage from the results is the same as the coverage from
     realigning with no arguments.

     The two temporary output files are removed after each sequence pair,
     including when a command or an assertion fails.
     """
     for seqFile1, seqFile2 in seqFilePairGenerator():
         # Drop the lastz command since it's not needed. But this
         # is still convenient to use the same parameters as all
         # the other tests
         realignCommand, _ = getCommands(seqFile1, seqFile2)
         splitRealignCommand = realignCommand + " --splitIndelsLongerThanThis 100"
         realignOutput = getTempFile()
         splitRealignOutput = getTempFile()
         try:
             system(realignCommand + " > %s" % realignOutput)
             system(splitRealignCommand + " > %s" % splitRealignOutput)

             #The following will fail until we refactor.

             # Check coverage on seqFile1, then on seqFile2
             for seqFile in (seqFile1, seqFile2):
                 splitRealignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile, splitRealignOutput))
                 realignCoverage = popenCatch("cactus_coverage %s %s" % (seqFile, realignOutput))
                 # assertEqual reports both values when they differ
                 self.assertEqual(splitRealignCoverage, realignCoverage)
         finally:
             os.remove(realignOutput)
             os.remove(splitRealignOutput)
コード例 #4
0
ファイル: aa_events.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Schedule one child target per D event found for this clone.

        Logs the V and J deletion ranges, then for every (d, sequence) pair
        in self.d2seq finds the D events and creates a directory
        outdir/<d>/<index> for each. Events whose cdr3aa_dstart is -1 are
        handed to Get_Vjins (writing to "d_empty" inside that directory);
        all others go to Get_Vd_Dj_Ins. CloneEventsAgg runs as the follow-on.
        """
        self.logToMaster("Getting recomb. events for clone %s ..." % self.clone)
        vdel_hi = len(self.vseq) - 3
        vdel_lo = find_min_vdel(self.vseq, self.aaseq)
        jdel_hi = len(self.jseq) - 3
        jdel_lo = find_min_jdel(self.jseq, self.aaseq)
        self.logToMaster("Vdel: <%d-%d>" % (vdel_lo, vdel_hi))
        self.logToMaster("Jdel: <%d-%d>" % (jdel_lo, jdel_hi))

        for dname, dseq in self.d2seq.iteritems():
            events = find_devents(dseq, self.aaseq)
            self.logToMaster("%d number of devents" % (len(events)))
            for index, event in enumerate(events):
                # outdir/clone/d/i
                event_dir = os.path.join(self.outdir, dname, str(index))
                system("mkdir -p %s" % event_dir)

                if event.cdr3aa_dstart != -1:
                    child = Get_Vd_Dj_Ins(self.clone, self.vseq, vdel_lo,
                                          vdel_hi, self.jseq, jdel_lo,
                                          jdel_hi, dname, dseq, event,
                                          self.aaseq, event_dir)
                else:
                    child = Get_Vjins(self.clone, self.vseq, vdel_lo,
                                      vdel_hi, self.jseq, jdel_lo, jdel_hi,
                                      dname, event, self.aaseq,
                                      os.path.join(event_dir, "d_empty"))
                self.addChildTarget(child)
        self.setFollowOnTarget(CloneEventsAgg(self.outdir))
コード例 #5
0
ファイル: getPlots.py プロジェクト: ngannguyen/referenceViz
 def run(self):
     """Runs cnvPlot.py on copyNumberStats.xml from self.indir, if that file exists.

     The --filteredSamples option is appended only when
     self.filteredSamples is not the empty string.
     """
     statsFile = os.path.join(self.indir, "copyNumberStats.xml")
     if not os.path.exists( statsFile ):
         return
     pieces = [ "cnvPlot.py %s --outdir %s " %(statsFile, self.outdir) ]
     if self.filteredSamples != "":
         pieces.append(" --filteredSamples %s" %(self.filteredSamples))
     system("".join(pieces))
コード例 #6
0
 def runComparisonOfBlastScriptVsNaiveBlast(self, blastMode):
     """Compares cactus_blast output against a naive blast run for every
     pair of species in each encode region.

     With blastMode "allAgainstAll" both sequence files are passed as the
     input set; for any other mode the first file is the input and the
     second is passed as targetSequenceFiles.
     """
     species = ("human", "mouse", "dog")
     #Other species to try "rat", "monodelphis", "macaque", "chimp"
     for regionNumber in xrange(1, 2): #Could go to six
         encodeRegion = "ENm00" + str(regionNumber)
         regionPath = os.path.join(self.encodePath, encodeRegion)
         for index, species1 in enumerate(species):
             for species2 in species[index + 1:]:
                 seqFile1 = os.path.join(regionPath, "%s.%s.fa" % (species1, encodeRegion))
                 seqFile2 = os.path.join(regionPath, "%s.%s.fa" % (species2, encodeRegion))

                 #Reference result from the naive blast
                 runNaiveBlast(seqFile1, seqFile2, self.tempOutputFile)
                 logger.info("Ran the naive blast okay")

                 #Result from cactus_blast
                 jobTreeDir = os.path.join(getTempDirectory(self.tempDir), "jobTree")
                 if blastMode == "allAgainstAll":
                     inputFiles = [ seqFile1, seqFile2 ]
                     modeArgs = {}
                 else:
                     inputFiles = [ seqFile1 ]
                     modeArgs = { "targetSequenceFiles" : [ seqFile2 ] }
                 runCactusBlast(inputFiles, self.tempOutputFile2, jobTreeDir,
                                chunkSize=500000, overlapSize=10000, **modeArgs)
                 runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
                 system("rm -rf %s " % jobTreeDir)
                 logger.info("Ran cactus_blast okay")
                 logger.critical("Comparing cactus_blast and naive blast; using mode: %s" % blastMode)
                 compareResultsFile(self.tempOutputFile, self.tempOutputFile2)
コード例 #7
0
def trimGenome(sequenceFile, coverageFile, outputFile, complement=False,
               flanking=0, minSize=1, windowSize=10, threshold=1, depth=None):
    """Run cactus_trimSequences.py, redirecting its stdout to outputFile.

    Each keyword option is rendered by nameValue and placed before the
    sequence and coverage file arguments on the command line.
    """
    optionArgs = (
        nameValue("complement", complement, valueType=bool),
        nameValue("flanking", flanking),
        nameValue("minSize", minSize),
        nameValue("windowSize", windowSize),
        nameValue("threshold", threshold),
        nameValue("depth", depth),
    )
    fileArgs = (sequenceFile, coverageFile, outputFile)
    command = "cactus_trimSequences.py %s %s %s %s %s %s %s %s > %s" % (
        optionArgs + fileArgs)
    system(command)
コード例 #8
0
ファイル: conservationTrack.py プロジェクト: glennhickey/hal
 def run(self):
     """Writes a "<modfile>-modified" copy of the model file with small
     exponents rewritten, then runs halTreePhyloP.py on it.
     """
     modifiedModel = "%s-modified" %self.modfile
     #modify small branch lengths: sed replaces every "e-1" plus the one
     #character after it with "e-08"
     sedCommand = "sed 's/e-1./e-08/g' %s > %s" %(self.modfile, modifiedModel)
     system(sedCommand)
     #get conservation bigwig and liftover files:
     phyloPArgs = (self.halfile, modifiedModel, self.outdir, self.numproc)
     system("halTreePhyloP.py %s %s %s --bigWig --numProc %d" %phyloPArgs)
コード例 #9
0
def extractOutput(workDir, outputHalFile, options):
    """Move the root MAF (if requested) and export the alignment to HAL.

    When options.outputMaf is not None, the project XML is read to find the
    root name and <root>/<root>.maf is moved to options.outputMaf. Then
    cactus2hal.py is run with the environment file sourced first, its
    output appended to <workDir>/cactus.log, bracketed by timestamped
    "Beginning"/"Finished" lines in the same log.
    """
    alignmentDir = os.path.join(workDir, ProjectWrapper.alignmentDirName)
    logFile = os.path.join(workDir, 'cactus.log')

    def appendToLog(message):
        # "with" closes the handle even if the write raises.
        with open(logFile, "a") as logHandle:
            logHandle.write(message)

    if options.outputMaf is not None:
        mcProj = MultiCactusProject()
        mcProj.readXML(
            os.path.join(alignmentDir,
                         ProjectWrapper.alignmentDirName + "_project.xml"))
        rootName = mcProj.mcTree.getRootName()
        rootPath = os.path.join(alignmentDir, rootName, rootName + '.maf')
        system('mv %s %s' % (rootPath, options.outputMaf))

    envFile = getEnvFilePath()
    pjPath = os.path.join(alignmentDir,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    appendToLog("\n\n%s: Beginning HAL Export\n\n" % str(
        datetime.datetime.now()))
    cmd = '. %s && cactus2hal.py %s %s >> %s 2>&1' % (envFile, pjPath,
                                                      outputHalFile, logFile)
    system(cmd)
    appendToLog("\n%s: Finished HAL Export \n" % str(
        datetime.datetime.now()))
コード例 #10
0
def runCactus(workDir, jtCommands, jtPath, options):
    """Run cactus_progressive.py for the project in workDir.

    The command sources the environment file first and appends all output
    to <workDir>/cactus.log, bracketed by timestamped "Beginning"/"Finished"
    lines. With options.overwrite the existing log is removed and
    --overwrite is passed on. A JobStatusMonitor is always constructed but
    only started (as a daemon) when options.database is "kyoto_tycoon".
    """
    envFile = getEnvFilePath()
    pjPath = os.path.join(workDir, ProjectWrapper.alignmentDirName,
                          '%s_project.xml' % ProjectWrapper.alignmentDirName)
    logFile = os.path.join(workDir, 'cactus.log')

    def appendToLog(message):
        # "with" closes the handle even if the write raises.
        with open(logFile, "a") as logHandle:
            logHandle.write(message)

    if options.overwrite:
        overwriteFlag = '--overwrite'
        system("rm -f %s" % logFile)
    else:
        overwriteFlag = ''

    appendToLog("\n%s: Beginning Progressive Cactus Alignment\n\n" % str(
        datetime.datetime.now()))
    cmd = '. %s && cactus_progressive.py %s %s %s >> %s 2>&1' % (envFile,
                                                                 jtCommands,
                                                                 pjPath,
                                                                 overwriteFlag,
                                                                 logFile)
    jtMonitor = JobStatusMonitor(jtPath, pjPath, logFile,
                                 deadlockCallbackFn=abortFunction(jtPath,
                                                                  options))
    if options.database == "kyoto_tycoon":
        jtMonitor.daemon = True
        jtMonitor.start()

    system(cmd)
    appendToLog("\n%s: Finished Progressive Cactus Alignment\n" % str(
        datetime.datetime.now()))
コード例 #11
0
ファイル: aausage.py プロジェクト: ngannguyen/immunoseq
def checkOptions(parser, args, options):
    """Validate the input directory and create the output directory.

    Raises InputOptionError when options.indir is empty or does not exist.
    options.outdir is created with "mkdir -p" when it is missing. The
    parser and args parameters are not used here.
    """
    indir = options.indir
    if not indir:
        raise InputOptionError("Input directory is required. None was given.\n")
    elif not os.path.exists(indir):
        raise InputOptionError("Input directory %s does not exist\n" % indir)

    if os.path.exists(options.outdir):
        return
    system("mkdir -p %s" % options.outdir)
コード例 #12
0
ファイル: blastTest.py プロジェクト: benedictpaten/cactus
    def testKeepingCoverageOnIngroups(self):
        """Tests whether the --ingroupCoverageDir option works as
        advertised.

        Runs blast in "ingroups vs outgroups" mode, then recomputes the
        coverage of the outgroup fragments on each ingroup independently
        and checks it is identical to the coverage file kept by the run.
        """
        encodeRegion = "ENm001"
        ingroups = ["human", "cow"]
        outgroups = ["macaque", "rabbit", "dog"]
        regionPath = os.path.join(self.encodePath, encodeRegion)

        def fastaPath(species):
            return os.path.join(regionPath, species + "." + encodeRegion + ".fa")

        ingroupPaths = [fastaPath(species) for species in ingroups]
        outgroupPaths = [fastaPath(species) for species in outgroups]
        # Run blast in "ingroup vs outgroups" mode, requesting to keep
        # the bed files that show outgroup coverage on the ingroup.
        toilDir = os.path.join(self.tempDir, "tmp_toil")
        outgroupFragmentPaths = [getTempFile(rootDir=self.tempDir) for _ in outgroups]
        ingroupCoveragePaths = [getTempFile(rootDir=self.tempDir) for _ in ingroups]
        runCactusBlastIngroupsAndOutgroups(ingroups=ingroupPaths, outgroups=outgroupPaths, alignmentsFile=self.tempOutputFile, outgroupFragmentPaths=outgroupFragmentPaths, ingroupCoveragePaths=ingroupCoveragePaths, toilDir=toilDir)

        # To filter out alignments from the other ingroup and
        # self-alignments we need a fasta with all the outgroup
        # fragments in it. It does not depend on the ingroup, so it is
        # built once rather than once per ingroup.
        outgroupsCombined = getTempFile(rootDir=self.tempDir)
        for outgroupFragmentPath in outgroupFragmentPaths:
            system("cat %s >> %s" % (outgroupFragmentPath, outgroupsCombined))

        # The coverage file kept by the run for an ingroup sits at the
        # same index in ingroupCoveragePaths as the ingroup itself.
        for ingroupPath, keptCoverageFile in zip(ingroupPaths, ingroupCoveragePaths):
            # Get the coverage from the outgroups independently and
            # check that it's the same as the kept file
            independentCoverageFile = getTempFile(rootDir=self.tempDir)
            calculateCoverage(fromGenome=outgroupsCombined, sequenceFile=ingroupPath, cigarFile=self.tempOutputFile, outputFile=independentCoverageFile)
            self.assertTrue(filecmp.cmp(independentCoverageFile, keptCoverageFile))
0
ファイル: scriptTreeTest.py プロジェクト: decarlin/jobTree
 def testScriptTree_Example2(self):
     """Tests that the global and local temp dirs of a job behave as expected.

     Runs scriptTreeTest_Wrapper2.py self.testNo times and checks the job
     tree status after each run.
     """
     commandTemplate = "scriptTreeTest_Wrapper2.py --jobTree %s --logLevel=INFO --retryCount=0"
     for _ in xrange(self.testNo):
         system(commandTemplate % self.jobTreeDir)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
コード例 #14
0
 def run(self):
     """Copies the existing input bams locally, merges and sorts them, and
     copies the sorted bam to self.outdir before scheduling Snp.

     A single input is simply renamed to merge.bam; any other number of
     inputs goes through "samtools merge".
     """
     workDir = self.getLocalTempDir()
     copied = []
     for remotePath in self.files:
         if os.path.exists(remotePath): #HACK: inputs that do not exist are skipped
             stem = os.path.basename(remotePath).split('.')[0]
             #The running count of copied files keeps local names unique
             localPath = os.path.join(workDir, "%s%d.bam" %(stem, len(copied)))
             system("scp -C %s %s" %(remotePath, localPath))
             copied.append(localPath)

     mergedBam = os.path.join(workDir, "merge.bam")
     if len(copied) != 1:
         joinedInputs = " ".join(copied)
         logger.info("Merging bams...\n")
         system( "samtools merge %s %s" %(mergedBam, joinedInputs) )
     else:
         system("mv %s %s" %(copied[0], mergedBam))

     sortedPrefix = os.path.join(workDir, "mergeSorted")
     system( "samtools sort %s %s" %( mergedBam, sortedPrefix ) )
     system( "cp %s.bam %s" %(sortedPrefix, self.outdir) )

     #Get Snps info:
     self.setFollowOnTarget( Snp(self.outdir, self.options) )
コード例 #15
0
ファイル: aimseq.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Schedule a down-sampling child target for every sample.

        Each entry of self.sampledir is a directory holding a gzipped
        pickle of the same name. Results go under
        <opts.outdir>/down_sampling/<sample>. The sampling mode is chosen
        from opts.sampling_uniq, then opts.sampling_top, else plain read
        sampling. The follow-on is Normalize when opts.normalize is set,
        otherwise Analyses.
        """
        self.logToMaster("DownSampling\n")
        opts = self.options
        # NOTE(review): the result is unused; the call is kept in case
        # getGlobalTempDir has side effects - TODO confirm and remove.
        global_dir = self.getGlobalTempDir()
        sampling_dir = os.path.join(opts.outdir, "down_sampling")
        system("mkdir -p %s" % sampling_dir)

        for sam in os.listdir(self.sampledir):
            samdir = os.path.join(self.sampledir, sam)
            # Close the gzip handle explicitly instead of leaving it to
            # garbage collection.
            with gzip.open(os.path.join(samdir, sam), "rb") as handle:
                sample = pickle.load(handle)
            out_samdir = os.path.join(sampling_dir, sam)
            system("mkdir -p %s" % out_samdir)
            if opts.sampling_uniq:  # sampling uniq clones
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                              out_samdir, libsample.sampling,
                                              opts.sampling_uniq, 'uniq'))
            elif opts.sampling_top:  # sampling reads, then report top clones
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                out_samdir, libsample.sampling, opts.sampling,
                                "top", opts.sampling_top))
            else:  # sampling reads
                self.addChildTarget(libsample.SampleAnalysis0(sample, samdir,
                                out_samdir, libsample.sampling, opts.sampling))
        if opts.normalize:
            self.setFollowOnTarget(Normalize(sampling_dir, opts))
        else:
            self.setFollowOnTarget(Analyses(sampling_dir, opts))
コード例 #16
0
ファイル: halLiftoverStatus.py プロジェクト: glennhickey/hal
 def run(self):
     """Runs halLiftover on self.bedfile, writing PSL output to
     self.liftfile, then computes the liftover status and prints it to
     self.statusfile.
     """
     liftoverArgs = (self.opts.halfile, self.opts.query, self.bedfile,
                     self.opts.target, self.liftfile)
     system("halLiftover --outPSL --tab %s %s %s %s %s" % liftoverArgs)
     print_status(
         get_liftover_status(self.bedfile, self.liftfile, self.opts.edge),
         self.statusfile)
コード例 #17
0
ファイル: scriptTreeTest.py プロジェクト: decarlin/jobTree
 def testScriptTree_Example(self):
     """Uses the jobTreeTest code to test the scriptTree Target wrapper.

     Runs scriptTreeTest_Wrapper.py self.testNo times and checks the job
     tree status after each run.
     """
     commandTemplate = "scriptTreeTest_Wrapper.py --jobTree %s --logLevel=INFO --retryCount=10"
     for _ in xrange(self.testNo):
         system(commandTemplate % self.jobTreeDir)
         runJobTreeStatusAndFailIfNotComplete(self.jobTreeDir)
コード例 #18
0
def getRandomConfigFile():
    """Write a randomised copy of cactus_config.xml and return its path.

    The stock config is loaded from cactusRootPath(), several attributes of
    its single "caf" node are replaced by random values, "check" is given
    runCheck="1" and "normal" is given iterations="2". The result is
    written to a new temporary .xml file in the current directory; when the
    log level is DEBUG the file is also printed with "cat".
    """
    tempConfigFile = getTempFile(rootDir="./", suffix=".xml")
    config = ET.parse(os.path.join(cactusRootPath(), "cactus_config.xml")).getroot()
    cafNode = config.find("caf")
    assert len(config.findall("caf")) == 1

    def randomInt(upper):
        #Random integer in [0, upper)
        return int(random.random() * upper)

    annealingRounds = 1 + randomInt(10)
    cafNode.attrib["annealingRounds"] = " ".join([ str(1 + randomInt(10)) for i in xrange(annealingRounds) ])
    #Distinct values in increasing order; may be empty
    deannealingRounds = sorted(set([ 1 + randomInt(10) for i in xrange(randomInt(10)) ]))
    cafNode.attrib["deannealingRounds"] = " ".join([ str(i) for i in deannealingRounds ])
    #One trim value per annealing round
    cafNode.attrib["trim"] = " ".join([ str(1 + randomInt(5)) for i in xrange(annealingRounds) ])

    cafNode.attrib["alignRepeatsAtLoop"] = str(random.random() * annealingRounds)

    cafNode.attrib["minimumTreeCoverage"] = str(random.random())
    cafNode.attrib["blockTrim"] = str(randomInt(5))
    cafNode.attrib["ignoreAllChainsLessThanMinimumTreeCoverage"] = str(random.choice([0, 1]))
    cafNode.attrib["minimumBlockDegree"] = str(random.choice([0, 5]))

    checkNode = config.find("check")
    checkNode.attrib["runCheck"] = "1"

    normalNode = config.find("normal")
    normalNode.attrib["iterations"] = "2"

    #Now print the file.. ("with" closes the handle even if the write raises)
    with open(tempConfigFile, 'w') as fileHandle:
        ET.ElementTree(config).write(fileHandle)
    if getLogLevelString() == "DEBUG":
        system("cat %s" % tempConfigFile)
    return tempConfigFile
コード例 #19
0
 def run(self):
     """Compares each predicted alignment with the true Blanchette alignment
     and merges the per-repeat comparison results.

     For every repeat the true MFA is converted to MAF, renamed with the
     experiment's naming, and compared with mafComparator; the result is
     copied to <outputDir>/<i>/mafComparison.xml and merged with the
     running total. The final total is moved to
     <outputDir>/mafComparison.xml. Nothing is moved when
     self.options.blanchetteRepeats is zero.
     """
     previousOutputFile = None
     blanchettePath = os.path.join(TestStatus.getPathToDataSets(), "blanchettesSimulation")
     treeFile = os.path.join(blanchettePath, "tree.newick")
     for i in xrange(self.options.blanchetteRepeats):
         trueAlignmentMFA = os.path.join(blanchettePath, "%.2i.job" % i, "true.mfa")
         trueAlignmentMAF = os.path.join(self.getLocalTempDir(), "temp.maf")
         system("mfaToMaf --mfaFile %s --outputFile %s --treeFile %s" % (trueAlignmentMFA, trueAlignmentMAF, treeFile))

         trueRenamedMAF = trueAlignmentMAF + ".renamed"
         expPath = os.path.join(self.outputDir, str(i), "experiment.xml")
         applyNamingToMaf(expPath, trueAlignmentMAF, trueRenamedMAF)
         trueAlignmentMAF = trueRenamedMAF
         #Explicit comparison kept so that values other than False (e.g.
         #None) still select the vanilla output, as before.
         if self.params.vanilla == False:
             predictedAlignmentMaf = os.path.join(self.outputDir, str(i), "progressiveCactusAlignment", "Anc0", "Anc0.maf")
         else:
             predictedAlignmentMaf = os.path.join(self.outputDir, str(i), "cactusVanilla.maf")

         outputFile = os.path.join(self.getLocalTempDir(), "temp%i" % i)
         system("mafComparator --mafFile1 %s --mafFile2 %s --outputFile %s" % (trueAlignmentMAF, predictedAlignmentMaf, outputFile))
         system("cp %s %s" % (outputFile, os.path.join(self.outputDir, str(i), "mafComparison.xml")))
         if previousOutputFile is not None:
             #Fold the running total into this repeat's result file
             system("mergeMafComparatorResults.py --results1 %s --results2 %s --outputFile %s" % (outputFile, previousOutputFile, outputFile))
         previousOutputFile = outputFile

     #With zero repeats there is no result; previously this ran "mv None ..."
     if previousOutputFile is not None:
         system("mv %s %s" % (previousOutputFile, os.path.join(self.outputDir, "mafComparison.xml")))
コード例 #20
0
ファイル: sample.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Filter a sample's clones by size, then split them by status.

        Clones are loaded from the gzipped pickle self.samplefile. The size
        filter is applied only when one of the opts thresholds is set
        (mincount > 1, or maxcount/minfreq/maxfreq > 0). Non-empty
        productive and non-productive sets are pickled to
        <outdir>/productive/<name>/ and <outdir>/non_productive/<name>/
        under the sample file's basename. CleanupFile on the sample file is
        the follow-on.
        """
        # filter by size
        starttime = time.time()
        opts = self.opts
        # Close the gzip handle explicitly instead of leaving it to
        # garbage collection.
        with gzip.open(self.samplefile, 'rb') as handle:
            clones = pickle.load(handle)
        if (opts.mincount > 1 or opts.maxcount > 0 or opts.minfreq > 0 or
            opts.maxfreq > 0):
            clones = filter_by_size(clones, opts.mincount, opts.maxcount,
                                    opts.minfreq, opts.maxfreq)
        msg = ("Filter_by_size for file %s done in %.4f s" %
                                 (self.samplefile, time.time() - starttime))
        logger.info(msg)
        starttime = time.time()

        # filter by status
        pclones = filter_by_status(clones, True)
        npclones = filter_by_status(clones, False)

        filename = os.path.basename(self.samplefile)
        for status, status_clones in (("productive", pclones),
                                      ("non_productive", npclones)):
            if not status_clones:
                continue
            status_dir = os.path.join(self.outdir, status, self.name)
            system("mkdir -p %s" % status_dir)
            # "with" guarantees the compressed stream is flushed and closed
            # before the follow-on runs.
            with gzip.open(os.path.join(status_dir, filename), "wb") as handle:
                pickle.dump(status_clones, handle)
        msg = ("Filter_by_status for file %s done in %.4f s" %
                                 (self.samplefile, time.time() - starttime))
        logger.info(msg)
        self.setFollowOnTarget(libcommon.CleanupFile(self.samplefile))
コード例 #21
0
def runCactusProgressive(inputDir,
                      jobTreeDir, 
                      logLevel=None, retryCount=0, 
                      batchSystem="single_machine", 
                      rescueJobFrequency=None,
                      skipAlignments=False,
                      buildHal=None,
                      buildFasta=None,
                      buildAvgs=False, 
                      jobTreeStats=False,
                      maxThreads=None,
                      maxCpus=None,
                      defaultMemory=None,
                      recursive=None,
                      logFile=None,
                      event=None,
                      extraJobTreeArgumentsString="",
                      profileFile=None):
    """Build and run the cactus_progressive.py command line.

    The shared options are rendered by _fn and the "recursive" and "event"
    options by nameValue. When profileFile is given, the command is run
    under "python -m cProfile -o <profileFile>" using the script found in
    <cactusRootPath()>/bin.
    """
    baseCommand = "cactus_progressive.py %s" % inputDir
    # The eighth positional argument to _fn is passed as None, as before.
    sharedArgs = _fn(jobTreeDir,
                     logLevel, retryCount, batchSystem, rescueJobFrequency, skipAlignments,
                     buildAvgs, None,
                     buildHal,
                     buildFasta,
                     jobTreeStats, maxThreads, maxCpus, defaultMemory, logFile,
                     extraJobTreeArgumentsString=extraJobTreeArgumentsString)
    progressiveArgs = " %s %s" % (nameValue("recursive", recursive, bool),
                                  nameValue("event", event))
    command = baseCommand + " " + sharedArgs + progressiveArgs
    if profileFile is not None:
        command = "python -m cProfile -o %s %s/bin/%s" % (profileFile, cactusRootPath(), command)
    system(command)
    logger.info("Ran the cactus progressive okay")
コード例 #22
0
def runCactusBlast(sequenceFiles, outputFile, jobTreeDir,
                   chunkSize=None, overlapSize=None, 
                   logLevel=None, 
                   blastString=None, 
                   selfBlastString=None,
                   compressFiles=None,
                   lastzMemory=None,
                   targetSequenceFiles=None):
    """Build and run the cactus_blast.py command line.

    sequenceFiles is joined with spaces as the leading arguments and the
    alignments are written to outputFile via --cigars. Each optional
    setting is rendered by nameValue; targetSequenceFiles, when given, is
    joined with spaces and rendered with quotes=True.
    """
    logLevelArg = getLogLevelString2(logLevel)
    # Rendered into fresh names rather than rebinding the parameters, so
    # each name keeps a single meaning throughout the function.
    chunkSizeArg = nameValue("chunkSize", chunkSize, int)
    overlapSizeArg = nameValue("overlapSize", overlapSize, int)
    blastStringArg = nameValue("blastString", blastString, str)
    selfBlastStringArg = nameValue("selfBlastString", selfBlastString, str)
    compressFilesArg = nameValue("compressFiles", compressFiles, bool)
    lastzMemoryArg = nameValue("lastzMemory", lastzMemory, int)
    joinedTargets = targetSequenceFiles
    if targetSequenceFiles is not None:
        joinedTargets = " ".join(targetSequenceFiles)
    targetsArg = nameValue("targetSequenceFiles", joinedTargets, quotes=True)
    command = "cactus_blast.py %s  --cigars %s %s %s %s %s %s %s %s --jobTree %s --logLevel %s" % \
            (" ".join(sequenceFiles), outputFile,
             chunkSizeArg, overlapSizeArg, blastStringArg, selfBlastStringArg,
             compressFilesArg, lastzMemoryArg, targetsArg, jobTreeDir,
             logLevelArg)
    logger.info("Running command : %s" % command)
    system(command)
    logger.info("Ran the cactus_blast command okay")
コード例 #23
0
def split_fasta(input_fasta, work_dir):
    """Split input_fasta with "faSplit byname" into <work_dir>/out/.

    The output directory is created with os.mkdir, so it must not already
    exist. Returns the glob of everything found in that directory.
    """
    # faSplit is given the directory with a trailing slash as its output root.
    split_dir = os.path.join(work_dir, "out") + '/'
    os.mkdir(split_dir)
    command = "faSplit byname {input} {out_root}".format(
        input=input_fasta, out_root=split_dir)
    system(command)
    pattern = os.path.join(work_dir, "out/*")
    return glob(pattern)
コード例 #24
0
ファイル: aa_events_prob.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Schedule Get_Vjins_Batch children for every (vdel, jdel) pair.

        For each pair the candidate insertion sequences come from
        get_vjins_emptyd; pairs for which it returns None are skipped. The
        candidates are cut into batches of at most 100000, each written by
        a child to <tempdir>/<vdel>_<jdel>_<batch index>, where tempdir is
        self.outfile without its extension plus "_tempdir".
        Get_Vd_Dj_Ins_Agg3 is the follow-on.
        """
        tempdir = "%s_tempdir" % os.path.splitext(self.outfile)[0]
        system("mkdir -p %s" % tempdir)

        # Close the gzip handle explicitly instead of leaving it to
        # garbage collection.
        with gzip.open(self.modelfile, 'rb') as handle:
            model = pickle.load(handle)
        items = self.clone.split('_')
        v = items[0]
        j = items[2]
        batchsize = 100000

        for vdel in self.vdels:
            v_cdr3_nt = self.v_nt if vdel == 0 else self.v_nt[: -1 * vdel]
            v_hang = len(v_cdr3_nt) % 3
            for jdel in self.jdels:
                j_cdr3_nt = self.j_nt if jdel == 0 else self.j_nt[jdel: ]
                d_nts = self.devent.left_nts + self.devent.right_nts
                vjins_nts = get_vjins_emptyd(self.v_nt, vdel, self.j_nt, jdel,
                                             d_nts, self.cdr3_aa)
                if vjins_nts is None:
                    continue

                # Step through the candidates batchsize at a time. This
                # avoids computing the batch count with "/", which only
                # yields an integer under Python 2 division.
                starts = xrange(0, len(vjins_nts), batchsize)
                for index, start in enumerate(starts):
                    outfile = os.path.join(tempdir, "%d_%d_%d" % (vdel, jdel, index))
                    # Slicing clamps at the end, so the last batch may be short
                    batch_vjins_nts = vjins_nts[start: start + batchsize]
                    self.addChildTarget(Get_Vjins_Batch(batch_vjins_nts, v,
                          v_hang, v_cdr3_nt, j, j_cdr3_nt, self.d, d_nts,
                          self.cdr3_aa, vdel, jdel, self.devent.d5del,
                          self.devent.d3del, model, outfile))
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg3(tempdir, self.outfile))
コード例 #25
0
ファイル: sample.py プロジェクト: ngannguyen/aimseqtk
 def run(self):
     """Writes the clones of every batch file in self.indir to self.outfile.

     An existing self.outfile is removed first. Each batch is a gzipped
     pickle and is passed to write_clones with True as its third argument.
     """
     if os.path.exists(self.outfile):
         # The format string previously had no "%s" placeholder, so this
         # line raised TypeError whenever the output file already existed.
         system("rm -f %s" % self.outfile)
     for batch in os.listdir(self.indir):
         batchfile = os.path.join(self.indir, batch)
         # Close each gzip handle explicitly instead of leaving it to
         # garbage collection.
         with gzip.open(batchfile, "rb") as handle:
             clones = pickle.load(handle)
         write_clones(self.outfile, clones, True)
コード例 #26
0
ファイル: aa_events_prob.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Schedule Get_Ins children for every V and J deletion.

        The non-negative keys of model.ins_vd and model.ins_dj are collected
        and handed to the children. V results go to <outdir>/vdels/<vdel>
        and J results to <outdir>/jdels/<jdel>. Get_Vd_Dj_Ins_Agg is the
        follow-on and writes to <outdir>/events.
        """
        # Close the gzip handle explicitly instead of leaving it to
        # garbage collection.
        with gzip.open(self.modelfile, "rb") as handle:
            model = pickle.load(handle)
        ins_vds = [vd for vd in model.ins_vd.keys() if vd >= 0]
        ins_djs = [dj for dj in model.ins_dj.keys() if dj >= 0]

        vdir = os.path.join(self.outdir, "vdels")
        system("mkdir -p %s" % vdir)
        for vdel in self.vdels:
            voutfile = os.path.join(vdir, str(vdel))
            self.addChildTarget(Get_Ins(get_vdins_events, vdel, self.v_nt,
                             self.devent, self.cdr3_aa, voutfile, ins_vds))
        jdir = os.path.join(self.outdir, 'jdels')
        system("mkdir -p %s" % jdir)
        for jdel in self.jdels:
            joutfile = os.path.join(jdir, str(jdel))
            self.addChildTarget(Get_Ins(get_djins_events, jdel, self.j_nt,
                             self.devent, self.cdr3_aa, joutfile, ins_djs))

        outfile = os.path.join(self.outdir, "events")
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg(self.clone, vdir, jdir,
                self.v_nt, self.j_nt, self.d, self.d_nt, self.devent, outfile,
                self.modelfile))
コード例 #27
0
    def run(self):
        """Load the inputs and schedule a GetLencount child per sample.

        self.model is either a directory, in which case
        rcommon.get_median_model is used, or a gzipped pickle of a model.
        Each entry of self.db_dir gets a GetLencount child writing to
        <global temp dir>/sam2len2count/<entry>. GetLlhs is the follow-on
        and receives everything loaded here.
        """
        system("mkdir -p %s" % self.outdir)
        clone2sams = read_clone_file(self.clone_file, True)
        if os.path.isdir(self.model):
            model = rcommon.get_median_model(self.model)
        else:
            # Close the gzip handle explicitly instead of leaving it to
            # garbage collection.
            with gzip.open(self.model, "rb") as handle:
                model = pickle.load(handle)
        sam2total, group2sams = read_clonesize(self.numclone_file)
        len2llh = read_llh(self.lenllh, intkey=True)
        clone2llh = read_llh(self.clonellh)

        global_dir = self.getGlobalTempDir()
        lencount_dir = os.path.join(global_dir, "sam2len2count")
        system("mkdir -p %s" % lencount_dir)
        for s in os.listdir(self.db_dir):
            samdir = os.path.join(self.db_dir, s)
            lencount_file = os.path.join(lencount_dir, s)
            self.addChildTarget(GetLencount(samdir, lencount_file))
        self.setFollowOnTarget(
            GetLlhs(
                clone2sams,
                self.outdir,
                model,
                lencount_dir,
                group2sams,
                self.ingroup,
                self.outgroup,
                len2llh,
                clone2llh,
            )
        )
コード例 #28
0
ファイル: allTests.py プロジェクト: sorrywm/mafTools
 def test_bedParsing(self):
     """Check mafComparator's totals with and without a bed interval file.

     For every known-value tuple the two mafs and the bed file are written
     out, mafComparator is run, and the 'totalTrue' attributes of the
     resulting XML are compared against the expected numbers.
     """
     outputXml = os.path.join('tempTestFiles', 'output.xml')
     for maf1, maf2, bed, totalTrue, totalTrueInInterval in self.knownValues:
         if not os.path.exists('tempTestFiles'):
             os.mkdir('tempTestFiles')
         inputs = (
             (self.maf1path, '%s%s%s' % (self.header, maf1, self.footer)),
             (self.maf2path, '%s%s%s' % (self.header, maf2, self.footer)),
             (self.bedpath, '%s' % bed),
         )
         for path, text in inputs:
             handle = open(path, 'w')
             handle.write(text)
             handle.close()

         cmd = ['mafComparator',
                '--mafFile1=%s' % self.maf1path,
                '--mafFile2=%s' % self.maf2path,
                '--outputFile=%s' % outputXml]
         if bed != '':
             cmd.append('--bedFiles=%s' % os.path.join('tempTestFiles', 'bed.bed'))
         cmd.append('--sampleNumber=1000 --logLevel %s' % getLogLevelString())
         system(" ".join(cmd))

         aggregate = ET.parse(outputXml).findall('homologyTests')[0].find('aggregateResults')
         self.assertAlmostEquals(totalTrue,
                                 float(aggregate.find('all').attrib['totalTrue']))
         if totalTrueInInterval is None:
             # Without intervals there must be no 'A' element at all.
             self.assertEqual(None, aggregate.find('A'))
         else:
             self.assertAlmostEquals(totalTrueInInterval,
                                     float(aggregate.find('A').attrib['totalTrue']))
         # The working directory is removed after every case and recreated
         # at the top of the next iteration.
         shutil.rmtree(os.path.dirname(self.maf1path))
コード例 #29
0
ファイル: abstractMapper.py プロジェクト: isovic/marginAlign
 def realignSamFile(self):
     """Copy the output SAM aside and schedule its realignment.

     The current output SAM file is copied to "temp.sam" in the global temp
     directory; realignSamFileTargetFn is then added as a child target with
     the copy as its first argument and the original output path as its
     second.
     """
     scratchSam = os.path.join(self.getGlobalTempDir(), "temp.sam")
     copyCommand = "cp %s %s" % (self.outputSamFile, scratchSam)
     system(copyCommand)
     realignArgs = (scratchSam, self.outputSamFile, self.readFastqFile,
                    self.referenceFastaFile, self.options)
     self.addChildTargetFn(realignSamFileTargetFn, args=realignArgs)
コード例 #30
0
ファイル: aa_events_prob.py プロジェクト: ngannguyen/aimseqtk
    def run(self):
        """Schedule one Get_Vd_Dj_Ins_Agg2 child per (V file, J file) pair.

        The clone name is split on '_' into V, CDR3 amino acids and J. Every
        file name in ``self.vdir`` and ``self.jdir`` is expected to start with
        an integer deletion count, optionally followed by "_batch...". The
        children write into "<outfile stem>_tempdir", which the follow-on
        Get_Vd_Dj_Ins_Agg3 receives together with ``self.outfile``.
        """
        tempdir = "%s_tempdir" % os.path.splitext(self.outfile)[0]
        system("mkdir -p %s" % tempdir)

        fields = self.clone.split('_')
        v, cdr3_aa, j = fields[0], fields[1], fields[2]

        d5del = self.devent.d5del
        d3del = self.devent.d3del
        # A zero 3' deletion needs an open-ended slice: [x:-0] would be empty.
        d_end = None if d3del == 0 else -d3del
        d_cdr3_nt = self.d_nt[d5del:d_end]

        for vdelname in os.listdir(self.vdir):
            vfile = os.path.join(self.vdir, vdelname)
            vdel = int(vdelname.split("_batch")[0])
            if vdel == 0:
                v_cdr3_nt = self.v_nt
            else:
                v_cdr3_nt = self.v_nt[:-vdel]

            for jdelname in os.listdir(self.jdir):
                jfile = os.path.join(self.jdir, jdelname)
                jdel = int(jdelname.split("_batch")[0])
                if jdel == 0:
                    j_cdr3_nt = self.j_nt
                else:
                    j_cdr3_nt = self.j_nt[jdel:]

                pair_outfile = os.path.join(tempdir,
                                            "%s_%s" % (vdelname, jdelname))
                child = Get_Vd_Dj_Ins_Agg2(v, vdel, vfile, v_cdr3_nt,
                                           j, jdel, jfile, j_cdr3_nt,
                                           self.d, d5del, d3del, d_cdr3_nt,
                                           cdr3_aa, self.modelfile,
                                           pair_outfile)
                self.addChildTarget(child)
        self.setFollowOnTarget(Get_Vd_Dj_Ins_Agg3(tempdir, self.outfile))
コード例 #31
0
    ##Record time to run
    baseRuntime = runNaiveBlast(seqFile1, seqFile2, tempOutputFile, 
                  lastzOptions="--ambiguous=iupac,100 --ydrop=3000")
    results1 = loadResults(tempOutputFile)
    logger.info("Loaded first results")
    
    for setting in settings:
        #Run the blast
        ##Record time to run
        runtime = runNaiveBlast(seqFile1, seqFile2, tempOutputFile2,
                      lastzOptions=setting)
        
        #Now compare the results
        results2 = loadResults(tempOutputFile2)
        logger.info("Loaded second results")
        
        def fm(f):
            return "%.5f" % float(f)
        
        def fm2(f):
            return str(int(f))
        
        resultsComparator = ResultComparator(results1, results2)
        print((",".join([ species1, species2, "_".join(("_".join(setting.split())).split(",")), fm(distance), fm(resultsComparator.sensitivity),
                         fm(resultsComparator.specificity),
                         fm2(resultsComparator.intersectionSize), fm2(resultsComparator.unionSize),
                         fm2(resultsComparator.trueDifference), fm2(resultsComparator.predictedDifference),
                         fm2(resultsComparator.trueHits), fm2(resultsComparator.predictedHits), fm2(resultsComparator.trueHits -resultsComparator.predictedHits), fm(baseRuntime), fm(runtime) ])))
        
system("rm -rf %s" % tempDir)
コード例 #32
0
ファイル: benchMark.py プロジェクト: 5l1v3r1/hal-1
def runHalCons(halPath, outputPath):
    """Run ``halCons`` on a HAL file, redirecting standard output to a file.

    halPath: path passed to halCons as its only argument.
    outputPath: file that receives the command's standard output.
    """
    # The redirect target used to be the literal word "outputPath", so the
    # caller's path was ignored and every run wrote to ./outputPath instead.
    system("halCons %s > %s" % (halPath, outputPath))
コード例 #33
0
ファイル: benchMark.py プロジェクト: 5l1v3r1/hal-1
def runHalGen(preset, seed, hdf5Chunk, hdf5Compression, outPath):
    """Invoke ``halRandGen`` with the given preset, seed and HDF5 settings.

    seed, hdf5Chunk and hdf5Compression are formatted with %d; outPath is
    the trailing positional argument of the command.
    """
    template = ("halRandGen --preset %s --seed %d --hdf5Chunk %d"
                "    --hdf5Compression %d %s")
    arguments = (preset, seed, hdf5Chunk, hdf5Compression, outPath)
    system(template % arguments)
コード例 #34
0
 def tearDown(self):
     """Remove the temp directory, then every path listed in tempFiles."""
     unittest.TestCase.tearDown(self)
     cleanupCommand = "rm -rf %s" % self.tempDir
     system(cleanupCommand)
     for leftover in self.tempFiles:
         os.remove(leftover)
コード例 #35
0
    def run(self):
        """Move the merged genemap results into extraInfoDir and drop the rest.

        The shell commands run in order: keep the merged homolog XML and text
        files, delete the remaining homolog files, then move the chain XMLs
        and the gene2chain file.
        """
        src = self.outdir
        dest = self.extraInfoDir
        homolog = "genemapHomolog"
        commands = (
            "mv %s/%s-all.xml %s" % (src, homolog, dest),
            "rm -f %s/%s-*.xml" % (src, homolog),
            "mv %s/%s-all.txt %s" % (src, homolog, dest),
            "rm -f %s/%s*.txt" % (src, homolog),
            "mv %s/%s-*.xml %s" % (src, "genemapChain", dest),
            "mv %s/%s %s" % (src, "gene2chain", dest),
        )
        for command in commands:
            system(command)
コード例 #36
0
ファイル: treeCommon.py プロジェクト: robsyme/hal
def checkHalTree(halfile, outdir, options):
    """Dump the HAL tree to <outdir>/haltree.nw and record it on options.

    Sets ``options.treeFile`` to the newick file path and ``options.tree``
    to the tree parsed from it with Phylo.read.
    """
    treefile = os.path.join(outdir, "haltree.nw")
    dumpCommand = "halStats --tree %s > %s" % (halfile, treefile)
    system(dumpCommand)
    # Parse before touching options so a parse failure leaves them unchanged.
    parsedTree = Phylo.read(treefile, "newick")
    options.treeFile = treefile
    options.tree = parsedTree
コード例 #37
0
ファイル: groupExclusiveRegions.py プロジェクト: wcipriet/hal
 def run(self):
     """Delete everything matching "*bed" inside the clade directory."""
     bedPattern = "%s/*bed" % self.cladedir
     system("rm %s" % bedPattern)
コード例 #38
0
def dless(target, split_ss_path, gff_path, model):
    """Run dless and write its output, minus '#' lines, to gff_path.

    target is accepted but not used by this function. The sed stage deletes
    every line that starts with '#', so the final gff has no headers.
    """
    pipeline = 'dless {} {} | sed "/^#/ d" > {}'
    system(pipeline.format(split_ss_path, model, gff_path))
コード例 #39
0
ファイル: common.py プロジェクト: tmfarrell/ont_dap
def runJobTreeStatusAndFailIfNotComplete(jobTreeDir):
    """Run jobTreeStatus verbosely with --failIfNotComplete on jobTreeDir."""
    system("jobTreeStatus --jobTree %s --failIfNotComplete --verbose"
           % jobTreeDir)
コード例 #40
0
ファイル: common.py プロジェクト: tmfarrell/ont_dap
def runJobTreeStats(jobTree, outputFile):
    """Run jobTreeStats for jobTree, writing its report to outputFile."""
    statsCommand = "jobTreeStats --jobTree %s --outputFile %s" % (
        jobTree, outputFile)
    system(statsCommand)
    logger.info("Ran the job-tree stats command apparently okay")
コード例 #41
0
def getChromSizesFromHal(halfile, genome, outfile):
    """Write the halStats --chromSizes output for genome to outfile."""
    sizesCommand = "halStats --chromSizes %s %s > %s" % (
        genome, halfile, outfile)
    system(sizesCommand)
コード例 #42
0
    def run(self):
        """Run cactus for one region on local disk, then schedule gene mapping.

        Steps, in order: copy the config and the per-species sequence
        directories into the local temp dir, write experiment.xml, run
        cactus_workflow.py on a single machine, copy experiment.xml and the
        jobTree into <cwd>/<region>, and add the RunGenemapChain and
        RunGenemapHomolog children.
        """
        workDir = self.getLocalTempDir()
        regionDir = os.path.join(os.getcwd(), self.region)

        # Stage the config file on local disk.
        config = os.path.join(workDir, "cactus_workflow_config.xml")
        system("cp %s %s" % (self.config, config))

        # Stage each species' sequence directory under <workDir>/data.
        localSeqdir = os.path.join(workDir, "data")
        system("mkdir -p %s" % localSeqdir)
        for spc in self.species.split():
            system("cp -r %s %s" % (os.path.join(self.seqdir, spc), localSeqdir))

        # The region's result directory is created if missing.
        system("mkdir -p %s" % regionDir)

        # Write experiment.xml for this region and keep a copy with the results.
        experimentFile = os.path.join(workDir, "experiment.xml")
        writeExpCommand = (
            'cactus_writeExperimentXml.py --species "%s" --tree "%s" --output %s --sequenceDir %s --config %s --databaseString %s'
            % (self.species, self.tree, experimentFile, localSeqdir, config,
               self.dbStr))
        system(writeExpCommand)
        system("cp %s %s" % (experimentFile,
                             os.path.join(regionDir, "experiment.xml")))
        logger.info("Got experiment.xml file for %s with command: %s\n" %
                    (self.region, writeExpCommand))

        # Run cactus itself and keep a copy of its jobTree.
        jobTree = os.path.join(workDir, "jobTree")
        cactusCommand = (
            "cactus_workflow.py --stats --batchSystem %s --experiment %s --buildReference --setupAndBuildAlignments --logDebug --jobTree %s"
            % ("singleMachine", experimentFile, jobTree))
        logger.info("Going to run cactus now, the command is %s" %
                    cactusCommand)
        system(cactusCommand)
        system("cp -r %s %s" % (jobTree, os.path.join(regionDir, "jobTree")))
        logger.info("Done cactusRun for %s\n" % self.region)

        # Both gene-mapping steps take the same constructor arguments.
        for targetClass in (RunGenemapChain, RunGenemapHomolog):
            self.addChildTarget(
                targetClass(self.region, self.dbStr, self.options.outdir,
                            self.options.refSpecies, self.genedir))
コード例 #43
0
ファイル: test.py プロジェクト: zoumingr/cactus
def runWorkflow_TestScript(testId,
                           sequences,
                           newickTreeString,
                           outputDir=None,
                           batchSystem="single_machine",
                           buildAvgs=False,
                           buildHal=False,
                           buildFasta=False,
                           configFile=None,
                           buildToilStats=False,
                           constraints=None,
                           progressive=False,
                           cactusWorkflowFunction=runCactusWorkflow,
                           logLevel=None):
    """Run a cactus workflow for a test and return the experiment it used.

    The experiment is built by getCactusWorkflowExperimentForTest from
    testId, the sequences and the tree, written to <outputDir>/experiment.xml
    and handed to cactusWorkflowFunction together with <outputDir>/toil.
    When buildToilStats is set, runToilStats writes toilStats.xml as well.
    The toil directory and the experiment file are removed before returning;
    any other cleanup is left to the caller.

    outputDir must be supplied even though it defaults to None.
    """
    logger.info("Running cactus workflow test script")
    logger.info("Got the following sequence dirs/files: %s" %
                " ".join(sequences))
    logger.info("Got the following tree %s" % newickTreeString)

    assert outputDir != None
    logger.info("Using the output dir: %s" % outputDir)

    # Build the experiment and persist it next to the other outputs.
    experiment = getCactusWorkflowExperimentForTest(
        testId, sequences, newickTreeString,
        outputDir=outputDir,
        configFile=configFile,
        constraints=constraints,
        progressive=progressive)
    experimentFile = os.path.join(outputDir, "experiment.xml")
    experiment.writeXML(experimentFile)
    logger.info("The experiment file %s\n" % experimentFile)

    toilDir = os.path.join(outputDir, "toil")
    logger.info("Got a job tree dir for the test: %s" % toilDir)

    # Run the workflow proper.
    workflowOptions = dict(batchSystem=batchSystem,
                           buildAvgs=buildAvgs,
                           buildHal=buildHal,
                           buildFasta=buildFasta,
                           toilStats=buildToilStats,
                           logLevel=logLevel)
    cactusWorkflowFunction(experimentFile, toilDir, **workflowOptions)
    logger.info("Ran the the workflow")

    # Optional downstream utilities.
    if buildToilStats:
        runToilStats(toilDir, os.path.join(outputDir, "toilStats.xml"))

    # Remove what this function generated itself.
    system("rm -rf %s %s" % (toilDir, experimentFile))

    return experiment
コード例 #44
0
ファイル: allTests.py プロジェクト: zegheim/sonLib
 def testSonLibCTests(self):
     """Run the sonLibTests executable at the current log level.

     Whether a failing run fails this test depends on how ``system``
     reports non-zero exit codes (presumably by raising; confirm).
     """
     logLevel = getLogLevelString()
     system("sonLibTests %s" % logLevel)
コード例 #45
0
 def testMaf(self):
     """Run the halMafTests executable through ``system``."""
     testBinary = "halMafTests"
     system(testBinary)
コード例 #46
0
 def run(self):
     """Write the column header to outfile, then append every file in indir.

     The header is written from Python; the bodies are appended by a shell
     ``cat`` over everything inside ``self.indir``.
     """
     # "with" closes the handle even if the write raises, and guarantees the
     # header is flushed before the shell appends to the same file.
     with open(self.outfile, 'w') as handle:
         handle.write("#Name\tLength\tMap\tIns\tDels\tOO\tInframe\n")
     system("cat %s/* >> %s" % (self.indir, self.outfile))
コード例 #47
0
 def tearDown(self):
     """Run the base tearDown, then delete the test's temp directory."""
     unittest.TestCase.tearDown(self)
     removeCommand = "rm -rf %s" % self.tempDir
     system(removeCommand)
コード例 #48
0
    def run(self):
        """Merge the per-region genemap results and build the summary tables.

        Runs, in order: the merge of the per-region homolog XMLs, the
        concatenation of the homolog text files, genemapChain.py,
        genemapMerge.py, genemapCactusVsMultiz.py and genemapMakeLatexTab.py.
        Detailed outputs go to <output>/extraInfo. Cleanup is registered as
        the follow-on target.
        """
        outdir = self.output
        runName = self.options.runName

        def inOutput(name):
            return "%s/%s" % (outdir, name)

        # Expected per-region XML outputs of the two gene-mapping steps.
        genemapChainXmls = []
        genemapHomologXmls = []
        for region in getList(self.options.regions):
            for prefix, xmls in (("genemapChain", genemapChainXmls),
                                 ("genemapHomolog", genemapHomologXmls)):
                xmls.append(
                    os.path.join(outdir, "%s-%s.xml" % (prefix, region)))

        # Directory holding the more detailed outputs.
        extraInfoDir = os.path.join(outdir, "extraInfo")
        system("mkdir -p %s" % extraInfoDir)
        system("chmod ug+xrw %s" % extraInfoDir)

        def inExtra(name):
            return "%s/%s" % (extraInfoDir, name)

        # Merge the homolog XMLs of all regions.
        allHomologXml = inOutput("genemapHomolog-all.xml")
        mergeXmls(genemapHomologXmls, allHomologXml)

        # Concatenate the homolog text files; the old merge is removed first
        # so that it is not picked up by the glob.
        allHomolog = inOutput("genemapHomolog-all.txt")
        system("rm -f %s" % allHomolog)
        system("cat %s > %s" % (inOutput("genemapHomolog-*.txt"), allHomolog))

        geneToChain = inOutput("gene2chain")
        system('genemapChain.py -o %s -c "%s" -i "%s" > %s' %
               (extraInfoDir, "cat", " ".join(genemapChainXmls), geneToChain))

        system("genemapMerge.py -f c -n %s %s %s %s %s > %s" %
               (runName, allHomolog, geneToChain,
                inExtra("chainMergeHomolog"), inOutput("chainVsDup.tex"),
                inExtra("missedGenes")))

        homologCmp = inOutput("homologCmp")
        system("genemapCactusVsMultiz.py -a %s -d %s %s %s %s > %s" %
               (extraInfoDir + "/perSpcDiff",
                self.options.geneDir + "/all.tx", self.options.multiz,
                allHomologXml, homologCmp, inExtra("homologCmpV")))

        system('genemapMakeLatexTab.py -s "%s" -n %s %s %s' %
               (self.species, runName, homologCmp,
                inOutput("homologCmp.tex")))

        # Cleanup now...
        self.setFollowOnTarget(Cleanup(self.output, extraInfoDir))
コード例 #49
0
def runEvalMFAToMAF(mfa, maf):
    """Convert an MFA file to MAF with mfaToMaf at DEBUG log level."""
    system("mfaToMaf -b %s -d %s --logLevel DEBUG" % (mfa, maf))
    logger.info("Converted MFA %s to MAF %s\n" % (mfa, maf))
コード例 #50
0
 def run(self):
     """Recursively delete ``self.dir`` and log that it was cleaned."""
     removeCommand = "rm -rf %s" % self.dir
     system(removeCommand)
     logger.info("Clean up tempDir for next run\n")
コード例 #51
0
def runEvalMAFComparator(mafFile1, mafFile2, outputFile, sampleNumber):
    """Run mafComparator on two MAF files.

    The four arguments are passed as the -b, -c, -d and -e options, in that
    order.
    """
    arguments = (mafFile1, mafFile2, outputFile, sampleNumber)
    system("mafComparator -b %s -c %s -d %s -e %s" % arguments)
    logger.info("Compared MAF %s with MAF %s\n" % (mafFile1, mafFile2))
コード例 #52
0
ファイル: groupExclusiveRegions.py プロジェクト: robsyme/hal
 def run(self):
     """Lift the query bed over to the target genome and convert to bigBed.

     Writes <cladedir>/<target>.bed with halLiftover, then
     <cladedir>/<target>.bb with bedToBigBed using the chromosome sizes file.
     """
     bedfile, bigbedfile = [os.path.join(self.cladedir, "%s.%s" % (self.target, ext))
                            for ext in ("bed", "bb")]
     liftoverArgs = (self.halfile, self.query, self.queryBed, self.target, bedfile)
     system("halLiftover %s %s %s %s %s" % liftoverArgs)
     # Convert to big bed:
     system("bedToBigBed %s %s %s" % (bedfile, self.chrsizefile, bigbedfile))
コード例 #53
0
    def run(self):
        """Stage the HAL file in the output directory and write genomes.txt.

        Steps:
          * copy (options.cpHal) or symlink the HAL file into ``self.outdir``
            unless it already lives there, replacing any existing entry;
          * obtain the level-of-detail files through getLod and, when an LOD
            text file exists, derive ``options.snpwidth`` from it;
          * write the documentation files;
          * write one genomes.txt stanza per genome and add a
            WriteTrackDbFile child for each.
        """
        options = self.options
        localHalfile = os.path.join(self.outdir,
                                    os.path.basename(self.halfile))
        if os.path.abspath(localHalfile) != os.path.abspath(self.halfile):
            if os.path.exists(localHalfile):
                system("rm %s" % localHalfile)
            stageCommand = "cp %s %s" if options.cpHal else "ln -s %s %s"
            system(stageCommand %
                   (os.path.abspath(self.halfile), localHalfile))

        # Create lod files if useLod is specified
        lodtxtfile, loddir = getLod(options, localHalfile, self.outdir)

        # Get the maximum window size to display SNPs
        if lodtxtfile:
            snpwidth = getLodLowestLevel(lodtxtfile) - 1
            if snpwidth > -1:
                options.snpwidth = snpwidth

        genomes = sortByProperName(self.genomes, self.options.properName)

        # Create documentation files:
        docdir = os.path.join(self.outdir, "documentation")
        system("mkdir -p %s" % docdir)
        writeDocFiles(docdir, self.options)

        # The trackDb files reference the LOD text file when there is one and
        # the HAL file itself otherwise.
        # NOTE(review): this test is `== ''` while the one above is a
        # truthiness test; they disagree if getLod can return None - confirm.
        trackSource = self.halfile if lodtxtfile == '' else lodtxtfile
        trackPath = "../%s" % os.path.basename(trackSource)

        # Create genomes.txt file. "with" guarantees the file is flushed and
        # closed even when a helper called inside the loop raises.
        filename = os.path.join(self.outdir, "genomes.txt")
        with open(filename, 'w') as f:
            for genome in genomes:
                genomedir = os.path.join(self.outdir, genome)
                f.write("genome %s\n" % genome)
                f.write("twoBitPath %s/%s.2bit\n" % (genome, genome))

                # create trackDb for the current genome:
                self.addChildTarget(
                    WriteTrackDbFile(self.genomes, trackPath, genomedir,
                                     options))
                f.write("trackDb %s/trackDb.txt\n" % genome)

                # other info
                f.write("groups groups.txt\n")

                writeDescriptionFile(genome, genomedir)
                f.write("htmlPath %s/description.html\n" % genome)
                properName = getProperName(genome, self.options.properName)
                f.write("description %s\n" % properName)
                f.write("organism %s\n" % properName)
                f.write("orderKey 4800\n")
                f.write("scientificName %s\n" % genome)

                # Default position: start of the longest sequence, capped at
                # 1000 bases.
                seq2len = self.genome2seq2len[genome]
                (seq, l) = getLongestSeq(seq2len)
                f.write("defaultPos %s:1-%d\n" % (seq, min(l, 1000)))
                f.write("\n")
コード例 #54
0
ファイル: blastTest.py プロジェクト: zqingyuan/cactus
 def tearDown(self):
     """Delete the temp files that still exist, then the temp directory."""
     for path in self.tempFiles:
         if not os.path.exists(path):
             continue
         os.remove(path)
     unittest.TestCase.tearDown(self)
     removeCommand = "rm -rf %s" % self.tempDir
     system(removeCommand)
コード例 #55
0
    def run(self):
        """Schedule every track-building child, then WriteGenomesFile.

        Children are added for GC content and alignability (per genome),
        conservation, the bed/bed2/wig annotation tracks and the
        clade-exclusive regions, each one only when its option is set.
        WriteGenomesFile is registered as the follow-on target.
        """
        opts = self.options

        # GC content & alignability, per genome.
        for genome in self.genomes:
            genomedir = os.path.join(self.outdir, genome)
            if opts.gcContent:
                # produces genomedir/genome.gc.bw
                self.addChildTarget(GetGCpercent(genomedir, genome))
            if opts.alignability:
                # produces genomedir/genome.alignability.bw
                self.addChildTarget(
                    GetAlignability(genomedir, genome, self.halfile))

        # Conservation track: computed here, or linked from an existing dir.
        if opts.conservation:
            conservationDir = os.path.join(self.outdir, "conservation")
            if not opts.conservationDir:
                system("mkdir -p %s" % conservationDir)
                self.addChildTarget(
                    GetConservationFiles(self.halfile, conservationDir, opts))
            elif os.path.abspath(opts.conservationDir) != os.path.abspath(
                    conservationDir):
                system("ln -s %s %s" % (os.path.abspath(opts.conservationDir),
                                        conservationDir))

        # bed, bed2 and wig annotation tracks are handled identically.
        for trackType in ("bed", "bed2", "wig"):
            preprocessAnnotationInputs(opts, self.outdir, trackType)
            self.addChildTarget(
                MakeAnnotationTracks(opts, self.outdir, self.halfile,
                                     self.genome2seq2len, trackType))

        # Clade-exclusive tracks:
        if opts.tree and opts.cladeExclusive:
            liftoverDir = os.path.join(self.outdir, "liftoverbeds")
            self.addChildTarget(
                GetCladeExclusiveRegions(self.halfile, opts.tree, liftoverDir,
                                         opts.maxOut, opts.minIn))
            opts.bigbeddirs.append(os.path.join(liftoverDir, "CladeExclusive"))

        # Get LOD if needed, and write trackDb files.
        self.setFollowOnTarget(
            WriteGenomesFile(self.genomes, self.genome2seq2len, self.halfile,
                             opts, self.outdir))
コード例 #56
0
 def testCuTest(self):
     """Run matchingAndOrderingTests at the current log level."""
     logLevel = getLogLevelString()
     system("matchingAndOrderingTests %s" % logLevel)
コード例 #57
0
 def testMarginStats(self):
     """Run the marginStats executable with every statistic flag enabled.

     The SAM file, read fastq and reference fasta of the first test data set
     are the positional arguments.
     """
     # The run of spaces between the two halves is kept so the command string
     # stays exactly what it was.
     template = ("%s %s %s %s --readIdentity --alignmentIdentity --mismatchesPerAlignedBase --readCoverage "
                 "     --deletionsPerReadBase --insertionsPerReadBase --printValuePerReadAlignment")
     arguments = (self.marginStats, self.inputSamFile1, self.readFastqFile1,
                  self.referenceFastaFile1)
     system(template % arguments)
コード例 #58
0
 def tearDown(self):
     """Run the base tearDown and remove the SAM, HMM, VCF and jobTree outputs."""
     unittest.TestCase.tearDown(self)
     # Clean up
     outputs = (self.outputSamFile, self.outputHmmFile, self.outputVcfFile,
                self.jobTree)
     system("rm -rf %s %s %s %s" % outputs)
コード例 #59
0
def linkTwoBitSeqFile(genome, twobitdir, outdir):
    """Symlink <twobitdir>/<genome>.2bit into outdir unless already present.

    The link target is the absolute path of the source file. Nothing is done
    when os.path.exists reports that <outdir>/<genome>.2bit is already there.
    """
    basename = "%s.2bit" % genome
    linkPath = os.path.join(outdir, basename)
    if os.path.exists(linkPath):
        return
    sourcePath = os.path.abspath(os.path.join(twobitdir, basename))
    system("ln -s %s %s" % (sourcePath, linkPath))
コード例 #60
0
ファイル: blastTest.py プロジェクト: zqingyuan/cactus
    def testProgressiveOutgroupsVsAllOutgroups(self):
        """Tests the difference in outgroup coverage on an ingroup when
        running in "ingroups vs. outgroups" mode and "set against set"
        mode.
        """
        encodeRegion = "ENm001"
        ingroup = "human"
        outgroups = ["macaque", "rabbit", "dog"]
        regionPath = os.path.join(self.encodePath, encodeRegion)
        ingroupPath = os.path.join(regionPath,
                                   ingroup + "." + encodeRegion + ".fa")
        outgroupPaths = map(
            lambda x: os.path.join(regionPath, x + "." + encodeRegion + ".fa"),
            outgroups)
        # Run in "set against set" mode, aligning the entire ingroup
        # vs each outgroup
        runCactusBlast([ingroupPath],
                       alignmentsFile=self.tempOutputFile,
                       toilDir=os.path.join(self.tempDir, "setVsSetToil"),
                       chunkSize=500000,
                       overlapSize=10000,
                       targetSequenceFiles=outgroupPaths)
        # Run in "ingroup vs outgroups" mode, aligning the ingroup vs
        # the outgroups in order, trimming away sequence that's
        # already been aligned.
        runCactusBlastIngroupsAndOutgroups([ingroupPath],
                                           outgroupPaths,
                                           alignmentsFile=self.tempOutputFile2,
                                           toilDir=os.path.join(
                                               self.tempDir, "outgroupToil"))

        # Get the coverage on the ingroup, in bases, from each run.
        coverageSetVsSetUnfiltered = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=self.tempOutputFile,
                          outputFile=coverageSetVsSetUnfiltered)
        coverageSetVsSet = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageSetVsSetUnfiltered))
        coverageIngroupVsOutgroupsUnfiltered = getTempFile(
            rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=self.tempOutputFile2,
                          outputFile=coverageIngroupVsOutgroupsUnfiltered)
        coverageIngroupVsOutgroups = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageIngroupVsOutgroupsUnfiltered))

        print "total coverage on human (set vs set mode, %d outgroups): %d" % (
            len(outgroups), coverageSetVsSet)
        print "total coverage on human (ingroup vs outgroup mode, %d outgroups): %d" % (
            len(outgroups), coverageIngroupVsOutgroups)

        # Make sure we're getting a reasonable fraction of the
        # alignments when using the trimming strategy.
        self.assertTrue(
            float(coverageIngroupVsOutgroups) / coverageSetVsSet >= 0.95)

        # Get the coverage on the ingroup, in bases, from just the
        # last outgroup. Obviously this should be much higher in set
        # vs set mode than in ingroup vs outgroup mode.
        outgroupAlignments = getTempFile(rootDir=self.tempDir)
        system("grep %s %s > %s" %
               (outgroups[-1], self.tempOutputFile, outgroupAlignments))
        coverageFileSetVsSet = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=outgroupAlignments,
                          outputFile=coverageFileSetVsSet)

        coverageFromLastOutgroupSetVsSet = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageFileSetVsSet))

        outgroupAlignments = getTempFile(rootDir=self.tempDir)
        system("grep %s %s > %s" %
               (outgroups[-1], self.tempOutputFile2, outgroupAlignments))
        coverageFileInVsOut = getTempFile(rootDir=self.tempDir)
        calculateCoverage(sequenceFile=ingroupPath,
                          cigarFile=outgroupAlignments,
                          outputFile=coverageFileInVsOut)
        coverageFromLastOutgroupInVsOut = int(
            popenCatch(
                "cat %s | awk '{ total +=  $3 - $2} END { print total }'" %
                coverageFileInVsOut))

        print "total coverage on human from last outgroup in set (%s) (set vs set mode): %d" % (
            outgroups[-1], coverageFromLastOutgroupSetVsSet)
        print "total coverage on human from last outgroup in set (%s) (ingroup vs outgroup mode): %d" % (
            outgroups[-1], coverageFromLastOutgroupInVsOut)

        self.assertTrue(
            float(coverageFromLastOutgroupInVsOut) /
            coverageFromLastOutgroupSetVsSet <= 0.10)