コード例 #1
0
 def run(self):
     """Create a temporary file tree, then schedule the jobs that use and remove it.

     A TempFileTree is rooted in a randomly named directory under the global
     temp dir, populated via makeFileTree() to ``self.depth``, and a pointer
     file to its root is written with makeTreePointer(). A ChildTarget is then
     added for the pointer file and a DestructFileTree follow-on is set for
     the tree.

     Raises:
         RuntimeError: deliberately, whenever ``random.random() > 0.5``
             (presumably to exercise job-failure handling -- confirm with
             the surrounding test harness).
     """
     # --- Build the file tree ---
     treeRootDir = os.path.join(self.getGlobalTempDir(), getRandomAlphaNumericString())
     fileTree = TempFileTree(treeRootDir)

     rootFile = fileTree.getTempFile()
     makeFileTree(rootFile, self.depth, fileTree)

     pointerFile = fileTree.getTempFile()
     makeTreePointer(rootFile, pointerFile)

     logger.info("We've set up the file tree")

     # Random failure injected after the tree exists but before any jobs are issued.
     if random.random() > 0.5:
         raise RuntimeError()

     # --- Issue the child and follow-on jobs ---
     self.addChildTarget(ChildTarget(pointerFile))
     self.setFollowOnTarget(DestructFileTree(fileTree))

     logger.info("We've added the child target and finished SetupFileTree.run()")
コード例 #2
0
def build_hints(target, filtered_bam_tree, genome, db_path, genome_fasta, hints_dir):
    """
    Driver function for hint building.

    For every file in filtered_bam_tree whose name ends with "bam", schedules one
    build_intron_hints child job and one build_exon_hints child job, each writing
    to a fresh temp file in its own TempFileTree. cat_hints is then set as the
    follow-on and receives both trees plus genome, db_path, genome_fasta and
    hints_dir.
    """
    bam_files = [path for path in filtered_bam_tree.listFiles() if path.endswith("bam")]
    intron_hints_tree = TempFileTree(get_tmp(target, global_dir=True, name="intron_hints_tree"))
    exon_hints_tree = TempFileTree(get_tmp(target, global_dir=True, name="exon_hints_tree"))
    # (job function, output tree, output suffix); intron is scheduled before exon for each BAM.
    hint_builders = (
        (build_intron_hints, intron_hints_tree, ".intron.gff"),
        (build_exon_hints, exon_hints_tree, ".exon.gff"),
    )
    for bam_file in bam_files:
        for build_fn, hints_tree, suffix in hint_builders:
            hints_path = hints_tree.getTempFile(suffix=suffix)
            target.addChildTargetFn(build_fn, memory=8 * 1024 ** 3, cpu=2, args=[bam_file, hints_path])
    follow_on_args = [intron_hints_tree, exon_hints_tree, genome, db_path, genome_fasta, hints_dir]
    target.setFollowOnTargetFn(cat_hints, args=follow_on_args)
コード例 #3
0
 def run(self):
     """Schedule one RunBlast child per unordered pair of distinct chunks.

     Each pair (i, j) with i < j gets its own results file from a TempFileTree
     under the global temp dir; the file is appended to ``self.resultsFiles``.
     A CollateBlasts follow-on is then set with ``self.finalResultsFile`` and
     the accumulated results files.
     """
     resultsTree = TempFileTree(os.path.join(self.getGlobalTempDir(), "allAgainstAllResults"))
     chunkCount = len(self.chunks)
     # Pair every chunk with each later chunk so no pair is blasted twice.
     for first in xrange(chunkCount):
         for second in xrange(first + 1, chunkCount):
             outputFile = resultsTree.getTempFile()
             self.resultsFiles.append(outputFile)
             blastJob = RunBlast(self.blastOptions, self.chunks[first], self.chunks[second], outputFile)
             self.addChildTarget(blastJob)
     logger.info("Made the list of all-against-all blasts")
     # Collation runs as the follow-on job.
     self.setFollowOnTarget(CollateBlasts(self.finalResultsFile, self.resultsFiles))
コード例 #4
0
def single_copy_wrapper(target, args):
    """
    Main pipeline wrapper.

    Reads the first three whitespace-separated fields of every line in
    args.conserved_bed, splits the records into groups via grouper(bed_recs, 10),
    and schedules one find_single_copy child job per group, each with its own
    temp result path. cat_results is set as the follow-on and receives the files
    currently listed in the result tree.

    :param target: jobTree target used to schedule child and follow-on jobs.
    :param args: options object; this function reads args.conserved_bed and
                 forwards args unchanged to the scheduled jobs.
    """
    # Use a context manager so the BED file handle is closed as soon as it is read.
    with open(args.conserved_bed) as bed_handle:
        bed_recs = [line.split()[:3] for line in bed_handle]
    result_dir = target.getGlobalTempDir()
    result_tree = TempFileTree(result_dir)
    for chunk in grouper(bed_recs, 10):
        result_path = result_tree.getTempFile()
        target.addChildTargetFn(find_single_copy,
                                args=(args, chunk, result_path))
    # listFiles() is evaluated now, after all result paths have been allocated.
    target.setFollowOnTargetFn(cat_results,
                               args=(args, result_tree.listFiles()))
コード例 #5
0
def extract_maf_wrapper(target, args):
    """
    Main pipeline wrapper.

    Reads every line of args.conserved_bed as a whitespace-split record, splits
    the records into groups via grouper(bed_recs, 50), and schedules one
    extract_and_calculate child job per group. cat_results is set as the
    follow-on and receives the files currently listed in the result tree.

    :param target: jobTree target used to schedule child and follow-on jobs.
    :param args: options object; this function reads args.accelerated_genomes,
                 args.ref_genome, args.target_genomes and args.conserved_bed.
    """
    # The reference genome is always counted among the accelerated genomes.
    accelerated_genomes = set(args.accelerated_genomes + [args.ref_genome])
    outgroup_genomes = set(args.target_genomes) - accelerated_genomes
    # Use a context manager so the BED file handle is closed as soon as it is read.
    with open(args.conserved_bed) as bed_handle:
        bed_recs = [line.split() for line in bed_handle]
    result_dir = target.getGlobalTempDir()
    result_tree = TempFileTree(result_dir)
    for chunk in grouper(bed_recs, 50):
        result_path = result_tree.getTempFile()
        target.addChildTargetFn(extract_and_calculate,
                                args=(args, chunk, accelerated_genomes, outgroup_genomes, result_path))
    # listFiles() is evaluated now, after all result paths have been allocated.
    target.setFollowOnTargetFn(cat_results, args=(args, result_tree.listFiles()))
コード例 #6
0
def dless_wrapper(target, args, split_ss_dict):
    """
    Wrapper for dless function.

    Schedules one dless child job for every entry found in each directory of
    split_ss_dict, writing each result to a temp GFF path, then sets cat_dless
    as the follow-on with the tree of GFF outputs.

    :param target: jobTree target used to schedule child and follow-on jobs.
    :param args: options object; args.model is forwarded to each dless job and
                 args itself to cat_dless.
    :param split_ss_dict: mapping whose values are directories of split
                          sub-alignment files (keys are unused here).
    """
    # The caller's split_ss_dict is used as given. It must not be rebuilt from
    # split_ss_path here: that name is a local only assigned inside the loop,
    # so reading it up front raises UnboundLocalError.
    output_gff_tree = TempFileTree(
        os.path.join(target.getGlobalTempDir(), 'output_gff'))
    for split_ss_dir in split_ss_dict.values():
        for split_ss in os.listdir(split_ss_dir):
            gff_path = output_gff_tree.getTempFile(suffix=split_ss + '.gff')
            split_ss_path = os.path.join(split_ss_dir, split_ss)
            target.addChildTargetFn(dless,
                                    args=(split_ss_path, gff_path, args.model))
    target.setFollowOnTargetFn(cat_dless, args=(args, output_gff_tree))
コード例 #7
0
def main_hints_fn(target, bam_paths, db_path, genome, genome_fasta, hints_dir):
    """
    Main driver function.

    For each BAM, determines the pairing flags via bam_is_paired, then schedules
    one sort_by_name child job per group yielded by group_references, each
    writing to its own ".bam" temp file. build_hints is set as the follow-on and
    receives the tree of filtered BAMs plus genome, db_path, genome_fasta and
    hints_dir.

    :param target: jobTree target used to schedule child and follow-on jobs.
    :param bam_paths: iterable of BAM file paths.
    """
    filtered_bam_tree = TempFileTree(get_tmp(target, global_dir=True, name="filter_file_tree"))
    for bam_path in bam_paths:
        paired = "--paired --pairwiseAlignments" if bam_is_paired(bam_path) is True else ""
        sam_handle = pysam.Samfile(bam_path)
        # The handle is only needed to enumerate reference groups; close it
        # afterwards so one open file per BAM is not leaked.
        try:
            for references in group_references(sam_handle):
                out_filter = filtered_bam_tree.getTempFile(suffix=".bam")
                target.addChildTargetFn(sort_by_name, memory=8 * 1024 ** 3, cpu=2,
                                        args=[bam_path, references, out_filter, paired])
        finally:
            sam_handle.close()
    target.setFollowOnTargetFn(build_hints, args=[filtered_bam_tree, genome, db_path, genome_fasta, hints_dir])
コード例 #8
0
 def run(self):
     """Schedule a RunBlast child for every (chunk1, chunk2) combination.

     Both sequence-file sets are chunked into their own subdirectories of the
     global temp dir. Every combination of a chunk from the first set with a
     chunk from the second gets a results file from a TempFileTree, and a
     CollateBlasts follow-on is set with ``self.finalResultsFile`` and the
     list of results files.
     """
     chunkDir1 = makeSubDir(os.path.join(self.getGlobalTempDir(), "chunks1"))
     chunks1 = self.getChunks(self.sequenceFiles1, chunkDir1)
     chunkDir2 = makeSubDir(os.path.join(self.getGlobalTempDir(), "chunks2"))
     chunks2 = self.getChunks(self.sequenceFiles2, chunkDir2)
     resultsTree = TempFileTree(os.path.join(self.getGlobalTempDir(), "allAgainstAllResults"))
     collectedResults = []
     # Build the list of blast jobs, one per cross-set chunk pair.
     for firstChunk in chunks1:
         for secondChunk in chunks2:
             outputFile = resultsTree.getTempFile()
             collectedResults.append(outputFile)
             #TODO: Make the compression work
             self.blastOptions.compressFiles = False
             blastJob = RunBlast(self.blastOptions, firstChunk, secondChunk, outputFile)
             self.addChildTarget(blastJob)
     logger.info("Made the list of blasts")
     # Collation runs as the follow-on job.
     self.setFollowOnTarget(CollateBlasts(self.finalResultsFile, collectedResults))
コード例 #9
0
ファイル: jobTreeTest.py プロジェクト: ArtRand/jobTree
class TestCase(unittest.TestCase):
    """Runs jobTreeTest_Dependencies.py over several tree shapes and CPU settings."""

    def setUp(self):
        """Choose the job tree directory and create a temp file tree in the cwd."""
        unittest.TestCase.setUp(self)
        # Directory in which the job tree will be created.
        self.jobTreeDir = os.path.join(os.getcwd(), "testJobTree")
        # Rooted in the current working directory so the file tree is visible.
        self.tempFileTreeDir = os.path.join(os.getcwd(), "tempFileTree")
        # Source of temp files for the tests.
        self.tempFileTree = TempFileTree(self.tempFileTreeDir)

    def tearDown(self):
        """Destroy the temp files and remove both working directories."""
        unittest.TestCase.tearDown(self)
        self.tempFileTree.destroyTempFiles()
        # Clean up the job tree in case it has not already been cleaned up.
        leftovers = (self.jobTreeDir, self.tempFileTreeDir)
        system("rm -rf %s %s" % leftovers)

    # only done in singleMachine for now.  Experts can run manually on other systems if they choose
    def dependenciesTest(self, batchSystem="singleMachine", furtherOptionsString=""):
        """Run the dependencies script once per scenario on the given batch system.

        Before each run the job tree directory is removed and a fresh log file
        name is taken from the temp file tree. ``furtherOptionsString`` is
        appended verbatim to the command line.
        """
        commandTemplate = "jobTreeTest_Dependencies.py --jobTree %s --logFile %s --batchSystem '%s' --tree %s --maxCpus %s --maxThreads %s --size %s --cpusPerJob=%s --sleepTime %s %s"
        # (tree, maxCpus, maxThreads, size, cpusPerJob, sleepTime), run in this order.
        scenarios = (
            ("comb", 10, 100, 100, 1, 10),
            ("comb", 200, 100, 100, 20, 10),
            ("fly", 10, 8, 100, 1, 10),
            ("fly", 10, 8, 100, 2, 10),
            ("balanced", 5, 10, 100, 1, 10),
            ("balanced", 5, 10, 100, 3, 10),
        )
        for tree, maxCpus, maxThreads, size, cpusPerJob, sleepTime in scenarios:
            system("rm -rf %s" % self.jobTreeDir)
            logName = self.tempFileTree.getTempFile(suffix="_comblog.txt", makeDir=False)
            commandLine = commandTemplate % (
                self.jobTreeDir, logName, batchSystem, tree, maxCpus,
                maxThreads, size, cpusPerJob, sleepTime, furtherOptionsString)
            system(commandLine)

    def testJobTree_dependencies_singleMachine(self):
        """Dependencies test on the singleMachine batch system."""
        self.dependenciesTest(batchSystem="singleMachine")

    def testJobTree_dependencies_combined(self):
        """Dependencies test with singleMachine also configured as the big batch system."""
        self.dependenciesTest(batchSystem="singleMachine", furtherOptionsString="--bigBatchSystem singleMachine --bigMemoryThreshold 1000000")

    def testJobTree_dependencies_parasol(self):
        """Parasol variant; the early return means the code below never runs."""
        return
        if parasolIsInstalled():
            self.dependenciesTest(batchSystem="parasol")

    def testJobTree_dependencies_gridengine(self):
        """Grid Engine variant; the early return means the code below never runs."""
        return
        if gridEngineIsInstalled():
            self.dependenciesTest(batchSystem="gridengine")
コード例 #10
0
    def testTempFileTree(self):
        """Randomised test of TempFileTree allocation, listing and destruction.

        Each of 100 rounds builds a tree with random ``levels`` and ``fileNo``,
        fills it with ``fileNo ** levels`` temp files/directories, checks that
        one more allocation raises RuntimeError, removes a random number of
        entries, checks listFiles() against what remains, and finally tears
        the tree down either entry by entry or via destroyTempFiles().
        """
        for _ in range(100):  #self.testNo):
            levels = random.choice(range(1, 4))
            fileNo = random.choice(range(1, 6))
            maxTempFiles = int(math.pow(fileNo, levels))

            print("Got %s levels, %s fileNo and %s maxTempFiles" %
                  (levels, fileNo, maxTempFiles))

            tempFileTreeRootDir = os.path.join(self.tempDir,
                                               getRandomAlphaNumericString())
            tempFileTree = TempFileTree(tempFileTreeRootDir, fileNo, levels)

            tempFiles = []
            tempDirs = []
            #Check we can make the max number of temp files.
            for _ in range(maxTempFiles):
                if random.random() > 0.5:
                    tempFile = tempFileTree.getTempFile()
                    assert os.path.isfile(tempFile)
                    tempFiles.append(tempFile)
                else:
                    tempDir = tempFileTree.getTempDirectory()
                    assert os.path.isdir(tempDir)
                    tempDirs.append(tempDir)

            #Check that allocating beyond capacity raises
            try:
                tempFileTree.getTempFile()
            except RuntimeError:
                logger.debug("Got expected error message")
            else:
                assert False, "Expected RuntimeError once the temp file tree is full"

            #Now remove a few temp files
            while random.random() > 0.1 and len(tempFiles) > 0:
                tempFile = tempFiles.pop()
                assert os.path.isfile(tempFile)
                tempFileTree.destroyTempFile(tempFile)
                assert not os.path.isfile(tempFile)

            #Now remove a few temp dirs
            while random.random() > 0.1 and len(tempDirs) > 0:
                tempDir = tempDirs.pop()
                assert os.path.isdir(tempDir)
                tempFileTree.destroyTempDir(tempDir)
                assert not os.path.isdir(tempDir)

            #Check the listing matches what is left. This must be asserted:
            #a bare comparison is evaluated and discarded, so it checks nothing.
            assert set(tempFileTree.listFiles()) == set(tempFiles + tempDirs)

            #Either remove all the temp files or just destroy the whole thing
            if random.random() > 0.5:
                #Remove all temp files and check thing is empty.
                for tempFile in tempFiles:
                    tempFileTree.destroyTempFile(tempFile)
                for tempDir in tempDirs:
                    tempFileTree.destroyTempDir(tempDir)
                #os.rmdir only succeeds on an empty directory, so this also
                #verifies nothing but the lock file was left behind.
                os.remove(os.path.join(tempFileTreeRootDir, "lock"))
                os.rmdir(tempFileTreeRootDir)
            else:
                tempFileTree.destroyTempFiles()
                assert not os.path.isdir(tempFileTreeRootDir)