Esempio n. 1
0
def single_copy_wrapper(target, args):
    """
    Main pipeline wrapper. Runs halSingleCopyRegionsExtract once for each region in the conserved_bed file.
    """
    bed_recs = [x.split()[:3] for x in open(args.conserved_bed)]
    result_dir = target.getGlobalTempDir()
    result_tree = TempFileTree(result_dir)
    for chunk in grouper(bed_recs, 10):
        result_path = result_tree.getTempFile()
        target.addChildTargetFn(find_single_copy,
                                args=(args, chunk, result_path))
    target.setFollowOnTargetFn(cat_results,
                               args=(args, result_tree.listFiles()))
Esempio n. 2
0
def extract_maf_wrapper(target, args):
    """
    Main pipeline wrapper. Calls out to hal2maf once for each region in args.conserved_bed
    """
    accelerated_genomes = set(args.accelerated_genomes + [args.ref_genome])
    outgroup_genomes = set(args.target_genomes) - accelerated_genomes
    bed_recs = [x.split() for x in open(args.conserved_bed)]
    result_dir = target.getGlobalTempDir()
    result_tree = TempFileTree(result_dir)
    for chunk in grouper(bed_recs, 50):
        result_path = result_tree.getTempFile()
        target.addChildTargetFn(extract_and_calculate, args=(args, chunk, accelerated_genomes, outgroup_genomes, result_path))
    target.setFollowOnTargetFn(cat_results, args=(args, result_tree.listFiles()))
Esempio n. 3
0
    def testTempFileTree(self):
        for test in range(100):  #self.testNo):
            levels = random.choice(range(1, 4))
            fileNo = random.choice(range(1, 6))
            maxTempFiles = int(math.pow(fileNo, levels))

            print("Got %s levels, %s fileNo and %s maxTempFiles" %
                  (levels, fileNo, maxTempFiles))

            tempFileTreeRootDir = os.path.join(self.tempDir,
                                               getRandomAlphaNumericString())
            tempFileTree = TempFileTree(tempFileTreeRootDir, fileNo, levels)

            tempFiles = []
            tempDirs = []
            #Check we can mac number of temp files.
            for i in range(maxTempFiles):
                if random.random() > 0.5:
                    tempFile = tempFileTree.getTempFile()
                    assert os.path.isfile(tempFile)
                    tempFiles.append(tempFile)
                else:
                    tempFile = tempFileTree.getTempDirectory()
                    assert os.path.isdir(tempFile)
                    tempDirs.append(tempFile)

            #Check assertion is created
            try:
                tempFileTree.getTempFile()
                assert False
            except RuntimeError:
                logger.debug("Got expected error message")

            #Now remove a few temp files
            while random.random() > 0.1 and len(tempFiles) > 0:
                tempFile = tempFiles.pop()
                assert os.path.isfile(tempFile)
                tempFileTree.destroyTempFile(tempFile)
                assert not os.path.isfile(tempFile)

            #Now remove a few temp dirs
            while random.random() > 0.1 and len(tempDirs) > 0:
                tempDir = tempDirs.pop()
                assert os.path.isdir(tempDir)
                tempFileTree.destroyTempDir(tempDir)
                assert not os.path.isdir(tempDir)

            #Check temp files is okay
            set(tempFileTree.listFiles()) == set(tempFiles + tempDirs)

            #Either remove all the temp files or just destroy the whole thing
            if random.random() > 0.5:
                #Remove all temp files and check thing is empty.
                for tempFile in tempFiles:
                    tempFileTree.destroyTempFile(tempFile)
                for tempDir in tempDirs:
                    tempFileTree.destroyTempDir(tempDir)
                os.remove(os.path.join(tempFileTreeRootDir, "lock"))
                os.rmdir(tempFileTreeRootDir)
            else:
                tempFileTree.destroyTempFiles()
                assert not os.path.isdir(tempFileTreeRootDir)