def single_copy_wrapper(target, args):
    """
    Main pipeline wrapper for the single-copy step.

    Reads the first three whitespace-separated columns of every line in
    ``args.conserved_bed``, splits the records into groups with
    ``grouper(bed_recs, 10)`` and schedules one ``find_single_copy`` child
    target per group.  Each child is handed its own temp file path allocated
    from a TempFileTree rooted in the target's global temp dir.  Once all
    children are scheduled, ``cat_results`` is registered as the follow-on
    and receives the files currently listed by the tree.

    NOTE(review): the original description states that
    halSingleCopyRegionsExtract is run for each region; that call presumably
    lives in find_single_copy, which is not visible here -- confirm.

    :param target: job-tree target used to schedule children / follow-on.
    :param args: parsed arguments; ``args.conserved_bed`` is read here and
        ``args`` is forwarded unchanged to the scheduled functions.
    """
    # Use a context manager so the BED file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(args.conserved_bed) as bed_handle:
        bed_recs = [line.split()[:3] for line in bed_handle]

    result_dir = target.getGlobalTempDir()
    result_tree = TempFileTree(result_dir)

    for chunk in grouper(bed_recs, 10):
        result_path = result_tree.getTempFile()
        target.addChildTargetFn(find_single_copy,
                                args=(args, chunk, result_path))

    # listFiles() is evaluated here, after every per-chunk temp file has
    # been created, so the follow-on sees all of the result paths.
    target.setFollowOnTargetFn(cat_results,
                               args=(args, result_tree.listFiles()))
def extract_maf_wrapper(target, args):
    """
    Main pipeline wrapper for the MAF extraction step.

    Builds two genome sets -- the accelerated genomes (``args.accelerated_genomes``
    plus ``args.ref_genome``) and the outgroups (``args.target_genomes`` minus
    the accelerated set) -- then reads every line of ``args.conserved_bed``,
    splits the records into groups with ``grouper(bed_recs, 50)`` and
    schedules one ``extract_and_calculate`` child target per group.  Each
    child is handed its own temp file path from a TempFileTree rooted in the
    target's global temp dir.  ``cat_results`` is registered as the follow-on
    and receives the files currently listed by the tree.

    NOTE(review): the original description states that hal2maf is called for
    each region; that call presumably lives in extract_and_calculate, which
    is not visible here -- confirm.

    :param target: job-tree target used to schedule children / follow-on.
    :param args: parsed arguments; reads ``accelerated_genomes``,
        ``ref_genome``, ``target_genomes`` and ``conserved_bed``, and is
        forwarded unchanged to the scheduled functions.
    """
    accelerated_genomes = set(args.accelerated_genomes + [args.ref_genome])
    outgroup_genomes = set(args.target_genomes) - accelerated_genomes

    # Use a context manager so the BED file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(args.conserved_bed) as bed_handle:
        bed_recs = [line.split() for line in bed_handle]

    result_dir = target.getGlobalTempDir()
    result_tree = TempFileTree(result_dir)

    for chunk in grouper(bed_recs, 50):
        result_path = result_tree.getTempFile()
        target.addChildTargetFn(
            extract_and_calculate,
            args=(args, chunk, accelerated_genomes, outgroup_genomes,
                  result_path))

    # listFiles() is evaluated here, after every per-chunk temp file has
    # been created, so the follow-on sees all of the result paths.
    target.setFollowOnTargetFn(cat_results,
                               args=(args, result_tree.listFiles()))
def testTempFileTree(self):
    """
    Randomised exercise of TempFileTree.

    For each of 100 rounds a tree is built with a random number of levels
    (1-3) and files per directory (1-5).  The test then:

    1. creates ``fileNo ** levels`` temp files / directories and checks that
       each exists;
    2. checks that asking for one more raises RuntimeError;
    3. destroys a random number of the files and directories;
    4. checks that listFiles() reports exactly the survivors;
    5. cleans up either entry by entry or with destroyTempFiles(), and
       checks the root directory is gone.
    """
    for _ in range(100):  # self.testNo
        levels = random.choice(range(1, 4))
        fileNo = random.choice(range(1, 6))
        maxTempFiles = int(math.pow(fileNo, levels))
        print("Got %s levels, %s fileNo and %s maxTempFiles"
              % (levels, fileNo, maxTempFiles))
        tempFileTreeRootDir = os.path.join(self.tempDir,
                                           getRandomAlphaNumericString())
        tempFileTree = TempFileTree(tempFileTreeRootDir, fileNo, levels)
        tempFiles = []
        tempDirs = []

        # Check we can make the maximum number of temp files.
        for _ in range(maxTempFiles):
            if random.random() > 0.5:
                tempFile = tempFileTree.getTempFile()
                assert os.path.isfile(tempFile)
                tempFiles.append(tempFile)
            else:
                tempFile = tempFileTree.getTempDirectory()
                assert os.path.isdir(tempFile)
                tempDirs.append(tempFile)

        # Check that exceeding the capacity raises. The `assert False` is
        # only reached if no RuntimeError was raised; its AssertionError is
        # not caught by the handler below and so fails the test.
        try:
            tempFileTree.getTempFile()
            assert False
        except RuntimeError:
            logger.debug("Got expected error message")

        # Now remove a few temp files.
        while random.random() > 0.1 and len(tempFiles) > 0:
            tempFile = tempFiles.pop()
            assert os.path.isfile(tempFile)
            tempFileTree.destroyTempFile(tempFile)
            assert not os.path.isfile(tempFile)

        # Now remove a few temp dirs.
        while random.random() > 0.1 and len(tempDirs) > 0:
            tempDir = tempDirs.pop()
            assert os.path.isdir(tempDir)
            tempFileTree.destroyTempDir(tempDir)
            assert not os.path.isdir(tempDir)

        # Check the tree's listing matches what we believe is left.
        # (Previously this comparison was evaluated and discarded, so the
        # check never actually ran.)
        assert set(tempFileTree.listFiles()) == set(tempFiles + tempDirs)

        # Either remove all the temp files or just destroy the whole thing.
        if random.random() > 0.5:
            # Remove every remaining entry, then the lock file and the root;
            # os.rmdir raises if the root directory is not empty.
            for tempFile in tempFiles:
                tempFileTree.destroyTempFile(tempFile)
            for tempDir in tempDirs:
                tempFileTree.destroyTempDir(tempDir)
            os.remove(os.path.join(tempFileTreeRootDir, "lock"))
            os.rmdir(tempFileTreeRootDir)
        else:
            tempFileTree.destroyTempFiles()

        # Both clean-up routes must leave no root directory behind.
        assert not os.path.isdir(tempFileTreeRootDir)