def testBlastRandom(self):
    """Make some sequences, put them in a file, call blast with random
    parameters and check it runs okay.

    For each of self.testNo rounds:
      * between 0 and 9 mutated copies of one random root sequence are
        written to a FASTA file (each copy is reverse-complemented with
        probability one half);
      * runCactusBlast is run on that file with a chunk size drawn from
        [500, 9000) and an overlap size drawn from [2, 100);
      * the output is dumped with ``cat`` when the log level is DEBUG and
        the toil directory is removed afterwards.

    The test makes no assertions of its own; it only fails if one of the
    calls it makes raises.
    """
    tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
    self.tempFiles.append(tempSeqFile)
    # ``range`` (rather than the Python 2-only ``xrange``) works on both
    # Python 2 and Python 3.
    for _ in range(self.testNo):
        seqNo = random.choice(range(0, 10))
        rootSeq = getRandomSequence(8000)[1]
        # The context manager guarantees the file is closed (and flushed)
        # even if writing one of the records raises.
        with open(tempSeqFile, 'w') as fileHandle:
            # All mutated copies are generated before any strand flip is
            # drawn, which keeps the order of random draws unchanged.
            mutatedSeqs = [(str(i), mutateSequence(rootSeq, 0.3 * random.random()))
                           for i in range(seqNo)]
            for fastaHeader, mutatedSeq in mutatedSeqs:
                if random.random() > 0.5:
                    mutatedSeq = reverseComplement(mutatedSeq)
                fastaWrite(fileHandle, fastaHeader, mutatedSeq)
        chunkSize = random.choice(range(500, 9000))
        overlapSize = random.choice(range(2, 100))
        toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
        runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                       chunkSize, overlapSize)
        if getLogLevelString() == "DEBUG":
            system("cat %s" % self.tempOutputFile)
        system("rm -rf %s " % toilDir)
def testBlastRandom(self):
    """Make some sequences, put them in a file, call blast with random
    parameters and check it runs okay.

    Repeated self.testNo times: a random root sequence is mutated into
    0-9 copies (each reverse-complemented half of the time), the copies are
    written to a single FASTA file, and runCactusBlast is invoked on it with
    a random chunk size in [500, 9000) and a random overlap size in
    [2, 100). When the log level is DEBUG the output file is printed with
    ``cat``; the toil directory is deleted at the end of every round.

    No explicit assertions are made; the test passes as long as nothing
    raises.
    """
    tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
    self.tempFiles.append(tempSeqFile)
    # ``xrange`` only exists on Python 2; ``range`` is valid on both
    # major versions and random.choice can index it directly.
    for _ in range(self.testNo):
        seqNo = random.choice(range(0, 10))
        rootSeq = getRandomSequence(8000)[1]
        # Using ``with`` closes the FASTA file even when a write fails,
        # instead of leaking the handle.
        with open(tempSeqFile, 'w') as fileHandle:
            # Materialise every mutated copy first so that the mutation
            # distances are all drawn before the strand-flip draws.
            records = [(str(i), mutateSequence(rootSeq, 0.3 * random.random()))
                       for i in range(seqNo)]
            for fastaHeader, record in records:
                if random.random() > 0.5:
                    record = reverseComplement(record)
                fastaWrite(fileHandle, fastaHeader, record)
        chunkSize = random.choice(range(500, 9000))
        overlapSize = random.choice(range(2, 100))
        toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
        runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                       chunkSize, overlapSize)
        if getLogLevelString() == "DEBUG":
            system("cat %s" % self.tempOutputFile)
        system("rm -rf %s " % toilDir)
def getCactusInputs_random(regionNumber=0, tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Gets a random set of sequences and a species tree relating them.

    A random binary tree is built and one temporary directory is created for
    every leaf name found in it. Mutated copies of a root sequence (which is
    itself replaced by a fresh random sequence about one time in five) are
    then written as FASTA records, under random 15-character names, into
    temporary files spread over those directories. Writing continues until
    at least ``sequenceNumber`` records exist and every directory has been
    given at least one file.

    Arguments:
        regionNumber: not used by this function.
        tempDir: passed as ``rootDir`` to getTempDirectory when creating the
            per-leaf directories.
        sequenceNumber: minimum number of sequences to write; drawn from
            [0, 30) when None.
        avgSequenceLength: root sequences get a length drawn from
            [1, 2 * avgSequenceLength); drawn from [1, 3000) when None.
        treeLeafNumber: passed to makeRandomBinaryTree; 2 or 3 when None.

    Returns:
        A tuple ``(sequenceDirs, newickTreeString)``: the list of created
        directories and the tree as printed by printBinaryTree with
        distances included.
    """
    # random.choice can index a range directly, so there is no need to
    # materialise it as a list first; the value drawn is the same.
    if sequenceNumber is None:
        sequenceNumber = random.choice(range(30))
    if avgSequenceLength is None:
        avgSequenceLength = random.choice(range(1, 3000))
    if treeLeafNumber is None:
        treeLeafNumber = random.choice(range(2, 4))

    # Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []

    def collectLeafNames(tree):
        # Depth-first, left before right, appending the iD of each leaf.
        if tree.internal:
            collectLeafNames(tree.left)
            collectLeafNames(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)

    collectLeafNames(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)

    # One directory per leaf name.
    sequenceDirs = [getTempDirectory(rootDir=tempDir)
                    for _ in newickTreeLeafNames]
    logger.info("Made a set of random directories: %s" % " ".join(sequenceDirs))

    # Random sequences and species labelling
    sequenceFile = None
    fileHandle = None
    parentSequence = getRandomSequence(
        length=random.choice(range(1, 2 * avgSequenceLength)))[1]
    emptySequenceDirs = set(sequenceDirs)
    sequencesWritten = 0
    try:
        # Keep going past sequenceNumber if some directory is still empty.
        while sequencesWritten < sequenceNumber or emptySequenceDirs:
            if sequenceFile is None:
                # Randomly choose the files to be attached or not
                if random.random() > 0.5:
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8:
                # Get a new root sequence
                parentSequence = getRandomSequence(
                    length=random.choice(range(1, 2 * avgSequenceLength)))[1]
            sequence = mutateSequence(parentSequence,
                                      distance=random.random() * 0.25)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            if random.random() > 0.5:
                # Finish this file; the next record starts a new one.
                fileHandle.close()
                fileHandle = None
                sequenceFile = None
            sequencesWritten += 1
    finally:
        # Close the last file on success and on any mid-loop failure.
        if fileHandle is not None:
            fileHandle.close()

    # Report the number actually written, which can exceed sequenceNumber
    # because the loop also runs until no directory is left empty.
    logger.info("Made %s sequences in %s directories" %
                (sequencesWritten, len(sequenceDirs)))
    return sequenceDirs, newickTreeString
def getCactusInputs_random(regionNumber=0, tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Gets a random set of sequences and a species tree relating them.

    Builds a random binary tree, creates one temporary directory per leaf
    name in that tree, and fills those directories with FASTA files holding
    randomly named, randomly mutated (and randomly reverse-complemented)
    copies of a root sequence. The root sequence is swapped for a new random
    one roughly one time in five. Records are written until at least
    ``sequenceNumber`` of them exist and no directory is left without a file.

    Arguments:
        regionNumber: not used by this function.
        tempDir: passed as ``rootDir`` to getTempDirectory when creating the
            per-leaf directories.
        sequenceNumber: minimum number of sequences to write; drawn from
            [0, 30) when None.
        avgSequenceLength: root sequences get a length drawn from
            [1, 2 * avgSequenceLength); drawn from [1, 3000) when None.
        treeLeafNumber: passed to makeRandomBinaryTree; 2 or 3 when None.

    Returns:
        A tuple ``(sequenceDirs, newickTreeString)``: the list of created
        directories and the tree as printed by printBinaryTree with
        distances included.
    """
    # ``range`` is used instead of the Python 2-only ``xrange`` so the
    # function also runs on Python 3; random.choice draws the same values.
    if sequenceNumber is None:
        sequenceNumber = random.choice(range(30))
    if avgSequenceLength is None:
        avgSequenceLength = random.choice(range(1, 3000))
    if treeLeafNumber is None:
        treeLeafNumber = random.choice(range(2, 4))

    # Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []

    def gatherLeaves(tree):
        # Recurse into internal nodes (left first); record the iD of leaves.
        if tree.internal:
            gatherLeaves(tree.left)
            gatherLeaves(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)

    gatherLeaves(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)

    # One directory per leaf name.
    sequenceDirs = [getTempDirectory(rootDir=tempDir)
                    for _ in newickTreeLeafNames]
    logger.info("Made a set of random directories: %s" % " ".join(sequenceDirs))

    # Random sequences and species labelling
    sequenceFile = None
    fileHandle = None
    parentSequence = getRandomSequence(
        length=random.choice(range(1, 2 * avgSequenceLength)))[1]
    emptySequenceDirs = set(sequenceDirs)
    sequencesWritten = 0
    try:
        # The loop overshoots sequenceNumber when a directory is still empty.
        while sequencesWritten < sequenceNumber or emptySequenceDirs:
            if sequenceFile is None:
                # Randomly choose the files to be attached or not
                if random.random() > 0.5:
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8:
                # Get a new root sequence
                parentSequence = getRandomSequence(
                    length=random.choice(range(1, 2 * avgSequenceLength)))[1]
            sequence = mutateSequence(parentSequence,
                                      distance=random.random() * 0.5)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            if random.random() > 0.5:
                # Close the current file so the next record opens a new one.
                fileHandle.close()
                fileHandle = None
                sequenceFile = None
            sequencesWritten += 1
    finally:
        # Ensures the last open file is closed, including on failure.
        if fileHandle is not None:
            fileHandle.close()

    # Log the real number of sequences written; it may be larger than the
    # requested sequenceNumber because every directory must receive a file.
    logger.info("Made %s sequences in %s directories" %
                (sequencesWritten, len(sequenceDirs)))
    return sequenceDirs, newickTreeString