def testBlastRandom(self):
    """Run cactus blast on random sequences with random parameters.

    For each of ``self.testNo`` rounds a FASTA file is written to
    ``self.tempDir`` containing between 0 and 9 records, each derived from
    one shared random parent sequence via ``mutateSequence`` and randomly
    reverse-complemented. ``runCactusBlast`` is then called on that file
    with a random chunk size in [500, 8999] and a random overlap size in
    [2, 99], writing to ``self.tempOutputFile``.

    Nothing is asserted about the output; the test relies on the calls
    raising if something goes wrong.
    """
    tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
    self.tempFiles.append(tempSeqFile)
    for _ in range(self.testNo):
        seqNo = random.choice(range(0, 10))
        # Only the second element of getRandomSequence's result is used here.
        parentSeq = getRandomSequence(8000)[1]
        # The context manager closes the file even if writing a record fails.
        with open(tempSeqFile, 'w') as fileHandle:
            # All mutants are built before any record is written so that the
            # random number stream is consumed in the same order as before.
            mutants = [mutateSequence(parentSeq, 0.3 * random.random())
                       for _ in range(seqNo)]
            for index, mutant in enumerate(mutants):
                if random.random() > 0.5:
                    mutant = reverseComplement(mutant)
                fastaWrite(fileHandle, str(index), mutant)
        chunkSize = random.choice(range(500, 9000))
        overlapSize = random.choice(range(2, 100))
        toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
        runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                       chunkSize, overlapSize)
        if getLogLevelString() == "DEBUG":
            system("cat %s" % self.tempOutputFile)
        system("rm -rf %s " % toilDir)
def testFastaReadWriteC(self):
    """Tests consistency with C version of this function.

    For each of ``self.testNo`` rounds, between 0 and 9 records from
    ``getRandomSequence`` are written to a temp file with ``fastaWrite``.
    The external ``sonLib_fastaCTest`` command is run with that file and a
    second temp file as arguments, and the second file is read back with
    ``fastaRead``. Each record read must equal the corresponding record
    written, in order.

    NOTE(review): the loop does not check that *every* written record was
    read back; a truncated output file would pass. Confirm whether that is
    intended before tightening.
    """
    tempFile = getTempFile()
    self.tempFiles.append(tempFile)
    tempFile2 = getTempFile()
    self.tempFiles.append(tempFile2)
    for _ in range(0, self.testNo):
        fastaNumber = random.choice(range(10))
        records = [getRandomSequence() for _ in range(fastaNumber)]
        with open(tempFile, 'w') as fileHandle:
            for name, seq in records:
                fastaWrite(fileHandle, name, seq)
        command = "sonLib_fastaCTest %s %s" % (tempFile, tempFile2)
        print(command)
        system(command)
        # Both handles are released even when the assertion below fails.
        with open(tempFile2, 'r') as fileHandle, io.StringIO() as outFh:
            # Reverse so that pop() hands back records in written order.
            records.reverse()
            for record in fastaRead(fileHandle):
                name, seq = record
                assert record == records.pop()
                # Re-serialising checks that what was read can be written.
                fastaWrite(outFh, name, seq)
def testRandom(self):
    """Makes random sequences and tests that Ortheus can align them and
    produce a valid output.

    For each of ``self.testNo`` rounds a random tree is built with
    ``randomTree`` and ``getTreeSeqs`` fills a list of sequences from a
    random 250-length middle sequence. If there are more than 2 and at most
    ``MAX_SEQS`` sequences, each is written to its own FASTA file, the
    ``ortheus_core`` command is run on them, and the resulting alignment is
    validated with ``checkAlignment``. Rounds with other sequence counts
    are skipped.
    """
    outputFile = getTempFile()
    self.tempFiles.append(outputFile)
    MAX_SEQS = 20
    # Keep a private list of the per-sequence files instead of indexing into
    # self.tempFiles, which is only correct when self.tempFiles was empty on
    # entry. They are still registered on self.tempFiles in the same order.
    seqTempFiles = [getTempFile() for _ in range(MAX_SEQS)]
    self.tempFiles.extend(seqTempFiles)
    for test in range(0, self.testNo):
        # print(...) with a single pre-formatted string produces the same
        # text on Python 2 and Python 3.
        print("test no : %i " % test)
        binaryTree = randomTree()
        middleSeq = getRandomSequence(250)[1]
        seqs = []
        getTreeSeqs(binaryTree, middleSeq, seqs)
        if 2 < len(seqs) <= MAX_SEQS:
            seqFiles = seqTempFiles[:len(seqs)]
            for index, (seqFile, seq) in enumerate(zip(seqFiles, seqs)):
                # Closed even if fastaWrite raises.
                with open(seqFile, 'w') as fileHandle:
                    fastaWrite(fileHandle, "%i" % index, seq)
            print("Have seq files  %s" % (seqFiles,))
            treeString = printBinaryTree(binaryTree, True)
            print("For tree  %s" % (treeString,))
            # Align seqs and check no failure.
            command = "ortheus_core -a %s -b '%s' -d %s -e" % (
                " ".join(seqFiles), treeString, outputFile)
            print("command to call %s" % (command,))
            system(command)
            # Check alignment is complete.
            alignment = [column[:] for column in fastaAlignmentRead(outputFile)]
            checkAlignment(alignment, seqs)
            print("test no is finished : %i " % test)
def testFastaReadWrite(self):
    """Round-trip random FASTA records through fastaWrite and fastaRead.

    For each of ``self.testNo`` rounds, between 0 and 9 records from
    ``getRandomSequence`` are written to a temp file, read back with
    ``fastaRead``, and each record read is asserted equal to the
    corresponding record written, in order. Each record is also re-written
    to an in-memory buffer.

    NOTE(review): the loop does not check that *every* written record was
    read back; confirm whether that is intended before tightening.
    """
    tempFile = getTempFile()
    self.tempFiles.append(tempFile)
    for _ in range(0, self.testNo):
        fastaNumber = random.choice(range(10))
        records = [getRandomSequence() for _ in range(fastaNumber)]
        with open(tempFile, 'w') as fileHandle:
            for name, seq in records:
                fastaWrite(fileHandle, name, seq)
        # Both handles are released even when the assertion below fails.
        with open(tempFile, 'r') as fileHandle, io.StringIO() as outFh:
            # Reverse so that pop() hands back records in written order.
            records.reverse()
            for record in fastaRead(fileHandle):
                assert record == records.pop()
                name, seq = record
                fastaWrite(outFh, name, seq)
def testRandom(self):
    """Makes random sequences and tests that Ortheus can align them and
    produce a valid output.

    For each of ``self.testNo`` rounds a random tree is built with
    ``randomTree`` and ``getTreeSeqs`` fills a list of sequences from a
    random 250-length middle sequence. If there are more than 2 and at most
    ``MAX_SEQS`` sequences, each is written to its own FASTA file, the
    ``ortheus_core`` command is run on them, and the resulting alignment is
    validated with ``checkAlignment``. Rounds with other sequence counts
    are skipped.
    """
    outputFile = getTempFile()
    self.tempFiles.append(outputFile)
    MAX_SEQS = 20
    # Track the per-sequence files locally rather than via
    # self.tempFiles[1 + i], which silently picks the wrong files when
    # self.tempFiles already held entries before this test ran.
    seqTempFiles = [getTempFile() for _ in range(MAX_SEQS)]
    self.tempFiles.extend(seqTempFiles)
    for test in range(0, self.testNo):
        # A single pre-formatted argument keeps the printed text identical
        # on Python 2 and Python 3.
        print("test no : %i " % test)
        binaryTree = randomTree()
        middleSeq = getRandomSequence(250)[1]
        seqs = []
        getTreeSeqs(binaryTree, middleSeq, seqs)
        if 2 < len(seqs) <= MAX_SEQS:
            seqFiles = seqTempFiles[:len(seqs)]
            for index, (seqFile, seq) in enumerate(zip(seqFiles, seqs)):
                # Closed even if fastaWrite raises.
                with open(seqFile, "w") as fileHandle:
                    fastaWrite(fileHandle, "%i" % index, seq)
            print("Have seq files  %s" % (seqFiles,))
            treeString = printBinaryTree(binaryTree, True)
            print("For tree  %s" % (treeString,))
            # Align seqs and check no failure.
            command = "ortheus_core -a %s -b '%s' -d %s -e" % (
                " ".join(seqFiles), treeString, outputFile)
            print("command to call %s" % (command,))
            system(command)
            # Check alignment is complete.
            alignment = [column[:] for column in fastaAlignmentRead(outputFile)]
            checkAlignment(alignment, seqs)
            print("test no is finished : %i " % test)
def testFastqReadWrite(self):
    """Round-trip random FASTQ records through fastqWrite and fastqRead.

    For each of ``self.testNo`` rounds, between 0 and 9 records are built
    from ``getRandomSequence`` results, each paired with one random integer
    in [33, 126] per sequence position as its quality values. The records
    are written to a temp file, read back with ``fastqRead`` (given the
    file path), and each record read is asserted equal to the corresponding
    record written, in order. Each record is also re-written to an
    in-memory buffer.

    NOTE(review): the loop does not check that *every* written record was
    read back; confirm whether that is intended before tightening.
    """
    tempFile = getTempFile()
    self.tempFiles.append(tempFile)
    for _ in range(0, self.testNo):
        fastaNumber = random.choice(range(10))
        # All sequences are drawn before any quality values, matching the
        # order in which the random number stream was consumed originally.
        sequences = [getRandomSequence() for _ in range(fastaNumber)]
        fastqs = [
            (name, seq, [random.randint(33, 126) for _ in range(len(seq))])
            for name, seq in sequences
        ]
        with open(tempFile, 'w') as fH:
            for name, seq, quals in fastqs:
                fastqWrite(fH, name, seq, quals)
        # Reverse so that pop() hands back records in written order.
        fastqs.reverse()
        # The buffer is released even when the assertion below fails.
        with io.StringIO() as outFh:
            for record in fastqRead(tempFile):
                assert record == fastqs.pop()
                name, seq, quals = record
                fastqWrite(outFh, name, seq, quals)
def testBlastRandom(self):
    """Make some sequences, put them in a file, call blast with random
    parameters and check it runs okay.

    Each of the ``self.testNo`` rounds writes between 0 and 9 FASTA records
    to ``tempSeq.fa`` under ``self.tempDir``. Every record is produced by
    ``mutateSequence`` from one shared random parent sequence and is
    reverse-complemented with probability one half. ``runCactusBlast`` is
    then run on the file with a chunk size drawn from [500, 8999] and an
    overlap size drawn from [2, 99], writing to ``self.tempOutputFile``.

    No assertion is made on the output; failure is signalled only by an
    exception from one of the calls.
    """
    tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
    self.tempFiles.append(tempSeqFile)
    for _ in range(self.testNo):
        seqNo = random.choice(range(0, 10))
        # Only the second element of getRandomSequence's result is used here.
        parentSeq = getRandomSequence(8000)[1]
        # Build all mutants up front (before the reverse-complement coin
        # flips) so random numbers are drawn in the original order.
        mutants = [mutateSequence(parentSeq, 0.3 * random.random())
                   for _ in range(seqNo)]
        # "with" guarantees the handle is closed if a write raises.
        with open(tempSeqFile, 'w') as fileHandle:
            for index, mutant in enumerate(mutants):
                if random.random() > 0.5:
                    mutant = reverseComplement(mutant)
                fastaWrite(fileHandle, str(index), mutant)
        chunkSize = random.choice(range(500, 9000))
        overlapSize = random.choice(range(2, 100))
        toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
        runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                       chunkSize, overlapSize)
        if getLogLevelString() == "DEBUG":
            system("cat %s" % self.tempOutputFile)
        system("rm -rf %s " % toilDir)
def getCactusInputs_random(regionNumber=0, tempDir=None, sequenceNumber=None,
                           avgSequenceLength=None, treeLeafNumber=None):
    """Gets a random set of sequences and a species tree relating them.

    One temp directory is created per leaf of a random binary tree, and
    random FASTA files are written into randomly chosen directories until
    at least ``sequenceNumber`` sequences exist and every directory holds
    at least one file.

    Args:
        regionNumber: Not used by this function; kept so existing callers
            continue to work.
        tempDir: Passed as ``rootDir`` to ``getTempDirectory``.
        sequenceNumber: Minimum number of sequences to write. Defaults to a
            random value in [0, 29].
        avgSequenceLength: Root sequence lengths are drawn from
            [1, 2 * avgSequenceLength - 1]. Defaults to a random value in
            [1, 2999]. Must be at least 1, otherwise the length range is
            empty and ``random.choice`` raises ``IndexError``.
        treeLeafNumber: Passed to ``makeRandomBinaryTree``. Defaults to 2
            or 3, chosen at random.

    Returns:
        A ``(sequenceDirs, newickTreeString)`` tuple: the list of created
        directories and the tree as printed by ``printBinaryTree`` with
        distances included.
    """
    if sequenceNumber is None:
        sequenceNumber = random.choice(range(30))
    if avgSequenceLength is None:
        avgSequenceLength = random.choice(range(1, 3000))
    if treeLeafNumber is None:
        treeLeafNumber = random.choice(range(2, 4))

    # Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []

    def collectLeafNames(tree):
        # Depth-first, left-to-right walk gathering the iD of each leaf.
        if tree.internal:
            collectLeafNames(tree.left)
            collectLeafNames(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)

    collectLeafNames(binaryTree)
    logger.info("Made random binary tree: %s", newickTreeString)

    # One directory per leaf of the tree.
    sequenceDirs = [getTempDirectory(rootDir=tempDir)
                    for _ in newickTreeLeafNames]
    logger.info("Made a set of random directories: %s", " ".join(sequenceDirs))

    # Random sequences and species labelling
    fileHandle = None
    parentSequence = getRandomSequence(
        length=random.choice(range(1, 2 * avgSequenceLength)))[1]
    emptySequenceDirs = set(sequenceDirs)
    sequencesWritten = 0
    try:
        # Keep going past sequenceNumber until no directory is left empty.
        while sequencesWritten < sequenceNumber or emptySequenceDirs:
            if fileHandle is None:
                # Randomly choose the files to be attached or not
                if random.random() > 0.5:
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8:
                # Get a new root sequence
                parentSequence = getRandomSequence(
                    length=random.choice(range(1, 2 * avgSequenceLength)))[1]
            sequence = mutateSequence(parentSequence,
                                      distance=random.random() * 0.25)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            # With probability one half, finish this file so that the next
            # sequence starts a new one (possibly in another directory).
            if random.random() > 0.5:
                fileHandle.close()
                fileHandle = None
            sequencesWritten += 1
    finally:
        # Release the last file even if a helper raised part-way through.
        if fileHandle is not None:
            fileHandle.close()
    # Report the number actually written, which can exceed sequenceNumber
    # when extra sequences were needed to populate every directory.
    logger.info("Made %s sequences in %s directories",
                sequencesWritten, len(sequenceDirs))
    return sequenceDirs, newickTreeString
def getCactusInputs_random(regionNumber=0, tempDir=None, sequenceNumber=None,
                           avgSequenceLength=None, treeLeafNumber=None):
    """Gets a random set of sequences and a species tree relating them.

    One temp directory is created per leaf of a random binary tree, and
    random FASTA files are written into randomly chosen directories until
    at least ``sequenceNumber`` sequences exist and every directory holds
    at least one file.

    Args:
        regionNumber: Not used by this function; kept so existing callers
            continue to work.
        tempDir: Passed as ``rootDir`` to ``getTempDirectory``.
        sequenceNumber: Minimum number of sequences to write. Defaults to a
            random value in [0, 29].
        avgSequenceLength: Root sequence lengths are drawn from
            [1, 2 * avgSequenceLength - 1]. Defaults to a random value in
            [1, 2999]. Must be at least 1, otherwise the length range is
            empty and ``random.choice`` raises ``IndexError``.
        treeLeafNumber: Passed to ``makeRandomBinaryTree``. Defaults to 2
            or 3, chosen at random.

    Returns:
        A ``(sequenceDirs, newickTreeString)`` tuple: the list of created
        directories and the tree as printed by ``printBinaryTree`` with
        distances included.
    """
    # range() is used instead of xrange() so the function also runs on
    # Python 3; random.choice indexes either one identically.
    if sequenceNumber is None:
        sequenceNumber = random.choice(range(30))
    if avgSequenceLength is None:
        avgSequenceLength = random.choice(range(1, 3000))
    if treeLeafNumber is None:
        treeLeafNumber = random.choice(range(2, 4))

    # Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []

    def collectLeafNames(tree):
        # Depth-first, left-to-right walk gathering the iD of each leaf.
        if tree.internal:
            collectLeafNames(tree.left)
            collectLeafNames(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)

    collectLeafNames(binaryTree)
    logger.info("Made random binary tree: %s", newickTreeString)

    # One directory per leaf of the tree.
    sequenceDirs = [getTempDirectory(rootDir=tempDir)
                    for _ in newickTreeLeafNames]
    logger.info("Made a set of random directories: %s", " ".join(sequenceDirs))

    # Random sequences and species labelling
    fileHandle = None
    parentSequence = getRandomSequence(
        length=random.choice(range(1, 2 * avgSequenceLength)))[1]
    emptySequenceDirs = set(sequenceDirs)
    sequencesWritten = 0
    try:
        # Keep going past sequenceNumber until no directory is left empty.
        while sequencesWritten < sequenceNumber or emptySequenceDirs:
            if fileHandle is None:
                # Randomly choose the files to be attached or not
                if random.random() > 0.5:
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8:
                # Get a new root sequence
                parentSequence = getRandomSequence(
                    length=random.choice(range(1, 2 * avgSequenceLength)))[1]
            sequence = mutateSequence(parentSequence,
                                      distance=random.random() * 0.5)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            # With probability one half, finish this file so that the next
            # sequence starts a new one (possibly in another directory).
            if random.random() > 0.5:
                fileHandle.close()
                fileHandle = None
            sequencesWritten += 1
    finally:
        # Release the last file even if a helper raised part-way through.
        if fileHandle is not None:
            fileHandle.close()
    # Report the number actually written, which can exceed sequenceNumber
    # when extra sequences were needed to populate every directory.
    logger.info("Made %s sequences in %s directories",
                sequencesWritten, len(sequenceDirs))
    return sequenceDirs, newickTreeString