Example #1
0
 def testBlastRandom(self):
     """Run cactus blast on randomly mutated sequences with random parameters.

     Each of the ``self.testNo`` iterations rewrites ``tempSeq.fa`` (inside
     ``self.tempDir``) with 0-9 mutated, randomly reverse-complemented
     copies of a single root sequence, then calls ``runCactusBlast`` with a
     random chunk size and overlap size. The test passes if nothing raises.
     """
     tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
     self.tempFiles.append(tempSeqFile)
     for _ in range(self.testNo):
         seqNo = random.randrange(10)
         rootSeq = getRandomSequence(8000)[1]
         # Build every mutant before deciding on strand flips so the order of
         # random draws stays the same as it has always been.
         mutants = [mutateSequence(rootSeq, 0.3 * random.random())
                    for _ in range(seqNo)]
         # The context manager closes the file even if a write fails.
         with open(tempSeqFile, 'w') as fileHandle:
             for index, mutant in enumerate(mutants):
                 if random.random() > 0.5:
                     mutant = reverseComplement(mutant)
                 fastaWrite(fileHandle, str(index), mutant)
         chunkSize = random.randrange(500, 9000)
         overlapSize = random.randrange(2, 100)
         toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
         runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                        chunkSize, overlapSize)
         # Status check currently disabled:
         # runToilStatusAndFailIfNotComplete(toilDir)
         if getLogLevelString() == "DEBUG":
             system("cat %s" % self.tempOutputFile)
         system("rm -rf %s " % toilDir)
Example #2
0
    def testFastaReadWriteC(self):
        """Check that FASTA records survive a round trip through the C tool.

        Each of the ``self.testNo`` iterations writes 0-9 random records to a
        temp file with ``fastaWrite``, runs the external command
        ``sonLib_fastaCTest <input> <output>``, and reads the output file back
        with ``fastaRead``. The records read must equal the records written,
        in the same order and in the same number. Each record read is also
        re-written to an in-memory buffer to exercise ``fastaWrite`` on it.
        """
        tempFile = getTempFile()
        self.tempFiles.append(tempFile)
        tempFile2 = getTempFile()
        self.tempFiles.append(tempFile2)
        for _ in range(self.testNo):
            fastaNumber = random.randrange(10)
            expected = [getRandomSequence() for _ in range(fastaNumber)]
            with open(tempFile, 'w') as fileHandle:
                for name, seq in expected:
                    fastaWrite(fileHandle, name, seq)

            command = "sonLib_fastaCTest %s %s" % (tempFile, tempFile2)
            print(command)
            system(command)

            readCount = 0
            # Both handles are closed even when an assertion fails.
            with open(tempFile2, 'r') as fileHandle, io.StringIO() as outFh:
                for record in fastaRead(fileHandle):
                    # An IndexError here means more records came back than
                    # were written.
                    assert record == expected[readCount]
                    name, seq = record
                    fastaWrite(outFh, name, seq)
                    readCount += 1
            # Without this check a truncated (or empty) output file would
            # pass unnoticed.
            assert readCount == len(expected), (
                "read %i records but wrote %i" % (readCount, len(expected)))
Example #3
0
    def testRandom(self):
        """Makes random sequences and tests that Ortheus can align them and produce a valid output.
        """
        outputFile = getTempFile()
        self.tempFiles.append(outputFile)

        MAX_SEQS = 20

        for i in xrange(MAX_SEQS):
            self.tempFiles.append(getTempFile())

        for test in xrange(0, self.testNo):
            print "test no : %i " % test
            #seqNo
            binaryTree = randomTree()
            middleSeq = getRandomSequence(250)[1]
            seqs = []
            getTreeSeqs(binaryTree, middleSeq, seqs)

            if len(seqs) <= MAX_SEQS and len(seqs) > 2:
                seqFiles = []
                for i in xrange(0, len(seqs)):
                    seqFiles.append(self.tempFiles[1 + i])
                    fileHandle = open(seqFiles[i], 'w')
                    fastaWrite(fileHandle, "%i" % i, seqs[i])
                    fileHandle.close()
                print "Have seq files ", seqFiles

                treeString = printBinaryTree(binaryTree, True)
                print "For tree ", treeString

                #align seqs and check no failure
                command = "ortheus_core -a %s -b '%s' -d %s -e" % (
                    " ".join(seqFiles), treeString, outputFile)
                print "command to call", command
                system(command)

                #check alignment is complete
                alignment = [i[:] for i in fastaAlignmentRead(outputFile)]
                #print "alignment", alignment
                checkAlignment(alignment, seqs)

                print "test no is finished : %i " % test
Example #4
0
 def testFastaReadWrite(self):
     """Check that records written by ``fastaWrite`` are read back unchanged.

     Each of the ``self.testNo`` iterations writes 0-9 random records to a
     temp file and reads them back with ``fastaRead``. The records read must
     equal the records written, in the same order and in the same number.
     Each record read is also re-written to an in-memory buffer to exercise
     ``fastaWrite`` on it.
     """
     tempFile = getTempFile()
     self.tempFiles.append(tempFile)
     for _ in range(self.testNo):
         fastaNumber = random.randrange(10)
         expected = [getRandomSequence() for _ in range(fastaNumber)]
         with open(tempFile, 'w') as fileHandle:
             for name, seq in expected:
                 fastaWrite(fileHandle, name, seq)

         readCount = 0
         # Both handles are closed even when an assertion fails.
         with open(tempFile, 'r') as fileHandle, io.StringIO() as outFh:
             for record in fastaRead(fileHandle):
                 # An IndexError here means more records came back than
                 # were written.
                 assert record == expected[readCount]
                 name, seq = record
                 fastaWrite(outFh, name, seq)
                 readCount += 1
         # Without this check a reader that stops early would pass unnoticed.
         assert readCount == len(expected), (
             "read %i records but wrote %i" % (readCount, len(expected)))
Example #5
0
    def testRandom(self):
        """Makes random sequences and tests that Ortheus can align them and produce a valid output.
        """
        outputFile = getTempFile()
        self.tempFiles.append(outputFile)

        MAX_SEQS = 20

        for i in xrange(MAX_SEQS):
            self.tempFiles.append(getTempFile())

        for test in xrange(0, self.testNo):
            print "test no : %i " % test
            # seqNo
            binaryTree = randomTree()
            middleSeq = getRandomSequence(250)[1]
            seqs = []
            getTreeSeqs(binaryTree, middleSeq, seqs)

            if len(seqs) <= MAX_SEQS and len(seqs) > 2:
                seqFiles = []
                for i in xrange(0, len(seqs)):
                    seqFiles.append(self.tempFiles[1 + i])
                    fileHandle = open(seqFiles[i], "w")
                    fastaWrite(fileHandle, "%i" % i, seqs[i])
                    fileHandle.close()
                print "Have seq files ", seqFiles

                treeString = printBinaryTree(binaryTree, True)
                print "For tree ", treeString

                # align seqs and check no failure
                command = "ortheus_core -a %s -b '%s' -d %s -e" % (" ".join(seqFiles), treeString, outputFile)
                print "command to call", command
                system(command)

                # check alignment is complete
                alignment = [i[:] for i in fastaAlignmentRead(outputFile)]
                # print "alignment", alignment
                checkAlignment(alignment, seqs)

                print "test no is finished : %i " % test
Example #6
0
 def testFastqReadWrite(self):
     """Check that records written by ``fastqWrite`` are read back unchanged.

     Each of the ``self.testNo`` iterations builds 0-9 random records, each
     with one quality value in [33, 126] per sequence position, writes them
     to a temp file and reads them back with ``fastqRead``. The records read
     must equal the records written, in the same order and in the same
     number. Each record read is also re-written to an in-memory buffer to
     exercise ``fastqWrite`` on it.
     """
     tempFile = getTempFile()
     self.tempFiles.append(tempFile)
     for _ in range(self.testNo):
         fastaNumber = random.randrange(10)
         # All sequences are generated before any quality values are drawn,
         # matching the original order of random draws.
         sequences = [getRandomSequence() for _ in range(fastaNumber)]
         fastqs = [(name, seq,
                    [random.randint(33, 126) for _ in range(len(seq))])
                   for name, seq in sequences]
         with open(tempFile, 'w') as fH:
             for name, seq, quals in fastqs:
                 fastqWrite(fH, name, seq, quals)

         readCount = 0
         with io.StringIO() as outFh:
             for record in fastqRead(tempFile):
                 # An IndexError here means more records came back than
                 # were written.
                 assert record == fastqs[readCount]
                 name, seq, quals = record
                 fastqWrite(outFh, name, seq, quals)
                 readCount += 1
         # Without this check a reader that stops early would pass unnoticed.
         assert readCount == len(fastqs), (
             "read %i records but wrote %i" % (readCount, len(fastqs)))
Example #7
0
 def testBlastRandom(self):
     """Make some sequences, put them in a file, call blast with random
     parameters and check it runs okay.

     Every iteration overwrites ``tempSeq.fa`` in ``self.tempDir`` with
     between 0 and 9 mutated copies of one root sequence (each flipped to
     its reverse complement with probability 0.5) and then runs
     ``runCactusBlast`` on that file with a randomly drawn chunk size and
     overlap size. Nothing is asserted beyond the run not raising.
     """
     tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
     self.tempFiles.append(tempSeqFile)
     for _ in range(self.testNo):
         seqNo = random.randrange(10)
         rootSeq = getRandomSequence(8000)[1]
         # Mutate first, flip later: keeps the sequence of random draws
         # identical to the previous implementation.
         mutants = [mutateSequence(rootSeq, 0.3 * random.random())
                    for _ in range(seqNo)]
         # "with" guarantees the handle is released if a write raises.
         with open(tempSeqFile, 'w') as fileHandle:
             for fastaHeader, mutant in enumerate(mutants):
                 if random.random() > 0.5:
                     mutant = reverseComplement(mutant)
                 fastaWrite(fileHandle, str(fastaHeader), mutant)
         chunkSize = random.randrange(500, 9000)
         overlapSize = random.randrange(2, 100)
         toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
         runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                        chunkSize, overlapSize)
         # Status check currently disabled:
         # runToilStatusAndFailIfNotComplete(toilDir)
         if getLogLevelString() == "DEBUG":
             system("cat %s" % self.tempOutputFile)
         system("rm -rf %s " % toilDir)
Example #8
0
def getCactusInputs_random(regionNumber=0,
                           tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Create random FASTA inputs plus a random species tree.

    A random binary tree is built and one temporary directory is created per
    leaf of that tree. Randomly mutated sequences are then written into
    FASTA files spread at random over those directories. Writing continues
    until at least ``sequenceNumber`` sequences exist and every directory
    has received at least one file, so more than ``sequenceNumber``
    sequences may be written.

    Arguments:
        regionNumber: unused by this function.
        tempDir: root directory passed to ``getTempDirectory``.
        sequenceNumber: minimum number of sequences to write; drawn from
            [0, 30) when None.
        avgSequenceLength: each root sequence length is drawn from
            [1, 2 * avgSequenceLength); drawn from [1, 3000) when None.
        treeLeafNumber: leaf count passed to ``makeRandomBinaryTree``;
            drawn from [2, 4) when None.

    Returns:
        A tuple ``(sequenceDirs, newickTreeString)``: the list of created
        directories and the tree printed with branch distances.
    """
    if sequenceNumber is None:
        sequenceNumber = random.randrange(30)
    if avgSequenceLength is None:
        avgSequenceLength = random.randrange(1, 3000)
    if treeLeafNumber is None:
        treeLeafNumber = random.randrange(2, 4)

    #Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []

    def collectLeafNames(tree):
        # Depth-first, left before right.
        if tree.internal:
            collectLeafNames(tree.left)
            collectLeafNames(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)

    collectLeafNames(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)

    # One directory per leaf of the tree.
    sequenceDirs = [getTempDirectory(rootDir=tempDir)
                    for _ in newickTreeLeafNames]

    logger.info("Made a set of random directories: %s" %
                " ".join(sequenceDirs))

    #Random sequences and species labelling
    fileHandle = None
    parentSequence = getRandomSequence(
        length=random.randrange(1, 2 * avgSequenceLength))[1]
    emptySequenceDirs = set(sequenceDirs)
    sequencesWritten = 0
    try:
        while sequencesWritten < sequenceNumber or emptySequenceDirs:
            if fileHandle is None:
                #Randomly choose the files to be attached or not
                if random.random() > 0.5:
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8:  #Get a new root sequence
                parentSequence = getRandomSequence(
                    length=random.randrange(1, 2 * avgSequenceLength))[1]
            sequence = mutateSequence(parentSequence,
                                      distance=random.random() * 0.25)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            # Half of the time start a fresh file for the next sequence.
            if random.random() > 0.5:
                fileHandle.close()
                fileHandle = None
            sequencesWritten += 1
    finally:
        # Release the last file even when a helper above raised.
        if fileHandle is not None:
            fileHandle.close()

    # Report what was actually written, which can exceed sequenceNumber.
    logger.info("Made %s sequences in %s directories" %
                (sequencesWritten, len(sequenceDirs)))

    return sequenceDirs, newickTreeString
Example #9
0
def getCactusInputs_random(regionNumber=0, tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Gets a random set of sequences and a species tree to go with them.

    Builds a random binary tree, makes one temporary directory for each of
    its leaves, and scatters FASTA files of randomly mutated sequences over
    those directories. The loop keeps writing until ``sequenceNumber``
    sequences have been produced and no directory is left without a file,
    so the number of sequences written can be larger than requested.

    Arguments:
        regionNumber: unused by this function.
        tempDir: root directory handed to ``getTempDirectory``.
        sequenceNumber: minimum number of sequences; random in [0, 30)
            when None.
        avgSequenceLength: root sequence lengths are random in
            [1, 2 * avgSequenceLength); itself random in [1, 3000) when None.
        treeLeafNumber: leaf count given to ``makeRandomBinaryTree``;
            random in [2, 4) when None.

    Returns:
        ``(sequenceDirs, newickTreeString)``: the created directories and
        the tree printed with distances included.
    """
    if sequenceNumber is None:
        sequenceNumber = random.randrange(30)
    if avgSequenceLength is None:
        avgSequenceLength = random.randrange(1, 3000)
    if treeLeafNumber is None:
        treeLeafNumber = random.randrange(2, 4)
    #Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []
    def gatherLeaves(tree):
        # Visit the left subtree before the right one.
        if tree.internal:
            gatherLeaves(tree.left)
            gatherLeaves(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)
    gatherLeaves(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)

    # One directory for every leaf that was found.
    sequenceDirs = [getTempDirectory(rootDir=tempDir) for _ in newickTreeLeafNames]

    logger.info("Made a set of random directories: %s" % " ".join(sequenceDirs))

    #Random sequences and species labelling
    fileHandle = None
    parentSequence = getRandomSequence(length=random.randrange(1, 2*avgSequenceLength))[1]
    emptySequenceDirs = set(sequenceDirs)
    madeCount = 0
    try:
        while madeCount < sequenceNumber or emptySequenceDirs:
            if fileHandle is None:
                if random.random() > 0.5: #Randomly choose the files to be attached or not
                    suffix = ".fa.complete"
                else:
                    suffix = ".fa"
                sequenceDir = random.choice(sequenceDirs)
                emptySequenceDirs.discard(sequenceDir)
                sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
                fileHandle = open(sequenceFile, 'w')
            if random.random() > 0.8: #Get a new root sequence
                parentSequence = getRandomSequence(length=random.randrange(1, 2*avgSequenceLength))[1]
            sequence = mutateSequence(parentSequence, distance=random.random()*0.5)
            name = getRandomAlphaNumericString(15)
            if random.random() > 0.5:
                sequence = reverseComplement(sequence)
            fastaWrite(fileHandle, name, sequence)
            # With probability 0.5 the next sequence goes into a new file.
            if random.random() > 0.5:
                fileHandle.close()
                fileHandle = None
            madeCount += 1
    finally:
        # Do not leak the last handle if any helper above raised.
        if fileHandle is not None:
            fileHandle.close()

    # Log the real count; it may exceed the requested sequenceNumber.
    logger.info("Made %s sequences in %s directories" % (madeCount, len(sequenceDirs)))

    return sequenceDirs, newickTreeString