Ejemplo n.º 1
0
def getTreeSeqs(binaryTree, seq, l):
    seq = mutateSequence(seq, binaryTree.distance)
    if binaryTree.internal:
        getTreeSeqs(binaryTree.left, seq, l)
        getTreeSeqs(binaryTree.right, seq, l)
    else:
        l.append(seq)
Ejemplo n.º 2
0
 def testBlastRandom(self):
     """Make some sequences, put them in a file, call blast with random parameters 
     and check it runs okay.
     """
     tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
     self.tempFiles.append(tempSeqFile)
     for test in xrange(self.testNo):
         seqNo = random.choice(xrange(0, 10))
         seq = getRandomSequence(8000)[1]
         fileHandle = open(tempSeqFile, 'w')
         for fastaHeader, seq in [(str(i),
                                   mutateSequence(seq,
                                                  0.3 * random.random()))
                                  for i in xrange(seqNo)]:
             if random.random() > 0.5:
                 seq = reverseComplement(seq)
             fastaWrite(fileHandle, fastaHeader, seq)
         fileHandle.close()
         chunkSize = random.choice(xrange(500, 9000))
         overlapSize = random.choice(xrange(2, 100))
         toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
         runCactusBlast([tempSeqFile], self.tempOutputFile, toilDir,
                        chunkSize, overlapSize)
         #runToilStatusAndFailIfNotComplete(toilDir)
         if getLogLevelString() == "DEBUG":
             system("cat %s" % self.tempOutputFile)
         system("rm -rf %s " % toilDir)
Ejemplo n.º 3
0
def getTreeSeqs(binaryTree, seq, l):
    seq = mutateSequence(seq, binaryTree.distance)
    if binaryTree.internal:
        getTreeSeqs(binaryTree.left, seq, l)
        getTreeSeqs(binaryTree.right, seq, l)
    else:
        l.append(seq)
Ejemplo n.º 4
0
 def testBlastRandom(self):
     """Make some sequences, put them in a file, call blast with random parameters 
     and check it runs okay.
     """
     tempSeqFile = os.path.join(self.tempDir, "tempSeq.fa")
     self.tempFiles.append(tempSeqFile)
     for test in xrange(self.testNo):
         seqNo = random.choice(xrange(0, 10))
         seq = getRandomSequence(8000)[1]
         fileHandle = open(tempSeqFile, 'w')
         for fastaHeader, seq in [ (str(i), mutateSequence(seq, 0.3*random.random())) for i in xrange(seqNo) ]:
             if random.random() > 0.5:
                 seq = reverseComplement(seq)
             fastaWrite(fileHandle, fastaHeader, seq)
         fileHandle.close()
         chunkSize = random.choice(xrange(500, 9000))
         overlapSize = random.choice(xrange(2, 100))
         toilDir = os.path.join(getTempDirectory(self.tempDir), "toil")
         runCactusBlast([ tempSeqFile ], self.tempOutputFile, toilDir, chunkSize, overlapSize)
         #runToilStatusAndFailIfNotComplete(toilDir)
         if getLogLevelString() == "DEBUG":
             system("cat %s" % self.tempOutputFile)
         system("rm -rf %s " % toilDir)
Ejemplo n.º 5
0
def getCactusInputs_random(regionNumber=0,
                           tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Gets a random set of sequences, each of length given, and a species
    tree relating them. Each sequence is a assigned an event in this tree.
    """
    if sequenceNumber is None:
        sequenceNumber = random.choice(list(range(30)))
    if avgSequenceLength is None:
        avgSequenceLength = random.choice(list(range(1, 3000)))
    if treeLeafNumber is None:
        treeLeafNumber = random.choice(list(range(2, 4)))

    #Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []

    def fn(tree):
        if tree.internal:
            fn(tree.left)
            fn(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)

    fn(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)

    sequenceDirs = []
    for i in range(len(newickTreeLeafNames)):
        seqDir = getTempDirectory(rootDir=tempDir)
        sequenceDirs.append(seqDir)

    logger.info("Made a set of random directories: %s" %
                " ".join(sequenceDirs))

    #Random sequences and species labelling
    sequenceFile = None
    fileHandle = None
    parentSequence = getRandomSequence(
        length=random.choice(list(range(1, 2 * avgSequenceLength))))[1]
    emptySequenceDirs = set(sequenceDirs)
    i = 0
    while i < sequenceNumber or len(emptySequenceDirs) > 0:
        if sequenceFile == None:
            if random.random(
            ) > 0.5:  #Randomly choose the files to be attached or not
                suffix = ".fa.complete"
            else:
                suffix = ".fa"
            sequenceDir = random.choice(sequenceDirs)
            if sequenceDir in emptySequenceDirs:
                emptySequenceDirs.remove(sequenceDir)
            sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
            fileHandle = open(sequenceFile, 'w')
        if random.random() > 0.8:  #Get a new root sequence
            parentSequence = getRandomSequence(
                length=random.choice(list(range(1, 2 * avgSequenceLength))))[1]
        sequence = mutateSequence(parentSequence,
                                  distance=random.random() * 0.25)
        name = getRandomAlphaNumericString(15)
        if random.random() > 0.5:
            sequence = reverseComplement(sequence)
        fastaWrite(fileHandle, name, sequence)
        if random.random() > 0.5:
            fileHandle.close()
            fileHandle = None
            sequenceFile = None
        i += 1
    if fileHandle != None:
        fileHandle.close()

    logger.info("Made %s sequences in %s directories" %
                (sequenceNumber, len(sequenceDirs)))

    return sequenceDirs, newickTreeString
Ejemplo n.º 6
0
def getCactusInputs_random(regionNumber=0, tempDir=None,
                           sequenceNumber=None,
                           avgSequenceLength=None,
                           treeLeafNumber=None):
    """Gets a random set of sequences, each of length given, and a species
    tree relating them. Each sequence is a assigned an event in this tree.
    """
    if sequenceNumber is None:
        sequenceNumber = random.choice(xrange(30))
    if avgSequenceLength is None:
        avgSequenceLength = random.choice(xrange(1,3000))
    if treeLeafNumber is None:
        treeLeafNumber = random.choice(xrange(2, 4))
    #Make tree
    binaryTree = makeRandomBinaryTree(treeLeafNumber)
    newickTreeString = printBinaryTree(binaryTree, includeDistances=True)
    newickTreeLeafNames = []
    def fn(tree):
        if tree.internal:
            fn(tree.left)
            fn(tree.right)
        else:
            newickTreeLeafNames.append(tree.iD)
    fn(binaryTree)
    logger.info("Made random binary tree: %s" % newickTreeString)
    
    sequenceDirs = []
    for i in xrange(len(newickTreeLeafNames)):
        seqDir = getTempDirectory(rootDir=tempDir)
        sequenceDirs.append(seqDir)

    logger.info("Made a set of random directories: %s" % " ".join(sequenceDirs))

    #Random sequences and species labelling
    sequenceFile = None
    fileHandle = None
    parentSequence = getRandomSequence(length=random.choice(xrange(1, 2*avgSequenceLength)))[1]
    emptySequenceDirs = set(sequenceDirs)
    i = 0
    while i < sequenceNumber or len(emptySequenceDirs) > 0:
        #for i in xrange(sequenceNumber):
        if sequenceFile == None:
            if random.random() > 0.5: #Randomly choose the files to be attached or not
                suffix = ".fa.complete"
            else:
                suffix = ".fa"
            sequenceDir = random.choice(sequenceDirs)
            if sequenceDir in emptySequenceDirs:
                emptySequenceDirs.remove(sequenceDir)
            sequenceFile = getTempFile(rootDir=sequenceDir, suffix=suffix)
            fileHandle = open(sequenceFile, 'w')
        if random.random() > 0.8: #Get a new root sequence
            parentSequence = getRandomSequence(length=random.choice(xrange(1, 2*avgSequenceLength)))[1]
        sequence = mutateSequence(parentSequence, distance=random.random()*0.5)
        name = getRandomAlphaNumericString(15)
        if random.random() > 0.5:
            sequence = reverseComplement(sequence)
        fastaWrite(fileHandle, name, sequence)
        if random.random() > 0.5:
            fileHandle.close()
            fileHandle = None
            sequenceFile = None
        i += 1
    if fileHandle != None:
        fileHandle.close()

    logger.info("Made %s sequences in %s directories" % (sequenceNumber, len(sequenceDirs)))
    
    return sequenceDirs, newickTreeString