Beispiel #1
0
 def removeRestrictionSites(self, sourceSeq, optimizedSeq, restrictionSites):
     '''
     Search for a variant of optimizedSeq that contains no restriction site.

     Candidates are kept in a heap keyed by self.scoreSequence(sourceSeq,
     candidate); the candidate with the smallest score is examined first.
     A candidate that still contains restriction sites is expanded through
     self.getPossibleOneStepChanges, restricted to the codons that overlap
     a site.

     Returns the first candidate popped from the heap in which no
     restriction site is found, or None when the search space is exhausted.
     '''
     restrictionSites = SeqUtils.expandAmbiguousMult(restrictionSites)

     # Every sequence that has ever been queued. Recording candidates when
     # they are pushed (not when they are popped) keeps the same sequence,
     # reachable through different substitution orders, from being queued
     # and expanded more than once.
     queuedSeqs = {optimizedSeq}
     worklist = [(self.scoreSequence(sourceSeq, optimizedSeq), optimizedSeq)]

     while worklist:
         _, tSeq = heapq.heappop(worklist)
         restrictionLocations = SeqUtils.searchSubseqs(tSeq, restrictionSites)
         restrictionCodons = SeqUtils.getCodonsForRanges(restrictionLocations)

         if not restrictionCodons:
             return tSeq

         possibleChanges = self.getPossibleOneStepChanges(sourceSeq, tSeq, restrictionCodons)
         # "or ()" tolerates a helper that reports "no changes" as None.
         for tNewSeq in possibleChanges or ():
             if tNewSeq not in queuedSeqs:
                 queuedSeqs.add(tNewSeq)
                 heapq.heappush(worklist, (self.scoreSequence(sourceSeq, tNewSeq), tNewSeq))

     return None
Beispiel #2
0
 def __init__(self,parent,urls):
     '''
     Set up an unpaired entry from the first element of urls.

     The working directory is parent.dir joined with this entry's basename;
     it is created when nonexist() reports it as missing.
     '''
     self.pairs = False
     self.parent = parent

     # Only the first URL is used here.
     self.files = SeqUtils.Filename(urls[0])
     self.basename = self.files.ID

     nameParts = (self.parent.basename, self.basename)
     self.aln = "_".join(nameParts)

     self.dir = os.path.join(self.parent.dir, self.basename)
     if nonexist(self.dir):
         os.mkdir(self.dir)
Beispiel #3
0
 def getBestSequence(self, sourceSequence):
     '''
     Replace every complete three-character group of sourceSequence (as
     matched by the regex '...') with self.getBestCodon(group) and append
     the suffix reported by SeqUtils.getRemainderSuffix unchanged.
     '''
     triplets = re.findall('...', sourceSequence)
     tail = SeqUtils.getRemainderSuffix(sourceSequence)

     bestCodons = [self.getBestCodon(triplet) for triplet in triplets]
     return "".join(bestCodons) + tail
Beispiel #4
0
 def SequenceToPrint(self, seq, restrictionSites, source=True):
     '''
     Build one (codon, usage, isRestrictionSite) tuple per complete
     three-character group of seq.

     usage is looked up in self.sourceCU when source is true, otherwise in
     self.targetCU. isRestrictionSite tells whether the codon index is among
     the codons reported for the restriction-site matches in seq. A
     non-empty remainder suffix is appended as (remainder, None, None).
     '''
     triplets = re.findall('...', seq)
     tail = SeqUtils.getRemainderSuffix(seq)

     expandedSites = SeqUtils.expandAmbiguousMult(restrictionSites)
     siteLocations = SeqUtils.searchSubseqs(seq, expandedSites)
     siteCodons = SeqUtils.getCodonsForRanges(siteLocations)

     rows = []
     for idx, codon in enumerate(triplets):
         if source:
             usage = self.sourceCU.getCodonRelativeUsage(codon)
         else:
             usage = self.targetCU.getCodonRelativeUsage(codon)
         rows.append((codon, usage, idx in siteCodons))

     if tail:
         rows.append((tail, None, None))

     return rows
Beispiel #5
0
 def __init__(self,parent,urls):
     '''
     Set up a paired entry: one SeqUtils.Filename per element of urls.

     The basename comes from the first file. The working directory is
     parent.dir joined with that basename; it is created when nonexist()
     reports it as missing.
     '''
     self.files = []
     for location in urls:
         self.files.append(SeqUtils.Filename(location))
     self.pairs = True
     self.urls = [os.path.abspath(location) for location in urls]
     self.parent = parent

     self.basename = self.files[0].ID
     nameParts = (self.parent.basename, self.basename)
     self.aln = "_".join(nameParts)

     self.dir = os.path.join(self.parent.dir, self.basename)
     if nonexist(self.dir):
         os.mkdir(self.dir)
Beispiel #6
0
 def removeRestrictionSites(self, sourceSeq, optimizedSeq, restrictionSites):
     '''
     Try to remove all restriction sites from optimizedSeq by repeatedly
     re-drawing the affected codons with self.getRandomOptimizedCodon.

     The codons to re-draw are determined once, from the restriction sites
     found in optimizedSeq. Returns the first candidate in which no
     restriction site is found, or None after ITERMAX unsuccessful attempts.
     '''
     # upper bound on the number of re-randomization attempts
     ITERMAX = 10000

     codons = re.findall('...', optimizedSeq)
     remainder = SeqUtils.getRemainderSuffix(optimizedSeq)

     restrictionSites = SeqUtils.expandAmbiguousMult(restrictionSites)
     initialHits = SeqUtils.searchSubseqs(optimizedSeq, restrictionSites)
     codonsToRedraw = SeqUtils.getCodonsForRanges(initialHits)

     for _ in range(ITERMAX):
         for idx in codonsToRedraw:
             codons[idx] = self.getRandomOptimizedCodon(codons[idx])

         candidate = "".join(codons) + remainder

         remainingHits = SeqUtils.searchSubseqs(candidate, restrictionSites)
         if not SeqUtils.getCodonsForRanges(remainingHits):
             return candidate

     return None
 
     
Beispiel #7
0
 def getPossibleOneStepChanges(self, sourceSeq, optimizedSeq, codonsToConsider):
     '''
     Return every sequence obtained from optimizedSeq by replacing exactly
     one of the codons in codonsToConsider with its next best codon.

     sourceSeq and optimizedSeq are split into three-character groups; for
     each codon index i in codonsToConsider the replacement is
     self.getNextBestCodon(sourceCodons[i], optCodons[i]). Indices for which
     no replacement is returned are skipped. The remainder suffix of
     optimizedSeq is appended to every result unchanged.

     Returns a (possibly empty) list of sequences.
     '''
     sourceCodons = re.findall('...', sourceSeq)
     optCodons = re.findall('...', optimizedSeq)
     remainder = SeqUtils.getRemainderSuffix(optimizedSeq)

     res = []

     for i, optCodon in enumerate(optCodons):
         if i not in codonsToConsider:
             continue

         nextBestCodon = self.getNextBestCodon(sourceCodons[i], optCodon)
         if nextBestCodon:
             # Work on a copy: assigning into optCodons itself would carry
             # this substitution over into every later candidate, so the
             # results would no longer differ from optimizedSeq by one codon.
             tCodons = list(optCodons)
             tCodons[i] = nextBestCodon
             res.append("".join(tCodons) + remainder)

     return res
Beispiel #8
0
# Parsed command-line options as a plain dict (attribute name -> value).
# NOTE(review): `ap` is defined outside this view, presumably an
# argparse.ArgumentParser - confirm.
args=vars(ap.parse_args() )

def GC_chunk(size, step):
    """
    Generate a random DNA sequence and compute the GC content of each chunk.

    A sequence of `size` bases is drawn uniformly from 'ATGC' and cut into
    consecutive chunks of `step` bases (the last chunk may be shorter).

    size -- length of the random sequence to generate
    step -- chunk length; must be a positive integer

    Returns a list with one float per chunk: the fraction (0.0 - 1.0) of
    bases in that chunk that are G or C. An empty sequence gives [].

    Raises ValueError when step is smaller than 1.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    bases = 'ATGC'
    seq = ''.join(random.choice(bases) for _ in range(size))

    gc = []
    for start in range(0, len(seq), step):
        chunk = seq[start:start + step]
        # The sequence only holds A/T/G/C and range() never yields an empty
        # slice, so len(chunk) is the non-zero count of all nucleotides.
        gc.append((chunk.count('G') + chunk.count('C')) / float(len(chunk)))

    return gc