def analyseRecombindationResults(self, oligoReport, featureList, outputFile="oligo_recomb_analysis.txt"):
    '''
    Write the alignment matches found for every oligo to a text file and
    return a report of the matched query values.

    @var oligoReport: report read through its "genomic_start",
        "genomic_end" and "best" columns for every row name
    @var featureList: features passed to self._selectFeatures together
        with each oligo's start and end
    @var outputFile: path of the text file to write (opened in "w" mode)
    @return: Report extended with oligoReport, plus one "match_<n>" column
        per selected feature holding that feature's "query" qualifier
    '''
    result = Report()
    result.extend(oligoReport)

    with open(outputFile, "w") as output:
        for rName in oligoReport.returnRowNames():
            output.write(rName + "\n")

            start = float(oligoReport["genomic_start"][rName])
            end = float(oligoReport["genomic_end"][rName])
            oligo = oligoReport["best"][rName]
            output.write("\n(%s,%s) %s\n\n" % (start, end, oligo))

            subFeatures = self._selectFeatures(featureList, start, end)
            for index, feature in enumerate(subFeatures):
                # Column names are numbered per oligo, starting at match_0.
                matchName = "match_" + str(index)
                output.write(feature.id + "\n")
                output.write(feature.qualifiers["alignment"] + "\n\n")
                result.add(rName, matchName, feature.qualifiers["query"])

    return result
def geneControlReport(self, model, controlMap, outputName):
    '''
    Write a report with one "control" entry per (target, control) pair
    found in model.controlMap.

    @var model: object whose controlMap attribute is iterated
        (assumed to be a dict-like mapping of target -> iterable of
        controls - TODO confirm against the model class)
    @var controlMap: not read by this method; the map is taken from
        model.controlMap instead
    @var outputName: file name handed to ReportWriter.setFile
    @return: None
    '''
    report = Report()
    # Iterate key/value pairs: each key is a target, each value its controls.
    for target, controls in model.controlMap.items():
        for control in controls:
            report.add(target, control, "control")

    writer = ReportWriter()
    writer.setFile(outputName)
    try:
        writer.write(report)
    finally:
        # Release the writer's file even when writing fails.
        writer.closeFile()
    return None
def parseGenericReport(self, fileName, keyTag=None, header=None, unique=True):
    '''
    @var fileName: name of flat (delimited) file in text format to be parsed
    @type fileName: String
    @var keyTag: ID of column to be used for report key row; when it is not
        found in the header, the running line index is used as the row name
    @type keyTag: String
    @var header: column names to use until the line at self.headerLine
        replaces them with the result of self.parseHeader
    @var unique: passed through to self.parseHeader
    @summary: Primary function. Parses a flat file and returns a report object.
    '''
    result = Report()
    kIndex = None
    index = 0

    with open(fileName, 'r') as lines:
        for line in lines:
            if self.isComment(line):
                # Comment lines carry no data but still advance the index.
                pass
            elif self.endLine < index < self.startLine:
                # NOTE(review): only satisfiable when endLine < startLine;
                # confirm the intended skip condition.
                index += 1
                continue
            elif index == self.headerLine:
                header = self.parseHeader(line, unique)
                if keyTag in header:
                    kIndex = header.index(keyTag)
            elif self.endLine > index > self.startLine:
                sLine = self._safeSplit(line.replace('\n', ''))
                if kIndex is not None:
                    rName = sLine[kIndex]
                else:
                    rName = str(index)
                for i, value in enumerate(sLine):
                    # The key column names the row; it is not stored as a value.
                    if i != kIndex:
                        result.add(rName, header[i], value)
            index += 1

    return result
def enzymeBoundaryControl(model, targets, bounds, objectiveName, productionName, searchSize):
    '''
    Evaluate every combination of searchSize controls and report the
    objective and production flux returned by findBoundaryProduction.

    The targets argument is replaced by bounds.keys() before use. Each
    combination is tagged with its sorted, annotated enzyme names; the
    combination is skipped when none of its controlled targets appears
    in bounds.

    @return: Report with "natural" and "production" columns per control tag
    '''
    targets = bounds.keys()
    controlNames = model.getControlsForNames(targets)
    eControlMap = model.getEnzymeControlMap()
    uniqueSubsets = set(combinations(controlNames, searchSize))

    result = Report()
    for subset in uniqueSubsets:
        enzymeNames = model.annotateGeneList(list(subset))
        enzymeNames.sort()
        controlTag = "".join(["(%s)" % (name) for name in enzymeNames])

        # Collect the bounds of every target governed by this subset.
        activeBounds = {}
        for controlName in subset:
            for target in eControlMap[controlName]:
                if target in bounds.keys():
                    activeBounds[target] = bounds[target]

        if not activeBounds:
            continue

        (fluxMap, oflux, pflux) = findBoundaryProduction(
            model, activeBounds, None, objectiveName, productionName)
        print("[%s] objective %s => production %s" % (controlTag, oflux, pflux))

        result.add(controlTag, "natural", oflux)
        result.add(controlTag, "production", pflux)

    return result
def parseAlignments(self, records, featureLocations):
    '''
    Build a report describing where each record aligned, using the first
    feature listed for that record.

    The record id and its alignment text are also written to
    "oligoLog.txt" in the working directory.

    @var records: sequence records; r.id and r.seq are read
    @var featureLocations: per-record feature lists, indexed in the same
        order as records; the first feature of each list must exist
    @return: Report with "original", "hits", "genomic_start",
        "genomic_end", "genomic_strand" and "match" columns per record id
    '''
    result = Report()

    with open("oligoLog.txt", "w") as logFile:
        for index, r in enumerate(records):
            recordId = r.id
            # Each record is paired with the feature list at its own position.
            features = featureLocations[index]
            first = features[0]

            if "alignment" in first.qualifiers.keys():
                aMatch = first.qualifiers["alignment"]
            else:
                aMatch = ''

            logFile.write(recordId + "\n")
            logFile.write(aMatch + "\n")

            result.add(recordId, "original", str(r.seq))
            result.add(recordId, "hits", len(features))
            result.add(recordId, "genomic_start", first.location.start.position)
            result.add(recordId, "genomic_end", first.location.end.position)
            result.add(recordId, "genomic_strand", first.strand)
            result.add(recordId, "match", aMatch)

    return result
def getControlReport(self, genes, targetRecord, sequence, boundary=0, range=1000):
    '''
    Create report of control regions for listed genes.

    For every gene the report receives its locus tag, coordinates and
    strand, the region returned by self.getSequenceRegion at the gene's
    strand-dependent start, and one "promoter_<n>" column for each local
    feature that lies on the same strand as the gene.
    '''
    result = Report()
    seqProp = RecombinationOligoFactory()
    sdata = str(sequence).lower()
    # Indexed by strand + 1, so -1, 0, 1 map to "<-", "-", "->".
    arrow = ["<-", "-", "->"]

    if self.verbose:
        print("finding local features")
    localFeatures = self.localFeatures(genes, targetRecord, range=range)
    if self.verbose:
        print("features found, creating report")

    for feature in genes:
        name = feature.qualifiers["gene"][0]
        locTag = feature.qualifiers["locus_tag"][0]
        start = feature.location.start.position
        end = feature.location.end.position
        strand = feature.strand

        result.add(name, "locus_tag", locTag)
        result.add(name, "gene_start", start)
        result.add(name, "gene_end", end)
        result.add(name, "gene_strand", strand)

        # NOTE(review): loc is only assigned for strand 1 or -1; any other
        # strand value leaves it unset or carried over from the previous gene.
        if strand == 1:
            loc = start
        elif strand == -1:
            loc = end

        rbsSeq = self.getSequenceRegion(sdata, loc, 3, boundary, strand)
        oligoStrandRbs = seqProp.strandChooser(feature)
        result.add(name, "rbs_start", loc)
        result.add(name, "rbs_region", rbsSeq)
        result.add(name, "rbs_Oligo_Strand", str(oligoStrandRbs))

        # Numbering counts every local feature, so promoter column numbers
        # can have gaps where a feature is on the opposite strand.
        for count, tFeature in enumerate(localFeatures[name], 1):
            tName = tFeature.qualifiers["gene"][0]
            tStart = tFeature.location.start.position
            tEnd = tFeature.location.end.position
            tStrand = tFeature.strand
            oligoStrand = seqProp.strandChooser(tFeature)
            iArrow = arrow[tStrand + 1]

            if tStrand != strand:
                continue

            pSeq = self.getSequenceRegion(sdata, tStart, -1 * tStrand, boundary, strand)
            tag = "%s:[%s %s %s] t[%s] = %s" % (tName, tStart, iArrow, tEnd, oligoStrand, pSeq)
            result.add(name, "promoter_%s" % (count), tag)

    return result
def findPrimers(self, seq, targetMap, boundary, oligoSize, searchSize, targetTm):
    '''
    Find a forward and a reverse sequencing primer for each target location.

    @input seq: sequence that is sliced around each target; slices must
        support reverse_complement()
    @input targetMap: mapping of sequence name -> genomic location
    @input boundary: distance from the target to each primer search window
    @input oligoSize: primer length passed to self.scanOligoTm
    @input searchSize: half-width of the window scanned for a primer
    @input targetTm: melting temperature passed to self.scanOligoTm
    @return: Report with the primer, adjust, location and TM of the forward
        and reverse primer for every key of targetMap

    usage:
    set boundary and oligo size
    read in report, list of locations
    run find sequence primers.
    write report of result
    '''
    result = Report()
    start = searchSize

    for k, targetLocation in targetMap.items():
        upLocation = targetLocation - boundary
        upSeq = seq[upLocation - searchSize - oligoSize:upLocation + searchSize]

        downLocation = targetLocation + boundary
        downSeq = seq[downLocation - searchSize:downLocation + searchSize + oligoSize]
        downSeq = downSeq.reverse_complement()

        (uTm, uAdjust, oUpSeq) = self.scanOligoTm(upSeq, start, oligoSize, searchSize, targetTm)
        (dTm, dAdjust, oDownSeq) = self.scanOligoTm(downSeq, start, oligoSize, searchSize, targetTm)
        # The reverse window was reverse-complemented, so its offset runs
        # in the opposite direction along the original sequence.
        dAdjust = -dAdjust

        ucLocation = upLocation + uAdjust
        dcLocation = downLocation + dAdjust

        result.add(k, "sequencing location", targetLocation)
        result.add(k, "foward primer", oUpSeq)
        result.add(k, "foward adjust", uAdjust)
        result.add(k, "foward location", ucLocation)
        result.add(k, "foward TM", uTm)
        result.add(k, "reverse primer", oDownSeq)
        result.add(k, "reverse adjust", dAdjust)
        # dcLocation already contains the adjustment; report it as is,
        # mirroring the forward location.
        result.add(k, "reverse location", dcLocation)
        result.add(k, "reverse TM", dTm)

    return result
def generateTargetingOligos(self, records, featureLocations, tagRE, boundary, searchSize, cutOff):
    '''
    Generate a list of oligos for recombination in target locations
    and return report with the targets and sequencing oligos.

    All oligos printed 5' -> 3'.
    Control upstream will be to the left if strands are preserved
    and to the right if strands are switched when matching lagging
    complement. Oligos are selected as an optimized subsection of the
    presented sequences.

    Side effects: writes record ids and alignments to "oligoLog.txt", and
    replaces the seq of every input record with its selected region (the
    returned regions are the same record objects).

    @records: sequences from which to select oligos
    @featureLocations: a list of locations that place the features in a genome
    @tagRE: regular expression for finding tagged sequence within feature sequences
    @boundary: oligo flanking region size
    @searchSize: distance in base pairs to search for optimal oligo
    @cutOff: limit of viable fold change energy for chosen oligos
    @return: tuple (targetMap, result, sRegions): record id -> midpoint of
        the first feature, the Report, and the list of trimmed records
    '''
    result = Report()
    targetMap = {}
    sRegions = []

    with open("oligoLog.txt", "w") as logFile:
        for index, r in enumerate(records):
            recordId = r.id
            features = featureLocations[index]
            hits = len(features)
            first = features[0]
            genomicStart = first.location.start.position
            genomicEnd = first.location.end.position
            genomicStrand = first.strand
            laggingComplementStrand = self.strandChooser(first)

            if "alignment" in first.qualifiers.keys():
                aMatch = first.qualifiers["alignment"]
            else:
                aMatch = ''

            # Integer midpoint of the first feature.
            targetMap[recordId] = (genomicStart + genomicEnd) // 2
            logFile.write(recordId + "\n")
            logFile.write(aMatch + "\n")

            originalString = str(r.seq)
            seqLength = len(originalString)

            # Find target section using special targeting tag; fall back to
            # the middle of the sequence when the tag is absent.
            matchTag = re.search(tagRE, originalString)
            if matchTag is None:
                tagLoc = seqLength // 2
            else:
                targetTag = matchTag.group(0)
                tagLoc = originalString.index(targetTag) + len(targetTag) // 2
                if self.verbose:
                    print("Target [%s] location [%s]" % (targetTag, tagLoc))

            start = int(tagLoc - boundary)
            end = int(tagLoc + boundary)

            #!May need a little touch up
            if start < 0:
                start = 0
                end = int(boundary * 2)
            if end > seqLength:
                end = seqLength
            if self.verbose:
                print("region %s -> %s of %s" % (start, end, seqLength))

            # Widen the region by the search distance plus a 5 base margin.
            lowSearchBound = max(start - searchSize - 5, 0)
            highSearchBound = min(end + searchSize + 5, seqLength)

            searchSeq = r.seq[lowSearchBound:highSearchBound]
            sx = r.seq[start:end]
            if genomicStrand != laggingComplementStrand:
                searchSeq = searchSeq.reverse_complement()
                sx = sx.reverse_complement()

            searchEnd = (end - start + searchSize + 5)
            searchStart = searchSize + 5

            try:
                testSeqs = self.optimizeSecondaryStructure(
                    searchSeq, searchStart, searchEnd, searchSize, cutOff, ratio=2)
                testSeqs.sort()
                if len(testSeqs) == 0:
                    (score, adjust, foldScore, bestSeq) = ("na", "na", "na", "na")
                else:
                    (score, adjust, foldScore, bestSeq) = testSeqs.pop(0)
            except Exception:
                # Best effort: fall back to the unoptimized region, but let
                # KeyboardInterrupt / SystemExit propagate.
                testSeqs = []
                (score, adjust, foldScore, bestSeq) = ("na", "na", "na", sx)
                print("failed to exicute secondary structure test")

            if len(bestSeq) < boundary * 2:
                print("Short Sequence")
            if self.verbose:
                print("%s best %s S:%s [%s] (%s)" % (len(testSeqs), adjust, score, foldScore, len(bestSeq)))
                print("[%s]" % (bestSeq))

            result.add(recordId, "original", originalString)
            result.add(recordId, "hits", hits)
            result.add(recordId, "genomic_start", genomicStart)
            result.add(recordId, "genomic_end", genomicEnd)
            result.add(recordId, "genomic_strand", genomicStrand)
            result.add(recordId, "lagging_complement_strand", laggingComplementStrand)
            result.add(recordId, "best", bestSeq)
            result.add(recordId, "fold score", foldScore)
            result.add(recordId, "off center", adjust)

            # Append to list of sequence regions. The input record itself is
            # trimmed here, not a copy.
            sRegion = r
            sRegion.seq = sx
            sRegions.append(sRegion)

    return (targetMap, result, sRegions)