def rows():
    '''Yield the best (lowest) energy for every candidate/sequence pair.

    Takes the xml blast file named by ``iFile`` (a name resolved outside this
    function), walks the hit records produced by ``B.ncbiXmlHitGenerator``
    and, for each record, scores the hit sequence against the reverse
    complement of the query sequence with ``A.energy``.  Only the first
    entry of ``rec['hsps']`` is considered for each record.

    Yields:
        ``[canID, seqID, energy]`` lists, one per distinct
        ``(canID, seqID)`` pair, where ``energy`` is the smallest value seen
        for that pair.  Nothing is yielded until the whole input has been
        read.
    '''
    pairCache = {}  # (canID, seqID) -> lowest energy seen so far
    canCache = {}   # candidate name -> candidate ID, resolved once per name

    for rec in B.ncbiXmlHitGenerator(iFile):
        seqID = int(rec['def'])
        canName = rec['query-def']

        firstHsp = rec['hsps'][0]
        qseq = firstHsp['qseq']
        hseq = firstHsp['hseq']
        en = A.energy(hseq, B.reverseComplement(qseq))

        # Timesaver: construct a Candidate only the first time a name is seen.
        if canName not in canCache:
            canCache[canName] = Candidate(canName).ID()
        canID = canCache[canName]

        # Keep the most negative value for the pair.
        key = (canID, seqID)
        if key not in pairCache or en < pairCache[key]:
            pairCache[key] = en

    # Iterating the dict directly (instead of the Python-2-only iterkeys())
    # behaves the same on Python 2 and also works on Python 3.
    for canID, seqID in pairCache:
        yield [canID, seqID, pairCache[canID, seqID]]
def _summarizePairs(pairCache):
    '''Yield one statistics row for every ``(tID, canID)`` key of pairCache.

    Args:
        pairCache: dict mapping ``(tID, canID)`` to a non-empty list of
            energies.

    Yields:
        ``[canID, tID, min, max, mean, std, count, n30, n40, n50, n100]``
        where ``nXX`` is the number of energies ``<= -XX``.  ``mean`` and
        ``std`` come from ``N.mean`` / ``N.std`` (``N`` is resolved outside
        this block -- presumably numpy; confirm against the file's imports).
    '''
    # Plain dict iteration works on both Python 2 and Python 3, unlike the
    # Python-2-only iterkeys().
    for tID, canID in pairCache:
        energies = pairCache[tID, canID]
        thresholdCounts = [
            sum(1 for e in energies if e <= cutoff)
            for cutoff in (-30, -40, -50, -100)
        ]
        yield [
            canID,
            tID,
            min(energies),
            max(energies),
            N.mean(energies),
            N.std(energies),
            len(energies),
        ] + thresholdCounts


def rows():
    '''Yield per-(candidate, taxon) energy statistics from a blast xml file.

    Walks the hit records produced by ``B.ncbiXmlHitGenerator(iFile)``
    (``iFile`` is a name resolved outside this function).  For each record
    the hit sequence is scored with ``A.energy`` against the reverse
    complement of the query sequence, taken from the first entry of
    ``rec['hsps']``.  The energy is recorded once for every taxon returned
    by ``Sequence(seqID).taxa()``.

    Energies are grouped per candidate: whenever ``rec['query-def']`` differs
    from the previous record's, the rows for the finished candidate are
    emitted and the accumulator is reset, so memory holds one candidate's
    data at a time.  This relies on all records of a candidate being
    adjacent in the input; a name that reappears later produces a second,
    separate set of rows.

    Yields:
        ``[canID, tID, min_E, max_E, mean_E, SD_E, N_E, N_E30, N_E40,
        N_E50, N_E100]`` lists, where ``N_E`` is the number of energies for
        the pair and ``N_Exx`` the number of energies ``<= -xx``.
    '''
    canCache = {}    # candidate name -> candidate ID, resolved once per name
    pairCache = {}   # (tID, canID) -> list of energies for current candidate
    lastCanName = ''

    for rec in B.ncbiXmlHitGenerator(iFile):
        seqID = int(rec['def'])
        canName = rec['query-def']

        if canName != lastCanName:
            # Moved on to the next candidate: emit what was gathered for the
            # previous one and start a fresh accumulator.
            for rv in _summarizePairs(pairCache):
                yield rv
            pairCache = {}
            lastCanName = canName

        # Timesaver: construct a Candidate only the first time a name is seen.
        if canName not in canCache:
            canCache[canName] = Candidate(canName).ID()
        canID = canCache[canName]

        # NOTE(review): only candidates whose ID is a multiple of 4 are
        # processed; this looks like a sampling/partitioning filter --
        # confirm it is intentional before relying on the output.
        if canID % 4 != 0:
            continue

        firstHsp = rec['hsps'][0]
        qseq = firstHsp['qseq']
        hseq = firstHsp['hseq']
        en = A.energy(hseq, B.reverseComplement(qseq))

        # Every energy is kept (not just the minimum) so that the summary
        # statistics can be computed per (taxon, candidate) pair.
        for t in Sequence(seqID).taxa():
            pairCache.setdefault((t.ID(), canID), []).append(en)

    # Emit the rows for the last candidate in the file.
    for rv in _summarizePairs(pairCache):
        yield rv