def dbORF( inFileName, nb=0, size=0, outFileName="" ):
    """
    Find ORFs in every sequence of a file and write the longest ones to a
    tab-separated map file.

    For each sequence read from inFileName, Bioseq.findORF() is called on the
    upper-cased sequence and then on the sequence returned by
    Bioseq.complement(). Candidates are ranked by decreasing length and the
    top ones are printed and written as:

        ORF|<frame>|<length> <TAB> <sequence header> <TAB> <start> <TAB> <end>

    Frames found on the original sequence are numbered from +1 upwards,
    frames found on the complemented sequence from -1 downwards.

    @param inFileName: path of the input sequence file
    @param nb: maximum number of ORFs reported per sequence (0 = all)
    @param size: minimum length (inclusive) for an ORF to be kept
    @param outFileName: output path (default: inFileName + ".orf.map")
    @return: 0
    """
    if outFileName == "":
        outFileName = inFileName + ".orf.map"

    # 'with' guarantees both handles are closed even if Bioseq raises.
    with open( inFileName ) as inFile, open( outFileName, "w" ) as outFile:
        seq = Bioseq()
        numseq = 0

        while True:
            seq.read( inFile )
            if seq.sequence is None:
                break
            seq.upCase()
            numseq += 1
            print( "sequence # %s = %s [ %s ...]"
                   % ( numseq, seq.getLength(), seq.header[0:40] ) )

            best_orf = []

            # Original orientation. findORF() is assumed to return a mapping
            # {frame index: ordered positions}; an ORF is the span between two
            # consecutive positions -- TODO confirm against Bioseq.findORF.
            orf = seq.findORF()
            for frame in orf.keys():
                positions = orf[frame]
                for j in range( 1, len(positions) ):
                    start = positions[j-1] + 4
                    end = positions[j] + 3
                    if end - start >= size:
                        best_orf.append( ( end-start, frame+1, start, end ) )

            # Complemented orientation: coordinates are converted back to the
            # original sequence, hence start > end for these entries.
            seq.sequence = seq.complement()
            orf = seq.findORF()
            seqlen = seq.getLength()
            for frame in orf.keys():
                positions = orf[frame]
                for j in range( 1, len(positions) ):
                    start = seqlen - positions[j-1] - 3
                    end = seqlen - positions[j] - 2
                    if start - end >= size:
                        best_orf.append( ( start-end, -(frame+1), start, end ) )

            # Longest first (tuples compare on their first field, the length).
            best_orf.sort( reverse=True )

            # Per-sequence cap. 'nb' itself must NOT be overwritten here:
            # doing so leaked the cap computed for one sequence into all the
            # following ones.
            if nb == 0 or nb > len(best_orf):
                nb_reported = len(best_orf)
            else:
                nb_reported = nb

            for i in range( 0, nb_reported ):
                orf_len, frame, start, end = best_orf[i]
                print( best_orf[i] )
                outFile.write( "%s\t%s\t%d\t%d\n"
                               % ( "ORF|" + str(frame) + "|" + str(orf_len),
                                   seq.header, start, end ) )

    return 0
def dbTraduit(inFileName,phase=0,complement='T',pep_filename=""):
    """
    deprecated

    Translate every sequence of a file with Bioseq.traduit() and write the
    results to a peptide file.

    @param inFileName: path of the input sequence file
    @param phase: 1, 2 or 3 translates that single phase of the sequence as
                  read; -1, -2 or -3 translates that single phase of the
                  sequence returned by Bioseq.complement(); 0 translates
                  phases 1 to 3 and, when complement is 'T', also -1 to -3
    @param complement: 'T' to include the complemented phases when phase is 0
    @param pep_filename: output path (default: inFileName + '.pep')
    """
    if pep_filename=="":
        pep_filename=inFileName+'.pep'

    # 'with' guarantees both handles are closed even if Bioseq raises.
    with open(inFileName) as file_db, open(pep_filename,'w') as file_pep:
        seq=Bioseq()
        seq_out=Bioseq()
        numseq=0

        while True:
            seq.read(file_db)
            if seq.sequence is None:
                break
            numseq+=1
            print("sequence # %s = %s [ %s ...]"
                  % (numseq, seq.getLength(), seq.header[0:40]))

            if phase>=0:
                for frame in (1,2,3):
                    if phase==frame or phase==0:
                        seq_out.sequence=seq.traduit(frame)
                        seq_out.header=seq.header+" (phase %d)"%frame
                        seq_out.write(file_pep)

            if complement=='T' or phase<0:
                # From here on 'seq' holds the complemented sequence, so
                # traduit(frame) yields the translation labelled "-frame".
                seq.sequence=seq.complement()
                for frame in (1,2,3):
                    if phase==-frame or phase==0:
                        seq_out.sequence=seq.traduit(frame)
                        seq_out.header=seq.header+" (phase -%d)"%frame
                        seq_out.write(file_pep)
def dbComplement(inFileName,comp_filename=""):
    """
    deprecated

    Write, for every sequence of a file, the sequence returned by
    Bioseq.complement(), with " (complement!)" appended to its header.

    @param inFileName: path of the input sequence file
    @param comp_filename: output path (default: inFileName + '.comp')
    """
    if comp_filename=="":
        comp_filename=inFileName+'.comp'

    # 'with' guarantees both handles are closed even if Bioseq raises.
    with open(inFileName) as file_db, open(comp_filename,'w') as file_comp:
        seq=Bioseq()
        numseq=0

        while True:
            seq.read(file_db)
            if seq.sequence is None:
                break
            numseq+=1
            print("sequence # %s = %s [ %s ...]"
                  % (numseq, seq.getLength(), seq.header[0:40]))
            seq.sequence=seq.complement()
            seq.header=seq.header+" (complement!)"
            seq.write(file_comp)