def main():
    """Download the GI listing for each protein code and append it to GI.txt.

    Protein codes come from ``helpers.getProtienCodesForFasta('E.Coli.txt')``.
    For every code the NCBI "gilist" text report URL is printed.  When
    ``protiens/<first four chars, lower-cased>/<code>.xml`` does not exist
    yet, the report is fetched, the text between the first ``<pre>`` and the
    following ``</pre>`` is extracted, printed, handed to ``getCodon`` and
    appended to ``GI.txt``.  Codes whose ``.xml`` file already exists are
    skipped (presumably that file is produced by ``getCodon`` -- confirm).

    Raises:
        IndexError: if a fetched page contains no ``<pre>`` section.
        urllib2.URLError: propagated unchanged from ``urllib2.urlopen``.
    """
    # Function-local import so this block does not depend on the file's
    # import header, which is not visible from this section.
    from contextlib import closing

    fastaCodes = helpers.getProtienCodesForFasta('E.Coli.txt')

    # The ``with`` block guarantees GI.txt is closed even when a request or
    # the <pre> extraction raises part-way through the batch.
    with open('GI.txt', 'a+') as GIFile:
        for fastaCode in fastaCodes:
            keyCode = fastaCode[0:4].lower()
            fastaURL = ('http://www.ncbi.nlm.nih.gov/protein/' + fastaCode +
                        '?report=gilist&log$=seqview&format=text')
            # Single-argument print(...) behaves exactly like the Python 2
            # print statement used elsewhere in this file.
            print(fastaURL)

            codonFileName = 'protiens/' + keyCode + '/' + fastaCode + ".xml"
            if os.path.exists(codonFileName):
                continue

            fastaRequest = urllib2.Request(
                fastaURL, None, headers={'User-Agent': 'Mozilla/5.0'})
            # urllib2 responses are not context managers in Python 2;
            # closing() releases the socket as soon as the body is read.
            with closing(urllib2.urlopen(fastaRequest)) as fastaResponse:
                page = fastaResponse.read()

            GI = page.split('<pre>')[1].split('</pre>')[0]
            print(GI)
            getCodon(keyCode, fastaCode, GI)
            GIFile.write(GI)
CodonSeq = open('AA_CODONS.csv','r') reader = csv.reader(CodonSeq) for row in reader: if(row[0] == aa1): k=1 while(k<7): if(row[k] == codon1): CodonSeq.close() return True k+=1 CodonSeq.close() return False finally: l=0 keyCodes = helpers.getProtienCodesForFasta('E.Coli.txt') for keyCode in keyCodes: tempKeyCode = keyCode[0:4].lower() filename1 = 'protiens/' + tempKeyCode + '/' + tempKeyCode + '_*_AA_CODONS.txt' filenames = glob.glob(filename1) for filename in filenames: print filename CODON = open(filename, 'r') AA = CODON.readline() CS = next(CODON) CS = CS.upper()
def driver():
    """Print the protein codes that ``helpers`` reads from 'E.Coli.txt'."""
    protein_codes = helpers.getProtienCodesForFasta('E.Coli.txt')
    # A single parenthesised argument prints the same text as the bare
    # Python 2 print statement.
    print(protein_codes)
def _write_matrix(path, structures, codons, lookup):
    """Write one codon-by-structure count matrix to *path* as CSV.

    The header row is an empty cell followed by every entry of *structures*.
    Each following row is one label from *codons* followed by
    ``lookup((codon, structure))`` for every structure, in order.
    """
    with open(path, 'w') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([''] + structures)
        for codon in codons:
            row = [codon]
            row.extend(lookup((codon, structure)) for structure in structures)
            writer.writerow(row)


def main():
    """Build per-file and overall codon-triplet / structure count matrices.

    For every key code from ``helpers.getProtienCodesForFasta('E.Coli.txt')``
    each ``protiens/<key>/<key>_*_GROUP.csv`` file is read.  The first row is
    skipped as a header; for the remaining rows column 2 (right-stripped) is
    taken as an ``XXX|XXX|XXX`` codon-triplet label and column 1
    (right-stripped) as a structure label.  Rows whose pair is one of the
    4**9 ACGT triplet labels combined with an entry of the module-level
    ``allStructures`` are counted, both per file and in the module-level
    ``fin`` mapping.  A ``*_MATRIX.csv`` file is written per input file and
    ``FINAL_MATRIX.csv`` is written once from ``fin`` after all files.

    ``fin`` is expected to already hold an entry for every valid
    (codon, structure) pair; a missing entry raises ``KeyError`` exactly as
    the original code did.
    """
    structures = list(allStructures)
    validStructures = set(structures)

    # The 4**9 triplet labels do not depend on the input file, so they are
    # built once here.  This also keeps them defined for the final matrix
    # when no GROUP file matches at all.
    allCodons = []
    for bases in itertools.product('ACGT', repeat=9):
        s1 = ''.join(bases)
        allCodons.append(s1[0:3] + '|' + s1[3:6] + '|' + s1[6:9])
    validCodons = set(allCodons)

    keycodes = helpers.getProtienCodesForFasta('E.Coli.txt')
    for keycode in keycodes:
        tempKeycode = keycode[0:4].lower()
        path = "protiens/" + tempKeycode + "/" + tempKeycode + "_*_GROUP.csv"
        for filename in glob.glob(path):
            # Sparse per-file counts: pairs never seen are reported as 0 when
            # the matrix is written, which matches a zero-filled dense table.
            d = {}
            with open(filename, 'r') as infile:
                reader = csv.reader(infile)
                next(reader, None)  # skip the header row
                for line in reader:
                    key = (line[2].rstrip(), line[1].rstrip())
                    if key[0] in validCodons and key[1] in validStructures:
                        d[key] = d.get(key, 0) + 1
                        fin[key] += 1

            # NOTE(review): the '1kf6_' prefix is hard-coded although the
            # directory uses tempKeycode, and filename[-13] is one fixed
            # character of the input name -- confirm this naming is intended.
            _write_matrix(
                'protiens/' + tempKeycode + '/' + '1kf6_' + filename[-13] + '_MATRIX.csv',
                structures, allCodons, lambda key: d.get(key, 0))
            print(filename)

    _write_matrix('FINAL_MATRIX.csv', structures, allCodons,
                  lambda key: fin[key])