Esempio n. 1
0
def main():
    """Download the GI listing for every protein code that has no XML file yet.

    The codes come from helpers.getProtienCodesForFasta('E.Coli.txt').  For
    each code the NCBI "gilist" text report URL is printed; if
    'protiens/<first four characters, lower-cased>/<code>.xml' does not exist,
    the page is fetched, the text between the first '<pre>' and the following
    '</pre>' is extracted, printed, passed to getCodon() and appended to
    'GI.txt'.

    Raises:
        IndexError: if the downloaded page contains no '<pre>' section.
        urllib2.URLError: if the request fails.
    """
    fastaCodes = helpers.getProtienCodesForFasta('E.Coli.txt')

    # The context manager guarantees GI.txt is flushed and closed even when a
    # download, the page parsing or getCodon() raises part-way through.
    with open('GI.txt', 'a+') as GIFile:
        for fastaCode in fastaCodes:
            keyCode = fastaCode[0:4].lower()
            fastaURL = 'http://www.ncbi.nlm.nih.gov/protein/' + fastaCode + '?report=gilist&log$=seqview&format=text'
            print(fastaURL)

            codonFileName = 'protiens/' + keyCode + '/' + fastaCode + ".xml"
            if os.path.exists(codonFileName):
                # Already processed on an earlier run; nothing to download.
                continue

            fastaRequest = urllib2.Request(fastaURL, None, headers={'User-Agent': 'Mozilla/5.0'})
            fastaResponse = urllib2.urlopen(fastaRequest)
            try:
                page = fastaResponse.read()
            finally:
                # Release the HTTP connection as soon as the body is read.
                fastaResponse.close()

            GI = page.split('<pre>')[1].split('</pre>')[0]
            print(GI)
            getCodon(keyCode, fastaCode, GI)
            GIFile.write(GI)
Esempio n. 2
0
		CodonSeq = open('AA_CODONS.csv','r')
		reader = csv.reader(CodonSeq)
   		for row in reader:
   			if(row[0] == aa1):
   				k=1
   				while(k<7):
   					if(row[k] == codon1):
   						CodonSeq.close()
   						return True
   					k+=1	
   				CodonSeq.close()	
   				return False	 				  						
	finally:
		l=0

# Walk every '<code>_*_AA_CODONS.txt' file of every protein code and read its
# first two lines.
keyCodes = helpers.getProtienCodesForFasta('E.Coli.txt')

for keyCode in keyCodes:
    # Files live in a directory named after the lower-cased first four
    # characters of the code.
    tempKeyCode = keyCode[0:4].lower()
    filename1 = 'protiens/' + tempKeyCode + '/' + tempKeyCode + '_*_AA_CODONS.txt'

    filenames = glob.glob(filename1)

    for filename in filenames:
        print(filename)
        # Open with a context manager so each handle is closed before the
        # next file is processed instead of leaking one handle per file.
        with open(filename, 'r') as CODON:
            # First line goes to AA, second line to CS.
            # NOTE(review): presumably the amino-acid sequence and its codon
            # sequence -- confirm against the code that writes these files.
            AA = CODON.readline()
            CS = next(CODON)
        CS = CS.upper()

Esempio n. 3
0
def driver():
    """Print the value returned by helpers.getProtienCodesForFasta('E.Coli.txt')."""
    protein_codes = helpers.getProtienCodesForFasta('E.Coli.txt')
    print(protein_codes)
def _write_matrix(path, structures, codons, counts):
    """Write a codon-by-structure count table to ``path`` as CSV.

    The first row is an empty cell followed by every entry of ``structures``.
    Each following row is one entry of ``codons`` followed by
    ``counts[(codon, structure)]`` for every structure, in the same order.
    The file is always closed, even if a lookup in ``counts`` fails.
    """
    with open(path, 'w') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([''] + list(structures))
        for codon in codons:
            writer.writerow([codon] + [counts[(codon, structure)] for structure in structures])


def main():
    """Count (codon triplet, structure) pairs in every '*_GROUP.csv' file.

    For each protein code returned by
    helpers.getProtienCodesForFasta('E.Coli.txt'), every file matching
    'protiens/<code>/<code>_*_GROUP.csv' is read.  The first row is skipped;
    for every other row, column 2 (right-stripped) is the triplet label and
    column 1 (right-stripped) is the structure.  Rows whose pair is one of the
    known combinations are counted both in a per-file table and in ``fin``.
    After each file, the per-file table and the running ``fin`` table are
    written out as CSV matrices.

    ``allStructures`` and ``fin`` are not defined in this function.
    NOTE(review): presumably module-level names, with ``fin`` pre-populated
    for every (triplet, structure) pair -- confirm.
    """
    keycodes = helpers.getProtienCodesForFasta('E.Coli.txt')

    # Every 9-base sequence over ACGT, formatted as 'XXX|XXX|XXX'.  This list
    # does not depend on the input files, so it is built once rather than
    # once per file.
    allCodons = []
    for bases in itertools.product('ACGT', repeat=9):
        sequence = ''.join(bases)
        allCodons.append('|'.join((sequence[0:3], sequence[3:6], sequence[6:9])))

    for keycode in keycodes:
        tempKeycode = keycode[0:4].lower()
        path = "protiens/" + tempKeycode + "/" + tempKeycode + "_*_GROUP.csv"

        for filename in glob.glob(path):
            # Fresh zeroed table for this file; also defines which pairs are
            # considered valid when scanning the rows below.
            d = dict.fromkeys(
                ((codons, structure) for codons in allCodons for structure in allStructures),
                0)

            with open(filename, 'r') as infile:
                reader = csv.reader(infile)
                next(reader, None)  # skip the first (header) row, if any
                for line in reader:
                    key = (line[2].rstrip(), line[1].rstrip())
                    if key in d:
                        d[key] += 1
                        fin[key] += 1

            # NOTE(review): the '1kf6_' prefix is hard-coded although the file
            # is written into the current protein's directory, and
            # filename[-13] assumes a fixed-length file-name tail.  Both are
            # kept unchanged so existing output names do not move -- confirm
            # they are intended.
            _write_matrix(
                'protiens/' + tempKeycode + '/' + '1kf6_' + filename[-13] + '_MATRIX.csv',
                allStructures, allCodons, d)
            print(filename)

            # The cumulative matrix is rewritten after every processed file,
            # exactly as before, so it is only created once at least one
            # input file has been found.
            _write_matrix('FINAL_MATRIX.csv', allStructures, allCodons, fin)