예제 #1
0
def query_seqs_in_file(filename, new_dir_path):
    """Query every FASTQ record in *filename* and save the results to disk.

    Each record's sequence is passed to ``blastquery.query``; the contents of
    the returned handle are written to ``file_<i>.xml`` inside *new_dir_path*
    (the directory is created when it does not exist yet). After every record,
    ``classifier.getSpeciesFromXMLs`` is re-run over all XML files written so
    far and its result is dumped to ``data.json`` in the same directory, so the
    JSON file reflects the records processed up to that point.

    Args:
        filename: Path of the FASTQ file parsed with ``SeqIO.parse``.
        new_dir_path: Output directory for the XML and JSON files. A trailing
            path separator is optional.

    Returns:
        None. The results are the files written under *new_dir_path*.
    """
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    print("printing out sequences!")

    # Only create the directory when nothing exists at that path; unrelated
    # errors are no longer hidden behind a bare ``except``.
    if not os.path.exists(new_dir_path):
        os.mkdir(new_dir_path)

    # os.path.join keeps the outputs inside the directory whether or not the
    # caller supplied a trailing separator.
    json_dest = os.path.join(new_dir_path, "data.json")
    file_names = []
    for i, record in enumerate(SeqIO.parse(filename, "fastq")):
        print("printing out a sequence!")
        print("%s: %d" % (record.id, len(record.seq)))
        xml_obj = blastquery.query(record.seq)

        xml_dest = os.path.join(new_dir_path, "file_" + str(i) + ".xml")
        with open(xml_dest, 'w') as xml_file:
            xml_file.write(xml_obj.read())
        file_names.append(xml_dest)

        # Re-classify over every XML written so far and overwrite the JSON
        # summary, so partial results survive an interrupted run.
        species_counts = classifier.getSpeciesFromXMLs(file_names, 0)
        with open(json_dest, 'w') as json_file:
            json.dump(species_counts, json_file)
예제 #2
0
파일: main.py 프로젝트: blt2114/gen_snacks
import os

from classifier import getSpeciesFromXMLs
from confusionMat import getConfusionMatrix

# os.listdir() yields bare file names. They are kept relative on purpose:
# the directory being listed is the current working directory, so relative
# names resolve to the same files when they are opened later.
xmls = [
    entry for entry in os.listdir(os.getcwd())
    if entry.endswith(".xml")
]

speciesDict = getSpeciesFromXMLs(xmls, 1)

confusionMat = getConfusionMatrix("Salmo salar", xmls, speciesDict)

# Prints out answer to question 3: one "label : value" line per matrix entry.
for label in confusionMat:
    print(label + " : " + str(confusionMat[label]))