prefs = {'evalue': 0.001, 'outfmt_pref': 6} print "blasting,", if blast_mode == 'n': local_blastn_2file(infile, dbfile_path, outfile, prefs) elif blast_mode == 'tx': local_tblastx_2file(infile, dbfile_path, outfile, prefs) elif blast_mode == 'tn': local_tblastn_2file(infile, dbfile_path, outfile, prefs) except Exception: print "failed to blast" break else: print "finding range," try: # load blast results from file rec_array = read_array(outfile, blast_dtypes) except IOError: print "failed to load blast results" break except Exception: print "failed to load blast results (unknown error)" break else: print rec_array[0] # take first line (= best hit) only try: line = rec_array[0] except IndexError: print "empty blast results" break if line[8] < line[9]:
from sys import argv from Bio.SeqRecord import SeqRecord from libs.common import read_array, blast_dtypes, load_fasta, write_fasta, ensure_dir data_dir = "data/" + argv[1] + "/" main_in = data_dir + argv[2] + "_results.txt" main_out = data_dir + argv[2] + "_ctxt.fas" ctx_dir = data_dir + "context/" capture_span = int(argv[3]) ensure_dir([ctx_dir]) records = [] rec_array = read_array(main_in, blast_dtypes) descript = "Context of " + argv[2] + " (" + argv[3] + " bp either side)" for line in rec_array: query = line[0] subject = line[1] print subject rev_flag = False if line[8] < line[9]: q_start, q_stop = line[8] - 1, line[9] rev_flag = False else:
from libs.common import from_dir, read_array, blast_dtypes, load_fasta, write_fasta

# Working locations: data/<argv[1]>/ receives the compiled and per-query
# result files; data/<argv[1]>/blast_out/ holds the BLAST tables to read.
data_dir = "data/" + argv[1] + "/"
blast_out_dir = data_dir + "blast_out/"

# Cutoff that column 2 of a hit row must exceed for the row to be kept
# (presumably the identity percentage of BLAST tabular output -- TODO confirm).
idp = int(argv[2])

# Compiled results file collecting every retained row from every table.
main_out = open(data_dir + "comp_results.txt", 'w')
records_dict = {}

# list files in blast results directory (pattern targets names with ".txt")
filenames = from_dir(blast_out_dir, re.compile(r'.*\.txt.*'))

for filename in filenames:
    # counts, per input table, the hits whose per-query file already existed
    counter = 0
    # load the table as an array of rows
    rec_array = read_array(blast_out_dir + filename, blast_dtypes)
    for line in rec_array:
        # keep only rows above the requested cutoff
        if line[2] > idp:
            query = line[0]
            subject = line[1]
            # write the row, tab-separated, to the compiled results file
            row_text = "\t".join([str(item) for item in line])
            main_out.write(row_text + "\n")
            outfile = data_dir + query + "_results.txt"
            if path.exists(outfile):
                # per-query file already present: count it and append
                counter += 1
                out_handle = open(outfile, 'a')
            else:
                # first retained hit for this query: create the file
                out_handle = open(outfile, 'w')
from sys import argv from Bio.SeqRecord import SeqRecord from libs.common import read_array, blast_dtypes, load_fasta, write_fasta, ensure_dir data_dir = "data/"+argv[1]+"/" main_in = data_dir+argv[2]+"_results.txt" main_out = data_dir+argv[2]+"_ctxt.fas" ctx_dir = data_dir+"context/" capture_span = int(argv[3]) ensure_dir([ctx_dir]) records = [] rec_array = read_array(main_in, blast_dtypes) descript = "Context of "+argv[2]+" ("+argv[3]+" bp either side)" for line in rec_array: query = line[0] subject = line[1] print subject rev_flag = False if line[8] < line[9]: q_start, q_stop = line[8]-1, line[9] rev_flag = False else:
else: # tblastn against the reference DB infile = data_dir + 'temp.fas' outfile = data_dir + 'temp.txt' write_fasta(infile, SeqRecord(rec.seq.translate(), id='temp')) prefs = {'evalue': 0.001, 'outfmt_pref': 6} try: local_tblastn_2file(infile, db_path, outfile, prefs) except Exception: print "failed to blast" exit() # parse output -- take only first hit try: hit = read_array(outfile, blast_dtypes)[0] except IndexError: print '-', # add record to DB as new symbol sym_cnt += 1 symbol = 'N' + str(sym_cnt) rec.id = symbol symbolDB[symbol] = [rec] g_vector.append(symbol) init_DB = True else: if hit[2] > threshold: print '+', # add record to DB as secondary match symbol = hit[1] rec.id = symbol + '_' + str(
from libs.common import from_dir, read_array, blast_dtypes, load_fasta, write_fasta

# Working locations: data/<argv[1]>/ receives the compiled and per-query
# result files; data/<argv[1]>/blast_out/ holds the BLAST tables to read.
data_dir = "data/" + argv[1] + "/"
blast_out_dir = data_dir + "blast_out/"

# Cutoff that column 2 of a hit row must exceed for the row to be kept
# (presumably the identity percentage of BLAST tabular output -- TODO confirm).
idp = int(argv[2])

# Compiled results file collecting every retained row from every table.
main_out = open(data_dir + "comp_results.txt", 'w')
records_dict = {}

# list files in blast results directory (pattern targets names with ".txt")
filenames = from_dir(blast_out_dir, re.compile(r'.*\.txt.*'))

for filename in filenames:
    # counts, per input table, the hits whose per-query file already existed
    counter = 0
    # load the table as an array of rows
    rec_array = read_array(blast_out_dir + filename, blast_dtypes)
    for line in rec_array:
        # keep only rows above the requested cutoff
        if line[2] > idp:
            query = line[0]
            subject = line[1]
            # write the row, tab-separated, to the compiled results file
            row_text = "\t".join([str(item) for item in line])
            main_out.write(row_text + "\n")
            outfile = data_dir + query + "_results.txt"
            if path.exists(outfile):
                # per-query file already present: count it and append
                counter += 1
                out_handle = open(outfile, 'a')
            else:
                # first retained hit for this query: create the file
                out_handle = open(outfile, 'w')
else: # tblastn against the reference DB infile = data_dir+'temp.fas' outfile = data_dir+'temp.txt' write_fasta(infile, SeqRecord(rec.seq.translate(), id='temp')) prefs = {'evalue': 0.001, 'outfmt_pref': 6} try: local_tblastn_2file(infile, db_path, outfile, prefs) except Exception: print "failed to blast" exit() # parse output -- take only first hit try: hit = read_array(outfile, blast_dtypes)[0] except IndexError: print '-', # add record to DB as new symbol sym_cnt +=1 symbol = 'N'+str(sym_cnt) rec.id = symbol symbolDB[symbol] = [rec] g_vector.append(symbol) init_DB = True else: if hit[2] > threshold: print '+', # add record to DB as secondary match symbol = hit[1] rec.id = symbol+'_'+str(len(symbolDB[symbol])+1)