def filter_potential_sines(in_fname, sine_string, sine_header=67, maxerr=19, reverse_complement=False):
    """
    Finds candidate SINEs with a certain distance from a prefix length.
    To be used for preliminary screening (input for later steps).

    Reads FASTQ records from ``in_fname`` and writes to stdout, in FASTQ
    format, every record whose sequence approximately matches the first
    ``sine_header`` characters of ``sine_string``.

    Parameters:
        in_fname: path handed to ``gene_lib.open_compressed`` (text mode).
        sine_string: SINE sequence; only its first ``sine_header`` characters
            are compiled into the search pattern.
        sine_header: length of the SINE prefix used as the pattern.
        maxerr: value passed to ``tre.Fuzzyness(maxerr=...)``.
        reverse_complement: when true, the reverse complement of each
            sequence is searched instead of the sequence itself.  The record
            written out is always the original, unmodified record.
    """
    # The pattern is built from the ``sine_string`` argument.  The previous
    # code sliced an undefined name (``sine``) and never used the parameter.
    pattern = tre.compile(sine_string[:sine_header], tre.EXTENDED)
    fuzziness = tre.Fuzzyness(maxerr=maxerr)

    with gene_lib.open_compressed(in_fname, 'rt') as in_file_handle:
        for rec in SeqIO.parse(in_file_handle, format="fastq"):
            if reverse_complement:
                cur_seq = rec.seq.reverse_complement()
            else:
                cur_seq = rec.seq

            # match.groups() would give the match location as a tuple of
            # tuples, e.g. ((2, 78),); only the fact of a match is used here.
            if pattern.search(str(cur_seq), fuzziness):
                SeqIO.write(rec, sys.stdout, 'fastq')
def check(self, path):
    """ the real check

    Runs the parent class check, then scans every file yielded by
    ``uub.FilteredDirWalkGenerator(path)`` line by line for approximate,
    case-insensitive matches of the word "univention" (at most 2 errors).
    Lines matching ``RE_WHITELINE`` are skipped.  Each hit whose matched text
    is not accepted by ``RE_WHITEWORD`` is logged via ``self.debug`` and
    reported via ``self.addmsg`` under id '0015-2'.
    """
    super(UniventionPackageCheck, self).check(path)

    fz = tre.Fuzzyness(maxerr=2)
    # Raw string: "\<" and "\>" are not valid Python escapes, so the non-raw
    # literal only worked by accident and triggers a warning on newer Pythons.
    pt = tre.compile(r"\<univention\>", tre.EXTENDED | tre.ICASE)

    for fn in uub.FilteredDirWalkGenerator(path):
        # ``with`` closes the file on every exit path.
        with open(fn, 'r') as fd:
            for lnr, line in enumerate(fd, start=1):
                if UniventionPackageCheck.RE_WHITELINE.match(line):
                    continue
                pos = 0
                while True:
                    m = pt.search(line[pos:], fz)
                    if not m:
                        break
                    if not UniventionPackageCheck.RE_WHITEWORD.match(m[0]):
                        self.debug('%s:%d: found="%s" origline="%s"' % (fn, lnr, m[0], line))
                        self.addmsg(
                            '0015-2',
                            'univention is incorrectly spelled: %s' % m[0],
                            filename=fn,
                            line=lnr)
                    # The match offsets are relative to line[pos:], so
                    # advancing by the match end resumes right after this hit.
                    pos += m.groups()[0][1]
def __init__(self, regex, target_group=0, maxerr=1, caseSensitive=True):
    """Remember the search settings and compile ``regex`` with TRE.

    ``maxerr`` is passed to ``tre.Fuzzyness``.  The pattern is always
    compiled with ``tre.EXTENDED``; ``tre.ICASE`` is added when
    ``caseSensitive`` is false.
    """
    if caseSensitive:
        compile_flags = tre.EXTENDED
    else:
        compile_flags = tre.ICASE | tre.EXTENDED

    self.regex = regex
    self.target_group = target_group
    self.fuzzyness = tre.Fuzzyness(maxerr=maxerr)
    self.r = tre.compile(regex, compile_flags)
def filter_potential_sines(records, sine_pattern, sine_header=67, maxerr=14):
    """Yield each record whose sequence approximately contains the SINE prefix.

    The pattern is the first ``sine_header`` characters of ``sine_pattern``;
    ``maxerr`` is passed to ``tre.Fuzzyness``.  Each record's ``seq`` is
    converted with ``str()`` before searching.
    """
    prefix_matcher = tre.compile(sine_pattern[:sine_header], tre.EXTENDED)
    tolerance = tre.Fuzzyness(maxerr=maxerr)
    for candidate in records:
        if prefix_matcher.search(str(candidate.seq), tolerance):
            yield candidate
def search_cdr3_seq(self, seq, mmaxerr, end_to_end=True):
    """Return the label of the first pattern that matches ``seq``.

    The error budget is raised step by step from 0 to ``mmaxerr`` inclusive,
    so a match found with fewer errors wins.  Patterns come from
    ``self.patterns_end_to_end`` when ``end_to_end`` is true, otherwise from
    ``self.patterns_any``.  Each entry is indexed as (label, compiled
    pattern).  Returns ``np.nan`` when nothing matches.
    """
    for allowed_errors in range(mmaxerr + 1):
        tolerance = tre.Fuzzyness(maxerr=allowed_errors)
        if end_to_end:
            candidates = self.patterns_end_to_end
        else:
            candidates = self.patterns_any
        for entry in candidates:
            if entry[1].search(seq, tolerance):
                return entry[0]
    return np.nan
def new_SINES_filter_for_histogram(in_file_initial_filtering, main_dict, noDuplicate, distribution_of_neighbors, lenght, key_size=9, maxerr=3):
    """Parse the pre-filtered input file and run the histogram filter on it.

    Opens ``in_file_initial_filtering`` via ``open_any`` in text mode, parses
    it with ``gene_records_parse`` and passes the records, together with a
    ``tre.Fuzzyness(maxerr=maxerr)`` object and the remaining arguments, to
    ``new_SINES_filter_proc_histogram``.
    """
    tolerance = tre.Fuzzyness(maxerr=maxerr)
    with open_any(in_file_initial_filtering, "rt") as source_handle:
        parsed_records = gene_records_parse(source_handle)
        new_SINES_filter_proc_histogram(
            parsed_records,
            main_dict,
            noDuplicate,
            key_size,
            tolerance,
            distribution_of_neighbors,
            lenght,
        )
def new_SINES_filter_for_graph(in_file_initial_filtering, main_dict, i=0, key_size=9, maxerr=3):
    """Parse the pre-filtered input file and run the graph filter on it.

    Opens ``in_file_initial_filtering`` via ``open_compressed`` in text mode,
    parses it with ``gene_records_parse`` and passes the records, a
    ``tre.Fuzzyness(maxerr=maxerr)`` object and the remaining arguments to
    ``new_SINES_filter_proc_graph``.
    """
    tolerance = tre.Fuzzyness(maxerr=maxerr)
    with open_compressed(in_file_initial_filtering, "rt") as source_handle:
        parsed_records = gene_records_parse(source_handle)
        new_SINES_filter_proc_graph(parsed_records, main_dict, key_size, tolerance, i)
def getNTweetsWithFuzz(locations, numTweets, fuzz):
    """
    Collect up to ``numTweets`` unique matches whose cost equals ``fuzz``.

    Give this function more locations than you need results (e.g. enough for
    500 results if numTweets is 200) so it has enough to pull entries with
    the requested fuzz/cost. We use this function to check the precision of
    the algorithm at various edit distances.

    ``locations`` is consumed: random entries are popped from it in chunks of
    up to ``numTweets * 100`` and handed to ``mainSearch`` until enough hits
    are found or ``locations`` is empty.  Hits are de-duplicated on their
    "match" value.

    Returns a list of at most ``numTweets`` result dicts.  It may be shorter
    when ``locations`` runs out first.
    """
    global fz
    fz = tre.Fuzzyness(maxerr=fuzz)

    res = []
    seen = []
    print("")
    print("Starting search...")
    startTime = time.time()

    # Stop when enough hits are collected OR nothing is left to sample.
    # Previously random.randrange(0) raised once ``locations`` was exhausted.
    while len(res) < numTweets and locations:
        # Pass the function chunks until it finds enough hits with the requested cost.
        chunk_size = min(numTweets * 100, len(locations))
        randomLocations = [
            locations.pop(random.randrange(len(locations)))
            for _ in range(chunk_size)
        ]
        print("Testing {} random locations.".format(len(randomLocations)))

        # Only the NEW hits are filtered against ``seen``.  Previously the
        # accumulated list was re-filtered each round, so every hit from an
        # earlier round (already in ``seen``) was thrown away again.
        for r in mainSearch(randomLocations, fuzz):
            if r["match"] not in seen and r["cost"] == fuzz:
                res.append(r)
                seen.append(r["match"])

        print("Number of tweets at cost {} found so far: {}".format(
            fuzz, len(res)))

    print("--- %s seconds ---" % (time.time() - startTime))
    return res[:numTweets]
def filter_potential_sines_and_locations(in_file_unify, in_file_sine, out_file_with_sine, out_file_location, sine_header=67, maxerr=14):
    """Filter records against the SINE prefix, writing hits and their locations.

    The SINE is loaded with ``gene_lib.get_sine_forward(in_file_sine)`` and
    its first ``sine_header`` characters are compiled as the pattern.  The
    input and both outputs are opened with ``open_compressed``; the parsed
    records, the pattern, the fuzziness object and the two output handles are
    then passed to ``filter_potential_sines_and_locations_proc``.
    """
    sine_seq = gene_lib.get_sine_forward(in_file_sine)  # "B1.fasta"
    prefix_matcher = tre.compile(sine_seq[:sine_header], tre.EXTENDED)
    tolerance = tre.Fuzzyness(maxerr=maxerr)

    with open_compressed(in_file_unify, "rt") as source_handle:
        with open_compressed(out_file_with_sine, "wt") as sine_out:
            with open_compressed(out_file_location, "wt") as location_out:
                parsed_records = gene_records_parse(source_handle)
                filter_potential_sines_and_locations_proc(
                    parsed_records,
                    prefix_matcher,
                    tolerance,
                    sine_out,
                    location_out,
                )
def mainSearch(locations, fuzz):
    """
    Main functionality. Tests all 1.3 million locations against the small
    tweet file. Majority of study was conducted on this function.

    Maps ``checkLocations`` over ``chunkGen(locations)`` on a pool of 16
    worker processes and returns the flattened list of result dicts.
    """
    # Creating the fuzziness object. This maxerr represents the max local edit distance.
    # The global fz prevents having to pass fz to checkLocations each call in pool.map.
    # NOTE(review): this presumably relies on the workers seeing the
    # module-level ``fz`` set here before the pool is created; confirm.
    global fz
    fz = tre.Fuzzyness(maxerr=fuzz)

    print("")
    print("Starting search...")
    startTime = time.time()

    pool = Pool(16)
    try:
        res = pool.map(checkLocations, chunkGen(locations))
    finally:
        # Always release the worker processes.  The pool used to be dropped
        # without close/join, leaking 16 processes on every call.
        pool.close()
        pool.join()

    res = [item for sublist in res for item in sublist]  # Flattening the list of dicts.
    print("--- %s seconds ---" % (time.time() - startTime))
    return res
def showResult(file_centers, in_file_sine, sine_header=67, maxerr=19):
    """Print edit-distance statistics between the SINE and each center line.

    The SINE is loaded with ``gene_lib.get_sine_forward(in_file_sine)`` and
    its first ``sine_header`` characters are compiled as a TRE pattern.  For
    every stripped line of ``file_centers`` that the pattern matches (with
    ``maxerr`` errors allowed), ``nltk.edit_distance`` is computed between
    the SINE slice at the reported match offsets and the line.

    Prints the SINE, then the distance total divided by 1000, then the sorted
    (distance, count) histogram.  Lines without a match are skipped.
    """
    total_distance = 0
    hist = {}
    sine = gene_lib.get_sine_forward(in_file_sine)  # "B1.fasta"
    pattern = tre.compile(sine[:sine_header], tre.EXTENDED)
    print('original sine', sine)
    fuzziness = tre.Fuzzyness(maxerr=maxerr)

    with open(file_centers, "r") as centerFile:
        for line in centerFile:
            currentLine = line.strip()
            match = pattern.search(currentLine, fuzziness)
            if not match:
                # No approximate match (e.g. a blank line): previously this
                # crashed with AttributeError on ``None.groups()``.
                continue

            span = match.groups()[0]
            # NOTE(review): the offsets come from a search over currentLine
            # but are used to slice the SINE, as in the original code;
            # confirm this is the intended comparison.
            distance = nltk.edit_distance(sine[span[0]:span[1]], currentLine)
            hist[distance] = hist.get(distance, 0) + 1
            total_distance += distance

    # NOTE(review): the divisor is hard-coded; presumably the centers file is
    # expected to hold 1000 lines. Confirm before relying on this average.
    print(total_distance / 1000)
    print(sorted(hist.items()))
def filter_potential_sines_and_locations(in_file_unify, in_file_sine, out_file_with_sine, out_file_location, sine_header=67, maxerr=14):
    """Write records matching the SINE prefix, plus each match location.

    The SINE is loaded with ``gene_lib.get_sine_forward(in_file_sine)`` and
    its first ``sine_header`` characters are compiled as the pattern.  For
    every parsed record of ``in_file_unify`` whose sequence matches (with
    ``maxerr`` errors allowed), the record is written to
    ``out_file_with_sine`` in FASTA format and the first group's offsets are
    written, comma-separated, as one line of ``out_file_location``.
    """
    sine_seq = gene_lib.get_sine_forward(in_file_sine)  # "B1.fasta"
    prefix_matcher = tre.compile(sine_seq[:sine_header], tre.EXTENDED)
    tolerance = tre.Fuzzyness(maxerr=maxerr)

    with open_compressed(in_file_unify, "rt") as source_handle, \
            open_compressed(out_file_with_sine, "wt") as sine_out, \
            open_compressed(out_file_location, "wt") as location_out:
        for record in tqdm(gene_records_parse(source_handle), miniters=100):
            hit = prefix_matcher.search(str(record.seq), tolerance)
            if not hit:
                continue
            offsets = hit.groups()[0]
            gene_record_write(record, sine_out, 'fasta')
            location_out.write(",".join(map(str, offsets)) + "\n")
import tre

# Approximate-matching demo: find a misspelled "Donald Ervin Knuth" in a
# paragraph, allowing up to 3 errors.
fuzziness = tre.Fuzzyness(maxerr=3)
print(fuzziness)

pattern = tre.compile("Don(ald( Ervin)?)? Knuth", tre.EXTENDED)

# The text is assembled from adjacent literals; the resulting string is
# unchanged.
text = (
    " In addition to fundamental contributions in several branches of"
    " theoretical computer science, Donnald Erwin Kuth is the creator of"
    " the TeX computer typesetting system, the related METAFONT font"
    " definition language and rendering system, and the Computer Modern"
    " family of typefaces. "
)

match = pattern.search(text, fuzziness)
if match:
    # Offsets of all groups, then the text matched by the whole pattern.
    print(match.groups())
    print(match[0])
def new_SINES_filter(in_file_initial_filtering, out_file_new_SINES, out_file_inherited_SINES, main_dict, key_size=9, maxerr=3):
    """Distribute the pre-filtered records over worker processes.

    Starts worker processes running ``new_SINES_filter_proc``, each with its
    own ``GeneDQueue``.  Records parsed from ``in_file_initial_filtering``
    are handed out round-robin in batches of 10.  Results are collected with
    ``new_SINES_filter_write`` into the "inherited" and "new" output files.
    After the input is exhausted, the remaining batches are flushed, each
    worker is sent ``None``, the queues are drained one last time and the
    workers are joined.
    """
    fuzziness = tre.Fuzzyness(maxerr=maxerr)

    # Create slave processes.  At least one worker is always started:
    # ``cpu_count() - 3`` is <= 0 on machines with three cores or fewer,
    # which used to leave ``procs`` empty and crash the round-robin below
    # with ZeroDivisionError.
    worker_count = max(1, multiprocessing.cpu_count() - 3)
    procs = []
    for _ in range(worker_count):
        # Create a communication queue between this process and slave process
        q = GeneDQueue()
        # Create and start slave process
        p = Process(target=new_SINES_filter_proc, args=(q, main_dict, key_size, fuzziness))
        p.start()
        procs.append({'p': p, 'q': q, 'batch': [], 'write_i': 0})

    with open_compressed(in_file_initial_filtering, "rt") as handle_read_initial_filtering, \
            open_compressed(out_file_new_SINES, "wt") as handle_write_new, \
            open_compressed(out_file_inherited_SINES, "wt") as handle_write_inherited:
        records = gene_records_parse(handle_read_initial_filtering)
        for rec_i, rec in enumerate(tqdm(records)):
            # Simple round-robin between the slave processes
            proc = procs[rec_i % len(procs)]
            # Add a new record into a local batch array of slave process
            proc['batch'].append(rec)
            if len(proc['batch']) >= 10:
                new_SINES_filter_write(proc['q'], handle_write_inherited, handle_write_new)
                # Put batch of new records into slave process queue
                proc['q'].put(proc['batch'])
                # Reset local batch of slave process
                proc['batch'] = []

        print_step("cleanup")
        # Cleanup slave processes.  The phases below run across ALL workers
        # one after another; their order is kept exactly as before.
        for proc in procs:
            # Get found potential sine from slave process queue, before last batch
            new_SINES_filter_write(proc['q'], handle_write_inherited, handle_write_new)
        for proc in procs:
            # Put last batch, if available
            if len(proc['batch']):
                proc['q'].put(proc['batch'])
                proc['batch'] = []
        for proc in procs:
            # Make slave process terminate
            proc['q'].put(None)
        for proc in procs:
            # Get found potential sine from slave process queue, very last time
            new_SINES_filter_write(proc['q'], handle_write_inherited, handle_write_new, wait_none=True)
        for proc in procs:
            # Wait for termination
            proc['p'].join()
def search_sines(sines, r1_f, override=0, upper_mut_dist=20, step_print=1000000, nlines=100000000, sine_l=70):
    """Search reads for approximate SINE prefixes and count their barcodes.

    A single alternation pattern is built from the first ``sine_l``
    characters of each entry of ``sines``.  ``override`` replaces it:
    ``1`` uses a random sequence plus its base-wise complement; ``> 1``
    uses the first reads of ``r1_f`` (and their reverse complements), after
    skipping some reads when ``override > 2``.

    Every remaining read of ``r1_f`` is searched with
    ``maxerr = sine_l - 10``.  The match cost is tallied for every hit.  When
    the match starts at offset >= 23 and its cost is <= ``upper_mut_dist``,
    the 23 characters preceding the match are counted as a barcode.

    Side effects: resets the module globals ``bar_codes``,
    ``detailed_stats`` and ``distances_from_combined_regexp``; prints
    progress every ``step_print`` reads and at ``nlines`` reads.

    Returns ``bar_codes``, after ``nlines`` reads or when ``r1_f`` is
    exhausted, whichever comes first.
    """
    print('override =', override)
    sine_set = []
    stats = collections.Counter()
    global bar_codes
    bar_codes = {}
    global detailed_stats
    detailed_stats = collections.Counter()
    global distances_from_combined_regexp
    distances_from_combined_regexp = {}

    complete_regexp = '''|'''.join([sine[:sine_l] for sine in sines])
    p = tre.compile(complete_regexp, tre.EXTENDED)

    if override == 1:
        bases = ['A', 'C', 'G', 'T']
        ind_list = [random.randrange(4) for i in range(sine_l)]
        # Fixed typo: this used to read ``ipnd_list`` and raised NameError.
        r_sine = ''.join([bases[ind_list[i]] for i in range(sine_l)])
        r_sine_rc = ''.join([bases[3 - ind_list[i]] for i in range(sine_l)])
        sine_set = [r_sine, r_sine_rc]
        complete_regexp = '|'.join(sine_set)
        p = tre.compile(complete_regexp, tre.EXTENDED)

    # Also specifies the shift range
    if override > 1:
        if override > 2:
            d = override - 1  # random.randrange(2, override)
            print('skipping ', d)
            # Consume reads from the shared iterator up to index d.
            for (i, cur_seq) in enumerate(r1_f):
                if i == d:
                    break
        sine_set = []
        for (i, s) in enumerate(r1_f):
            cur_seq = Seq(s[:sine_l], IUPAC.IUPACAmbiguousDNA())
            cur_seq_rc = cur_seq.reverse_complement()
            sine_set.append(str(cur_seq))
            sine_set.append(str(cur_seq_rc))
            if i == 2:
                break
        complete_regexp = '|'.join(sine_set)
        p = tre.compile(complete_regexp, tre.EXTENDED)

    total = 0
    cnt = 0
    start_time = time()
    print('''sequences = ''')

    bar_code_min_len = 23
    # Loop-invariant, so built once instead of once per read.
    fuzziness = tre.Fuzzyness(maxerr=sine_l - 10)

    for cur_seq in r1_f:
        total += 1
        m = p.search(cur_seq, fuzziness)
        if m:
            res = m.group(0)
            d = m.cost
            # Filter out strings that were cut out. Approximate by max-length matches
            # barcodes are not in place here
            stats[d] += 1
            match_start = m.groups()[0][0]
            if (match_start >= bar_code_min_len) and (d <= upper_mut_dist):
                cnt += 1
                detailed_stats[res] += 1
                bar_code = cur_seq[match_start - bar_code_min_len:match_start]
                bar_codes[bar_code] = bar_codes.get(bar_code, 0) + 1

        if (total % step_print == 0) or (total == nlines):
            print('''stats for first''', total, '''segments \n''')
            print('''========================''')
            print('''time elapsed''', (time() - start_time) / 60.0, '''minutes''')
            for k in sorted(stats):
                print('edit distance =', k, 'matches =', stats[k], '''/''', cnt)

        if total == nlines:
            break

    # Returned on both exits.  Previously an input shorter than ``nlines``
    # fell off the end of the function and returned None.
    return bar_codes
def merged_paired_ends(records1, records2):
    """Yield one merged record per pair of forward / reverse reads.

    For each pair, the last ``common_req`` characters of read 1 are searched
    (``init_err`` errors allowed) in the reverse complement of read 2.  If
    found, the implied overlap is re-checked with a larger error budget,
    ``max(tot_err, ceil(0.1 * overlap length))``.  When both searches match,
    the yielded sequence is read 1 without its overlapping tail followed by
    the whole reverse-complemented read 2.  Otherwise the two strings are
    joined with ``padding`` 'N' characters.

    Every yielded ``SeqRecord`` copies id, name and description from read 1
    and carries a constant phred quality of 30 per base.  Progress is logged
    every ``step`` merged / processed pairs.
    """
    def build_record(text, template):
        # TODO: preserve qualities
        return SeqRecord(Seq(text),
                         id=template.id,
                         name=template.name,
                         description=template.description,
                         letter_annotations={"phred_quality": [30] * len(text)})

    pairs_seen = 0
    anchor_hits = 0
    merged_count = 0

    for (fwd_rec, rev_rec) in zip(records1, records2):
        pairs_seen += 1
        fwd_text = str(fwd_rec.seq)
        rev_text = str(rev_rec.seq.reverse_complement())

        # Step 1: anchor on the tail of read 1; we expect small errors here.
        anchor = fwd_text[-common_req:]
        anchor_re = tre.compile(anchor, tre.EXTENDED)
        anchor_match = anchor_re.search(rev_text, tre.Fuzzyness(maxerr=init_err))

        if anchor_match:
            anchor_hits += 1
            overlap_len = anchor_match.groups()[0][0] + common_req
            allowed_errors = max(tot_err, ceil(0.1 * overlap_len))

            # Step 2: verify the whole implied overlap.
            overlap_re = tre.compile(fwd_text[-overlap_len:], tre.EXTENDED)
            overlap_match = overlap_re.search(rev_text, tre.Fuzzyness(maxerr=allowed_errors))

            if overlap_match:
                merged_count += 1
                # An arbitrary decision: take the common string from r2
                merged_record = build_record(fwd_text[:-overlap_len] + rev_text, fwd_rec)
                if merged_count % step == 0:
                    log('nicely matched ', fwd_text, '\n', rev_text, overlap_len,
                        overlap_match.group(0), anchor_match.group(0),
                        overlap_match.cost, anchor_match.cost)
                yield merged_record
                continue

        # No usable overlap: keep both reads, separated by N padding.
        padded_record = build_record(fwd_text + ('N' * padding) + rev_text, fwd_rec)
        if pairs_seen % step == 0:
            log(pairs_seen, anchor_hits, merged_count)
        yield padded_record
def search_sines(sine_f, r1_f, override=0, upper_mut_dist=30, step_print=10000, nlines=500000, sine_l=80):
    """Search reads for approximate SINE prefixes and count their barcodes.

    The pattern is an alternation of the first ``sine_l`` characters of every
    FASTA record in ``sine_f`` and of their reverse complements.
    ``override`` replaces it: ``1`` uses a random sequence plus its base-wise
    complement; ``> 1`` uses the first reads of ``r1_f`` (and their reverse
    complements), after skipping some reads when ``override > 2``.

    Every remaining read of ``r1_f`` is searched with
    ``maxerr = upper_mut_dist``.  A hit is counted when the match ends more
    than 10 characters before the end of the read and starts after offset
    40; the 40 characters preceding the match are counted as a barcode.

    Side effects: resets and fills the module globals ``bar_codes``,
    ``detailed_stats`` and ``distances_from_combined_regexp``; prints
    progress every ``step_print`` reads and at ``nlines`` reads, where the
    scan stops.  Returns None.
    """
    print('override =', override)
    sine_set = []
    stats = collections.Counter()
    global bar_codes
    bar_codes = {}
    global detailed_stats
    detailed_stats = collections.Counter()
    global distances_from_combined_regexp
    distances_from_combined_regexp = {}

    for sine_record in SeqIO.parse(sine_f, "fasta"):
        cur_seq = Seq(str(sine_record.seq)[:sine_l], IUPAC.IUPACAmbiguousDNA())
        cur_seq_rc = cur_seq.reverse_complement()
        sine_set.append(str(cur_seq))
        sine_set.append(str(cur_seq_rc))
        print(cur_seq, cur_seq_rc, '''\n ======================''')
    complete_regexp = '''|'''.join(sine_set)
    p = tre.compile(complete_regexp, tre.EXTENDED)

    if override == 1:
        bases = ['A', 'C', 'G', 'T']
        ind_list = [random.randrange(4) for i in range(sine_l)]
        r_sine = ''.join([bases[ind_list[i]] for i in range(sine_l)])
        r_sine_rc = ''.join([bases[3 - ind_list[i]] for i in range(sine_l)])
        sine_set = [r_sine, r_sine_rc]
        complete_regexp = '''|'''.join(sine_set)
        p = tre.compile(complete_regexp, tre.EXTENDED)

    # Also specifies the shift range
    if override > 1:
        if override > 2:
            d = override - 1  # random.randrange(2, override)
            print('skipping ', d)
            # Consume reads from the shared iterator up to index d.
            for (i, cur_seq) in enumerate(r1_f):
                if i == d:
                    break
        sine_set = []
        for (i, s) in enumerate(r1_f):
            cur_seq = Seq(s[:sine_l], IUPAC.IUPACAmbiguousDNA())
            cur_seq_rc = cur_seq.reverse_complement()
            sine_set.append(str(cur_seq))
            sine_set.append(str(cur_seq_rc))
            if i == 2:
                break
        complete_regexp = '''|'''.join(sine_set)
        p = tre.compile(complete_regexp, tre.EXTENDED)

    total = 0
    cnt = 0
    start_time = time()
    print('''sequences = ''')

    # Loop-invariant, so built once instead of once per read.  The unused
    # ``matcher`` and ``bar_code_len`` locals were removed.
    fuzziness = tre.Fuzzyness(maxerr=upper_mut_dist)

    for cur_seq in r1_f:
        total += 1
        m = p.search(cur_seq, fuzziness)
        if m:
            res = m.group(0)
            d = m.cost
            match_start, match_end = m.groups()[0][0], m.groups()[0][1]
            # Filter out strings that were cut out. Approximate by max-length matches
            # 10 is arbitrary, not very small
            if (match_end < len(cur_seq) - 10) and (match_start > 40):
                cnt += 1
                stats[d] += 1
                bar_code = cur_seq[match_start - 40:match_start]
                bar_codes[bar_code] = bar_codes.get(bar_code, 0) + 1
                detailed_stats[res] += 1
                distances_from_combined_regexp[res] = d

        if (total % step_print == 0 or total == nlines):
            print('''distances for first''', total, '''segments \n''')
            print('''========================''')
            print('''time elapsed''', (time() - start_time) / 60.0, '''minutes''')
            for k in sorted(stats):
                print('edit distance =', k, 'matches =', stats[k], '''/''', cnt)

        if (total == nlines):
            break
def search_sines2(sine, r1_f, frac_bound, pref_bound, start_line=0, step_print=1000000, nlines=200000000, thresh=9, pref=60):
    """Collect statistics on reads that share a long common block with a SINE prefix.

    For each read of ``r1_f`` the longest exact common block with the first
    ``pref`` characters of ``sine`` is located with ``difflib``.  The part of
    the SINE before that block is then approximately matched, anchored with
    ``$``, against the part of the read before the block.  The results are
    stored in the module global ``stats``, keyed by the block length:
    ``stats[length] = [number of reads, Counter of cost fractions]``.

    Parameters:
        sine: SINE sequence; truncated to its first ``pref`` characters.
        r1_f: iterable of read strings; it is consumed.
        frac_bound: upper bound on cost / (SINE offset of the common block)
            for a read to be counted.
        pref_bound: minimum start offset of the approximate match.
        start_line: when > 0, reads are skipped first (up to index
            ``start_line - 1`` of the iterator).
        step_print: a report is printed every ``step_print`` reads.
        nlines: number of reads after which the scan stops.
        thresh: block lengths >= ``thresh`` get their fractions listed in the
            report printed once ``total >= nlines``.
        pref: SINE prefix length.

    Returns None; the results live in the global ``stats`` and in the
    printed reports.
    """
    global stats
    stats = {}
    print('step ', step_print, nlines)
    sine = sine[:pref]
    # seq1 (the SINE prefix) is fixed; seq2 is replaced for every read below.
    matcher = difflib.SequenceMatcher(isjunk=None, a=sine, b='', autojunk=False)
    total = 0
    cnt = 0
    start_time = time()
    print('''condidates for sine = ''')
    if start_line > 0:
        # Skip ahead by consuming reads from the shared iterator.
        for (i, cur_seq) in enumerate(r1_f):
            if i == start_line - 1:
                break
    for cur_seq in r1_f:
        # The report is printed BEFORE the current read is processed, so it
        # also fires at total == 0 and, finally, at total == nlines.
        if (total % step_print == 0 or total == nlines):
            print('''distances for first''', total, '''segments \n''')
            print('''========================''')
            print('''time elapsed''', (time() - start_time) / 60.0, '''minutes''')
            for k in sorted(stats):
                n = sum([i for i in stats[k][1].values()])
                print('longest common =', k, 'num matches =', n, stats[k][0], '''/''', cnt)
                if (total >= nlines) and (k >= thresh):
                    # List at most the 21 smallest fractions for this length.
                    for (i, frac) in enumerate(sorted(stats[k][1])):
                        print(k, 'Fraction = ', frac)
                        if i == 20:
                            break
        if (total == nlines):
            break
        total += 1
        matcher.set_seq2(cur_seq)
        # res = (offset in sine, offset in cur_seq, length of common block)
        res = matcher.find_longest_match(0, len(sine), 0, len(cur_seq))
        com = res[2]
        # Pattern: the SINE part before the common block, anchored at the end.
        complete_regexp = sine[:res[0]] + '$'
        p = tre.compile(complete_regexp, tre.EXTENDED)
        max_fuzz = res[0]  # int(frac_bound*res[0]) is better perhaps, but want to trivialize it for now
        # Only the read part before the common block is searched.
        m = p.search(
            cur_seq[:res[1]],
            tre.Fuzzyness(maxcost=max_fuzz,
                          delcost=int(1 / 4.0 * max_fuzz) + 1,
                          inscost=int(1 / 4.0 * max_fuzz) + 1))
        if m == None:
            continue
        start_p = m.groups()[0][0]
        d = m.cost
        # This is the fraction of edit distance out of all.
        # In most cases, this is the right edit distance for the overall prefix
        if (res[0] + com) == 0:
            print('How peculier!', 'com =', com, 'res[0] = ', res[0], m.cost)
            continue
        frac = Fraction(d, res[0] + com)
        stats.setdefault(com, [0, collections.Counter()])
        stats[com][0] += 1
        try:
            if (start_p >= pref_bound) and Fraction(d, res[0]) <= frac_bound:
                stats[com][1][frac] += 1
                cnt += 1
        except (ZeroDivisionError):
            # Fraction(d, 0) when the common block starts at SINE offset 0;
            # such reads are counted in stats[com][0] only.
            pass
import tre

# Approximate-matching demo: match "(foo)(bar)" against "zoobag" with a total
# cost budget of 3, then show the group offsets and the second group's text.
fz = tre.Fuzzyness(maxcost=3)
# print() with a single parenthesised argument prints the same text under the
# Python 2 print statement and the Python 3 print function.  The bare
# ``print x`` form used before is a SyntaxError on Python 3.
print(fz)

pt = tre.compile("(foo)(bar)", tre.EXTENDED)

m = pt.match("zoobag", fz)
if m:
    print(m.groups())
    print(m[2])
#!/usr/bin/env python import difflib import sys import tre import gzip # define barcode format; build regex objects for approximate string matching linker1 = "CCTAGTCGCGTAGAC" l1reg = tre.compile(linker1) linker1Length = len(linker1) # define Fuzzyness for tre matching fz = tre.Fuzzyness(maxins=0, maxdel=0, maxsub=1) # pull in read for parsing def readread(s): return [ s.readline().rstrip('\n'), s.readline().rstrip('\n'), s.readline().rstrip('\n'), s.readline().rstrip('\n') ] def diff_letters(a, b): return sum(a[i] != b[i] for i in range(len(a))) def parseRead(s, o):
# along with this program. If not, see # <http://www.gnu.org/licenses/>. # # OCR for hand-written digits # import tre from . import geometry as g param_cross_num_lines = 15 param_cell_margin = 2 param_max_errors = 4 # Tre initializations tre_fz = tre.Fuzzyness(maxerr=param_max_errors) regexps = [ ( r'^1{0,2}222+1{0,2}$', # zero r'^1{0,2}222+1{0,2}$', r'^/(XXX/|X._/|_.X/|.X./)+X_X/(X_X/)+(XXX/|X._/|_.X/|.X./)+$', r'^/(XXX/|X._/|_.X/|.X./)+X_X/(X_X/)+(XXX/|X._/|_.X/|.X./)+$'), ( r'^11+(22+1+|211+|111+)11+$', # one r'^1+$|^1{0,2}2+11+$', r'^/(_.X/|_X./)(_.X/|_X./)+(X.X/)*(_X_/|__X/|XX_/)+' + r'(.XX/|XX./){0,2}$', r'(XXX/|XX_/_XX)'), ( r'^1+2{0,4}111+2{0,2}11{0,2}$', # two r'^1{0,3}2*3+2+1{0,3}$',
def filter_potential_sines_and_locations(in_file_unify, in_file_sine, out_file_with_sine, out_file_location, sine_header=67, maxerr=14):
    """Filter records against the SINE prefix using worker processes.

    The SINE is loaded with ``gene_lib.get_sine_forward(in_file_sine)`` and
    its first ``sine_header`` characters are compiled as the pattern.  Worker
    processes running ``filter_potential_sines_and_locations_proc`` each get
    their own ``GeneDQueue``.  Records parsed from ``in_file_unify`` are
    handed out round-robin in batches of 20.  Results are collected with
    ``filter_potential_sines_and_locations_write`` into the SINE and location
    output files.
    """
    sine = gene_lib.get_sine_forward(in_file_sine)  # "B1.fasta"
    pattern = tre.compile(sine[:sine_header], tre.EXTENDED)
    fuzziness = tre.Fuzzyness(maxerr=maxerr)

    # Create slave processes.  At least one worker is always started:
    # ``cpu_count() - 3`` is <= 0 on machines with three cores or fewer,
    # which used to leave ``procs`` empty and crash the round-robin below
    # with ZeroDivisionError.
    worker_count = max(1, multiprocessing.cpu_count() - 3)
    procs = []
    for _ in range(worker_count):
        # Create a communication queue between this process and slave process
        q = GeneDQueue()
        # Create and start slave process
        p = Process(target=filter_potential_sines_and_locations_proc, args=(q, pattern, fuzziness))
        p.start()
        procs.append({
            'p': p,
            'q': q,
            'batch': [],
            'write_i': 0
        })

    with open_any(in_file_unify, "rt") as handle_read, \
            open_any(out_file_with_sine, "wt") as handle_write_sine, \
            open_any(out_file_location, "wt") as handle_write_loc:
        records = gene_records_parse(handle_read)
        for rec_i, rec in enumerate(tqdm(records, miniters=100)):
            # Simple round-robin between the slave processes
            proc = procs[rec_i % len(procs)]
            # Add a new record into a local batch array of slave process
            proc['batch'].append(rec)
            if len(proc['batch']) >= 20:
                # Get found potential sine from slave process queue
                #
                # Optimization:
                # Don't check the slave queue every iteration, as the check slows down.
                # Moreover we won't get a potential sine for every record.
                if proc['write_i'] > 3:
                    filter_potential_sines_and_locations_write(proc['q'], handle_write_sine, handle_write_loc)
                    proc['write_i'] = 0
                else:
                    proc['write_i'] += 1
                # Put batch of new records into slave process queue
                proc['q'].put(proc['batch'])
                # Reset local batch of slave process
                proc['batch'] = []

        # Cleanup slave processes
        for proc in procs:
            # Get found potential sine from slave process queue, before last batch
            filter_potential_sines_and_locations_write(proc['q'], handle_write_sine, handle_write_loc)
            # Put last batch, if available
            if len(proc['batch']):
                proc['q'].put(proc['batch'])
                proc['batch'] = []
            # Make slave process terminate
            proc['q'].put(None)
            # Wait for termination
            # NOTE(review): the worker is joined BEFORE its queue is drained
            # for the last time. With queue-backed workers this order can
            # block if the worker still has buffered results; confirm
            # GeneDQueue's behaviour before reordering.
            proc['p'].join()
            # Get found potential sine from slave process queue, very last time
            filter_potential_sines_and_locations_write(proc['q'], handle_write_sine, handle_write_loc)
except ValueError: print "Invalid product length for region", primername max_product_len = 10000 min_product_len = 0 else: max_product_len = 10000 min_product_len = 0 primeroutput[primername] = open(options.prefix + primername + ".fasta", "w") patterns[primername].append(max_product_len) patterns[primername].append(min_product_len) print "Found", len(patterns), "primer pairs" fz = tre.Fuzzyness(maxerr=options.maxcost) for file in seqorder: alltaboutput = open(options.prefix + file[0] + "_" + "pcr_regions.tab", 'w') print >> alltaboutput, 'ID pcr_regions' print "Searching file: " + file[0] totallength = 0 for x, seq in enumerate(file[1]): for primername in patterns.keys(): #print "Searching for", primername