def findSequencePositions(start_position, end_position, start_sequence,
                          end_sequence, sentinel, headers, seqs):
    """Resolve a (start, end) position pair, searching sequences if needed.

    Each end is resolved independently. An explicitly given position always
    wins and is returned unchanged. Otherwise, if the matching sequence
    argument is given, it is looked up with ``geneutil.gappedFind`` in the
    entries of ``seqs`` whose header matches ``sentinel``, and the first hit
    is used.

    Args:
        start_position: Start position to return as-is, or None to search.
        end_position: End position to return as-is, or None to search.
        start_sequence: Sequence to search for (``start=True``) when
            ``start_position`` is None; may be None.
        end_sequence: Sequence to search for (``start=False``) when
            ``end_position`` is None; may be None.
        sentinel: If not None, only entries whose header contains this
            value are searched; None means every entry is searched.
        headers: Headers, paired element-wise with ``seqs``.
        seqs: Sequences to search.

    Returns:
        Tuple ``(start, end)``. An element is None when neither a position
        nor a findable sequence was supplied for that end.
    """
    def found_sentinel(x):
        return sentinel is None or (sentinel in x)

    res_start_position = None
    res_end_position = None

    if start_position is not None:
        res_start_position = start_position
    elif start_sequence is not None:
        for (h, s) in zip(headers, seqs):
            if found_sentinel(h):
                ind = geneutil.gappedFind(s, start_sequence, start=True)
                # gappedFind reports a miss as -1; index 0 is a legitimate
                # hit at the very beginning of the sequence.
                if ind >= 0:
                    res_start_position = ind
                    break

    if end_position is not None:
        # Previously this assigned end_position to itself, so a supplied
        # end position was silently dropped and None was returned.
        res_end_position = end_position
    elif end_sequence is not None:
        for (h, s) in zip(headers, seqs):
            if found_sentinel(h):
                endpos = geneutil.gappedFind(s, end_sequence, start=False)
                if endpos > 0:
                    res_end_position = endpos
                    break

    return res_start_position, res_end_position
def findSequencePositions(start_position, end_position, start_sequence,
                          end_sequence, sentinel, headers, seqs):
    """Return the start and end positions, locating them by sequence if unset.

    A position passed in explicitly is returned untouched. When a position
    is None but the corresponding sequence is provided, the sequence is
    searched for via ``geneutil.gappedFind`` in each entry of ``seqs`` whose
    header passes the sentinel filter; the search stops at the first hit.

    Args:
        start_position: Explicit start position, or None.
        end_position: Explicit end position, or None.
        start_sequence: Sequence marking the start (searched with
            ``start=True``); consulted only if ``start_position`` is None.
        end_sequence: Sequence marking the end (searched with
            ``start=False``); consulted only if ``end_position`` is None.
        sentinel: Value that must be contained in a header for its sequence
            to be searched; None disables the filter.
        headers: Headers, zipped with ``seqs``.
        seqs: Sequences to search.

    Returns:
        ``(start, end)``; either element is None if it could not be
        determined.
    """
    def found_sentinel(x):
        return sentinel is None or (sentinel in x)

    def first_hit(query, from_start, is_hit):
        # Search sentinel-matching sequences in order; None if nothing hits.
        for (h, s) in zip(headers, seqs):
            if found_sentinel(h):
                pos = geneutil.gappedFind(s, query, start=from_start)
                if is_hit(pos):
                    return pos
        return None

    res_start_position = start_position
    if res_start_position is None and start_sequence is not None:
        # gappedFind returns -1 on a miss, so 0 (a match at the first
        # column) must count as found.
        res_start_position = first_hit(start_sequence, True,
                                       lambda pos: pos >= 0)

    # The original assigned end_position to itself here, which lost any
    # explicitly supplied end position.
    res_end_position = end_position
    if res_end_position is None and end_sequence is not None:
        res_end_position = first_hit(end_sequence, False,
                                     lambda pos: pos > 0)

    return res_start_position, res_end_position
def test_gapped_find(self):
    """Gapped-find testcases.

    Exercises geneutil.gappedFind with the default gap character and a
    custom one, for both the default call and start=False.
    """
    # assertEqual (rather than assertTrue(a == b)) reports the actual and
    # expected values when a case fails.
    self.assertEqual(geneutil.gappedFind('AAASS--SAA', 'SSS'), 3)
    # The subject holds only three S characters, so four cannot match.
    self.assertEqual(geneutil.gappedFind('AAASS--SAA', 'SSSS'), -1)
    self.assertEqual(
        geneutil.gappedFind('AAASS--SAA', 'SSS', start=False), 8)
    # 'x' is not skipped unless it is passed as the gap character.
    self.assertEqual(
        geneutil.gappedFind('AAASSxxSAA', 'SSS', start=False), -1)
    self.assertEqual(
        geneutil.gappedFind('AAASSxxSAA', 'SSS', start=False, gap='x'), 8)
def test_gapped_find(self):
    """Gapped-find testcases.

    Checks the values geneutil.gappedFind returns for a gapped subject
    string: a hit and a miss with default arguments, then start=False with
    the default gap character and with gap='x'.
    """
    dashed = 'AAASS--SAA'
    x_gapped = 'AAASSxxSAA'

    # Using assertEqual so a failure shows both the returned and the
    # expected value instead of just "False is not true".
    self.assertEqual(geneutil.gappedFind(dashed, 'SSS'), 3)
    # Only three S characters are present, so a four-S query must miss.
    self.assertEqual(geneutil.gappedFind(dashed, 'SSSS'), -1)
    self.assertEqual(geneutil.gappedFind(dashed, 'SSS', start=False), 8)
    # With the default gap character, 'x' breaks up the run of S's ...
    self.assertEqual(geneutil.gappedFind(x_gapped, 'SSS', start=False), -1)
    # ... but it is skipped once declared as the gap character.
    self.assertEqual(
        geneutil.gappedFind(x_gapped, 'SSS', start=False, gap='x'), 8)
if options.query in h: query_ids.append(xi) if len(query_ids) == 0: info_outs.write("# Could not find sequences '{}'; exiting\n".format(options.query)) sys.exit() if len(query_ids) > 1: info_outs.write("# Found more than one sequence matching '{}'; using the first one: \n#\t{}\n".format(options.query, headers[xi])) # Pick the first one query_id = query_ids[0] gap = '-' # Find subsequence (start_position,end_position) = (options.start_position, options.end_position) if not options.start_sequence is None: assert not options.end_sequence is None start_index = geneutil.gappedFind(seqs[query_id], options.start_sequence, gapless=False, gap=gap) end_index = geneutil.gappedFind(seqs[query_id], options.end_sequence, start=False, gapless=False, gap=gap) #print(start_index, end_index) #print(seqs[query_id][start_index:end_index]) elif not options.start_position is None: assert not options.end_position is None start_index = geneutil.gappedIndex(seqs[query_id], options.start_position, gap=gap) end_index = geneutil.gappedIndex(seqs[query_id], options.end_position, gap=gap) else: # No start/end given; info_outs.write("# No starting position or sequence given; nothing to do. Exiting\n") new_headers = [] new_seqs = [] for (h,seq) in zip(headers,seqs): if not options.exclude: ex_seq = seq[start_index:end_index]
sys.exit() if len(query_ids) > 1: info_outs.write( "# Found more than one sequence matching '{}'; using the first one: \n#\t{}\n" .format(options.query, headers[xi])) # Pick the first one query_id = query_ids[0] gap = '-' # Find subsequence (start_position, end_position) = (options.start_position, options.end_position) if not options.start_sequence is None: assert not options.end_sequence is None start_index = geneutil.gappedFind(seqs[query_id], options.start_sequence, gapless=False, gap=gap) end_index = geneutil.gappedFind(seqs[query_id], options.end_sequence, start=False, gapless=False, gap=gap) print(start_index, end_index) print(seqs[query_id][start_index:end_index]) elif not options.start_position is None: assert not options.end_position is None start_index = geneutil.gappedIndex(seqs[query_id], options.start_position, gap=gap) end_index = geneutil.gappedIndex(seqs[query_id], options.end_position,