Beispiel #1
0
def findSequencePositions(start_position, end_position, start_sequence,
                          end_sequence, sentinel, headers, seqs):
    """Resolve the start and end positions of a region.

    An explicitly supplied position is returned unchanged. Otherwise, when
    the corresponding subsequence is given, the sequences are scanned in
    order with geneutil.gappedFind and the first hit is used.

    Args:
        start_position: Start position, or None to search for start_sequence.
        end_position: End position, or None to search for end_sequence.
        start_sequence: Subsequence marking the start, or None.
        end_sequence: Subsequence marking the end, or None.
        sentinel: If not None, only sequences whose header contains this
            value are searched; if None, every sequence is searched.
        headers: Headers, paired element-wise with seqs.
        seqs: Sequences to search.

    Returns:
        A (start, end) tuple. An entry is None when neither a position was
        supplied nor a match was found for it.
    """

    def first_hit(subsequence, from_start, lowest_valid):
        # Return the first gappedFind result that is at least lowest_valid
        # among the sequences selected by the sentinel, or None.
        for header, seq in zip(headers, seqs):
            if sentinel is None or sentinel in header:
                pos = geneutil.gappedFind(seq, subsequence, start=from_start)
                if pos >= lowest_valid:
                    return pos
        return None

    res_start_position = None
    res_end_position = None

    if start_position is not None:
        res_start_position = start_position
    elif start_sequence is not None:
        # A match that begins at index 0 is valid; only negative results
        # (gappedFind's not-found value is -1) are rejected.
        res_start_position = first_hit(start_sequence, True, 0)

    if end_position is not None:
        # Previously this assigned end_position to itself, so a supplied
        # end position was silently dropped.
        res_end_position = end_position
    elif end_sequence is not None:
        # Keeps the original threshold: an end position must be positive.
        res_end_position = first_hit(end_sequence, False, 1)

    return res_start_position, res_end_position
Beispiel #2
0
def findSequencePositions(start_position, end_position, start_sequence, end_sequence, sentinel, headers, seqs):
	"""Resolve the start and end positions of a region.

	An explicitly supplied position is returned unchanged. Otherwise, when
	the corresponding subsequence is given, the sequences are scanned in
	order with geneutil.gappedFind and the first hit is used.

	Args:
		start_position: Start position, or None to search for start_sequence.
		end_position: End position, or None to search for end_sequence.
		start_sequence: Subsequence marking the start, or None.
		end_sequence: Subsequence marking the end, or None.
		sentinel: If not None, only sequences whose header contains this
			value are searched; if None, every sequence is searched.
		headers: Headers, paired element-wise with seqs.
		seqs: Sequences to search.

	Returns:
		A (start, end) tuple. An entry is None when neither a position was
		supplied nor a match was found for it.
	"""

	def first_hit(subsequence, from_start, lowest_valid):
		# Return the first gappedFind result that is at least lowest_valid
		# among the sequences selected by the sentinel, or None.
		for header, seq in zip(headers, seqs):
			if sentinel is None or sentinel in header:
				pos = geneutil.gappedFind(seq, subsequence, start=from_start)
				if pos >= lowest_valid:
					return pos
		return None

	res_start_position = None
	res_end_position = None

	if start_position is not None:
		res_start_position = start_position
	elif start_sequence is not None:
		# A match that begins at index 0 is valid; only negative results
		# (gappedFind's not-found value is -1) are rejected.
		res_start_position = first_hit(start_sequence, True, 0)

	if end_position is not None:
		# Previously this assigned end_position to itself, so a supplied
		# end position was silently dropped.
		res_end_position = end_position
	elif end_sequence is not None:
		# Keeps the original threshold: an end position must be positive.
		res_end_position = first_hit(end_sequence, False, 1)

	return res_start_position, res_end_position
Beispiel #3
0
	def test_gapped_find(self):
		"""Check geneutil.gappedFind against gapped target sequences.

		Uses assertEqual rather than assertTrue(a == b) so that a failure
		reports the value gappedFind actually returned.
		"""
		# Default call: 'SSS' is found across the '--' run, reported at index 3.
		self.assertEqual(geneutil.gappedFind('AAASS--SAA','SSS'), 3)
		# A query with no match yields -1.
		self.assertEqual(geneutil.gappedFind('AAASS--SAA','SSSS'), -1)
		# start=False reports 8 for the same match instead of 3.
		self.assertEqual(geneutil.gappedFind('AAASS--SAA','SSS',start=False), 8)
		# 'x' is not skipped unless it is passed as the gap character.
		self.assertEqual(geneutil.gappedFind('AAASSxxSAA','SSS',start=False), -1)
		self.assertEqual(geneutil.gappedFind('AAASSxxSAA','SSS',start=False,gap='x'), 8)
Beispiel #4
0
 def test_gapped_find(self):
     """Check geneutil.gappedFind against gapped target sequences.

     Uses assertEqual rather than assertTrue(a == b) so that a failure
     reports the value gappedFind actually returned.
     """
     # Default call: 'SSS' is found across the '--' run, reported at index 3.
     self.assertEqual(geneutil.gappedFind('AAASS--SAA', 'SSS'), 3)
     # A query with no match yields -1.
     self.assertEqual(geneutil.gappedFind('AAASS--SAA', 'SSSS'), -1)
     # start=False reports 8 for the same match instead of 3.
     self.assertEqual(
         geneutil.gappedFind('AAASS--SAA', 'SSS', start=False), 8)
     # 'x' is not skipped unless it is passed as the gap character.
     self.assertEqual(
         geneutil.gappedFind('AAASSxxSAA', 'SSS', start=False), -1)
     self.assertEqual(
         geneutil.gappedFind('AAASSxxSAA', 'SSS', start=False, gap='x'), 8)
Beispiel #5
0
		if options.query in h:
				query_ids.append(xi)
	if len(query_ids) == 0:
		info_outs.write("# Could not find sequences '{}'; exiting\n".format(options.query))
		sys.exit()
	if len(query_ids) > 1:
		info_outs.write("# Found more than one sequence matching '{}'; using the first one: \n#\t{}\n".format(options.query, headers[xi]))
	# Pick the first one
	query_id = query_ids[0]
	
	gap = '-'
	# Find subsequence
	(start_position,end_position) = (options.start_position, options.end_position)
	if not options.start_sequence is None:
		assert not options.end_sequence is None
		start_index = geneutil.gappedFind(seqs[query_id], options.start_sequence, gapless=False, gap=gap)
		end_index = geneutil.gappedFind(seqs[query_id], options.end_sequence, start=False, gapless=False, gap=gap)
		#print(start_index, end_index)
		#print(seqs[query_id][start_index:end_index])
	elif not options.start_position is None:
		assert not options.end_position is None
		start_index = geneutil.gappedIndex(seqs[query_id], options.start_position, gap=gap)
		end_index = geneutil.gappedIndex(seqs[query_id], options.end_position, gap=gap)
	else: # No start/end given; 
		info_outs.write("# No starting position or sequence given; nothing to do. Exiting\n")

	new_headers = []
	new_seqs = []
	for (h,seq) in zip(headers,seqs):
		if not options.exclude:
			ex_seq = seq[start_index:end_index]
Beispiel #6
0
        sys.exit()
    if len(query_ids) > 1:
        info_outs.write(
            "# Found more than one sequence matching '{}'; using the first one: \n#\t{}\n"
            .format(options.query, headers[xi]))
    # Pick the first one
    query_id = query_ids[0]

    gap = '-'
    # Find subsequence
    (start_position, end_position) = (options.start_position,
                                      options.end_position)
    if not options.start_sequence is None:
        assert not options.end_sequence is None
        start_index = geneutil.gappedFind(seqs[query_id],
                                          options.start_sequence,
                                          gapless=False,
                                          gap=gap)
        end_index = geneutil.gappedFind(seqs[query_id],
                                        options.end_sequence,
                                        start=False,
                                        gapless=False,
                                        gap=gap)
        print(start_index, end_index)
        print(seqs[query_id][start_index:end_index])
    elif not options.start_position is None:
        assert not options.end_position is None
        start_index = geneutil.gappedIndex(seqs[query_id],
                                           options.start_position,
                                           gap=gap)
        end_index = geneutil.gappedIndex(seqs[query_id],
                                         options.end_position,