Beispiel #1
0
	def match(self, read):
		"""
		Try to match this adapter to the given read.

		The read sequence is upper-cased before matching. Unless
		self.match_adapter_wildcards is set, an exact search is tried first
		(startswith() when self.where is PREFIX, find() otherwise). If that
		is skipped or finds nothing, align.globalalign_locate() performs an
		approximate match.

		Return an AdapterMatch instance, or None if the match is empty, the
		minimum overlap length is not met or the error rate is too high.
		"""
		read_seq = read.sequence.upper()
		adapter_seq = self.sequence
		pos = -1
		# try to find an exact match first unless wildcards are allowed
		if not self.match_adapter_wildcards:
			if self.where == PREFIX:
				pos = 0 if read_seq.startswith(adapter_seq) else -1
			else:
				pos = read_seq.find(adapter_seq)
		if pos >= 0:
			# exact hit: the whole adapter matches with zero errors
			adapter_len = len(adapter_seq)
			match = AdapterMatch(
				0, adapter_len, pos, pos + adapter_len,
				adapter_len, 0, self._front_flag, self, read)
		else:
			# try approximate matching
			alignment = align.globalalign_locate(adapter_seq, read_seq,
				self.max_error_rate, self.where, self.wildcard_flags)
			# TODO line-based profiling tells me that the following line
			# is slow (takes 30% of match()'s running time)
			match = AdapterMatch(*(alignment + (self._front_flag, self, read)))

		# TODO globalalign_locate should be modified so that the error rate
		# bound can be asserted here instead of being checked.
		length = match.length
		# An empty match has no defined error rate; rejecting it up front
		# avoids a ZeroDivisionError when min_overlap is 0 or less.
		if length == 0 or length < self.min_overlap:
			return None
		if match.errors / length > self.max_error_rate:
			return None
		return match
Beispiel #2
0
    def match(self, read):
        """
        Try to match this colorspace adapter to the given read.

        When self.where is not PREFIX, matching is delegated to the parent
        class. Otherwise an artificial adapter is built whose first color
        encodes the transition from the read's primer base into the first
        adapter nucleotide, and that sequence is matched against
        read.sequence: first exactly with startswith(), then approximately
        with align.globalalign_locate().

        Return an AdapterMatch instance, or None if the match is empty, the
        minimum overlap length is not met or the error rate is too high.
        """
        if self.where != PREFIX:
            return super(ColorspaceAdapter, self).match(read)
        # create artificial adapter that includes a first color that encodes
        # the transition from primer base into adapter
        transition = colorspace.ENCODE[
            read.primer + self.nucleotide_sequence[0]]
        asequence = transition + self.sequence
        if read.sequence.startswith(asequence):
            # exact hit at the start of the read, zero errors
            alen = len(asequence)
            match = AdapterMatch(0, alen, 0, alen, alen, 0,
                                 self._front_flag, self, read)
        else:
            # try approximate matching
            alignment = align.globalalign_locate(asequence, read.sequence,
                                                 self.max_error_rate,
                                                 self.where,
                                                 self.wildcard_flags)
            match = AdapterMatch(*(alignment + (self._front_flag, self, read)))

        # TODO globalalign_locate should be modified so that the error rate
        # bound can be asserted here instead of being checked.
        length = match.length
        # An empty match has no defined error rate; rejecting it up front
        # avoids a ZeroDivisionError when min_overlap is 0 or less.
        if length == 0 or length < self.min_overlap:
            return None
        if match.errors / length > self.max_error_rate:
            return None
        return match
Beispiel #3
0
    def match(self, read):
        """
        Try to match this adapter to the given read.

        The read sequence is upper-cased before matching. Unless
        self.match_adapter_wildcards is set, an exact search is tried first
        (startswith() when self.where is PREFIX, find() otherwise). If that
        is skipped or finds nothing, align.globalalign_locate() performs an
        approximate match.

        Return an AdapterMatch instance, or None if the match is empty, the
        minimum overlap length is not met or the error rate is too high.
        """
        read_seq = read.sequence.upper()
        adapter_seq = self.sequence
        pos = -1
        # try to find an exact match first unless wildcards are allowed
        if not self.match_adapter_wildcards:
            if self.where == PREFIX:
                pos = 0 if read_seq.startswith(adapter_seq) else -1
            else:
                pos = read_seq.find(adapter_seq)
        if pos >= 0:
            # exact hit: the whole adapter matches with zero errors
            adapter_len = len(adapter_seq)
            match = AdapterMatch(0, adapter_len,
                                 pos, pos + adapter_len,
                                 adapter_len, 0, self._front_flag, self,
                                 read)
        else:
            # try approximate matching
            alignment = align.globalalign_locate(adapter_seq, read_seq,
                                                 self.max_error_rate,
                                                 self.where,
                                                 self.wildcard_flags)
            # TODO line-based profiling tells me that the following line
            # is slow (takes 30% of match()'s running time)
            match = AdapterMatch(*(alignment + (self._front_flag, self, read)))

        # TODO globalalign_locate should be modified so that the error rate
        # bound can be asserted here instead of being checked.
        length = match.length
        # An empty match has no defined error rate; rejecting it up front
        # avoids a ZeroDivisionError when min_overlap is 0 or less.
        if length == 0 or length < self.min_overlap:
            return None
        if match.errors / length > self.max_error_rate:
            return None
        return match
def seqs_align(seq1, seq2, error_rate=0.1):
    '''Tell whether two sequences of equal length align position-for-position.

    Sequences of different length never align. Otherwise both are passed to
    globalalign_locate() together with error_rate, and the answer is True
    only if the reported start and stop coordinates are the same for both
    sequences.
    '''
    if len(seq1) != len(seq2):
        return False
    # use C bindings for superfast alignment!
    located = globalalign_locate(seq1, seq2, error_rate)
    # located[0]/[1] are start/stop in seq1, located[2]/[3] those in seq2
    if located[0] != located[2]:
        return False
    return located[1] == located[3]
def seqs_align(seq1, seq2, error_rate=0.1):
    '''Check whether seq1 and seq2 line up with each other.

    Criteria:

    1. Both sequences have the same length (otherwise False is returned
       without aligning).
    2. The alignment computed by globalalign_locate() with the given
       error_rate starts and stops at the same positions in both sequences.
    '''
    same_length = len(seq1) == len(seq2)
    if not same_length:
        return False
    # use C bindings for superfast alignment!
    coords = globalalign_locate(seq1, seq2, error_rate)
    # start1 == start2 and stop1 == stop2
    same_start = coords[0] == coords[2]
    return same_start and coords[1] == coords[3]
Beispiel #6
0
	def match(self, read):
		"""
		Try to match this colorspace adapter to the given read.

		When self.where is not PREFIX, matching is delegated to the parent
		class. Otherwise an artificial adapter is built whose first color
		encodes the transition from the read's primer base into the first
		adapter nucleotide, and that sequence is matched against
		read.sequence: first exactly with startswith(), then approximately
		with align.globalalign_locate().

		Return an AdapterMatch instance, or None if the match is empty, the
		minimum overlap length is not met or the error rate is too high.
		"""
		if self.where != PREFIX:
			return super(ColorspaceAdapter, self).match(read)
		# create artificial adapter that includes a first color that encodes
		# the transition from primer base into adapter
		transition = colorspace.ENCODE[read.primer + self.nucleotide_sequence[0]]
		asequence = transition + self.sequence
		if read.sequence.startswith(asequence):
			# exact hit at the start of the read, zero errors
			alen = len(asequence)
			match = AdapterMatch(
				0, alen, 0, alen,
				alen, 0, self._front_flag, self, read)
		else:
			# try approximate matching
			alignment = align.globalalign_locate(asequence, read.sequence,
				self.max_error_rate, self.where, self.wildcard_flags)
			match = AdapterMatch(*(alignment + (self._front_flag, self, read)))

		# TODO globalalign_locate should be modified so that the error rate
		# bound can be asserted here instead of being checked.
		length = match.length
		# An empty match has no defined error rate; rejecting it up front
		# avoids a ZeroDivisionError when min_overlap is 0 or less.
		if length == 0 or length < self.min_overlap:
			return None
		if match.errors / length > self.max_error_rate:
			return None
		return match
Beispiel #7
0
def test_polya():
    """A poly-A adapter located with BACK and a 0.0 error rate must be
    reported as a full-length, error-free match starting at read offset 4."""
    adapter = 'AAAAAAAAAAAAAAAAA'
    read = 'ACAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
    n = len(adapter)
    # tuple layout: start_s, stop_s, start_t, stop_t, matches, cost
    expected = (0, n, 4, 4 + n, n, 0)
    assert globalalign_locate(adapter, read, 0.0, BACK) == expected
Beispiel #8
0
def test_polya():
	"""Locate a poly-A adapter in a read with a long poly-A tail.

	With BACK and an error rate of 0.0 the result must cover the whole
	adapter, begin at read offset 4 and contain no errors.
	"""
	poly_a = 'AAAAAAAAAAAAAAAAA'
	read_seq = 'ACAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
	adapter_len = len(poly_a)
	located = globalalign_locate(poly_a, read_seq, 0.0, BACK)
	# fields: start_s, stop_s, start_t, stop_t, matches, cost
	expected = (
		0, adapter_len,
		4, 4 + adapter_len,
		adapter_len, 0,
	)
	assert located == expected