Python globalalign_locate Examples, cutadapt.align.globalalign_locate Python Examples

Example #1

0

Show file

File: adapters.py Project: bitbub/cutadapt

	def match(self, read):
		"""
		Look for this adapter in the given read and wrap the result in an AdapterMatch.

		An exact string search on the upper-cased read is tried first (it is
		skipped when ``match_adapter_wildcards`` is set). Only when that finds
		nothing is ``align.globalalign_locate`` asked for an approximate alignment.

		Return None if the match is shorter than ``min_overlap`` or if
		``errors / length`` exceeds ``max_error_rate``; otherwise return the
		AdapterMatch.
		"""
		haystack = read.sequence.upper()

		# Exact search. -1 means "not found", or "not attempted" when adapter
		# wildcards are enabled.
		if self.match_adapter_wildcards:
			hit = -1
		elif self.where == PREFIX:
			# a prefix adapter is only looked for at the very start of the read
			hit = 0 if haystack.startswith(self.sequence) else -1
		else:
			hit = haystack.find(self.sequence)

		if hit < 0:
			# fall back to approximate matching
			located = align.globalalign_locate(
				self.sequence, haystack, self.max_error_rate,
				self.where, self.wildcard_flags)
			# TODO line-based profiling tells me that the following line
			# is slow (takes 30% of match()'s running time)
			candidate = AdapterMatch(*(located + (self._front_flag, self, read)))
		else:
			# exact hit: the whole adapter matched with zero errors
			size = len(self.sequence)
			candidate = AdapterMatch(
				0, size, hit, hit + size, size, 0,
				self._front_flag, self, read)

		# TODO globalalign_locate should be modified so that the error-rate
		# check below could be turned into an assertion.
		if candidate.length < self.min_overlap:
			return None
		# NOTE(review): the length check above runs first, so this division is
		# only reached when length >= min_overlap; a zero-length match combined
		# with min_overlap <= 0 would raise ZeroDivisionError here -- confirm
		# that callers never configure that.
		if candidate.errors / candidate.length > self.max_error_rate:
			return None
		return candidate

Example #2

0

Show file

    def match(self, read):
        """
        Return an AdapterMatch for this colorspace adapter, or None.

        Anything other than a PREFIX adapter is delegated unchanged to the
        parent class. For a PREFIX adapter an artificial adapter sequence is
        built whose first color encodes the transition from the read's primer
        base into the adapter; that sequence is then matched exactly at the
        start of the read or, failing that, approximately via
        ``align.globalalign_locate``.

        None is returned when the match is shorter than ``min_overlap`` or when
        ``errors / length`` exceeds ``max_error_rate``.
        """
        if self.where != PREFIX:
            return super(ColorspaceAdapter, self).match(read)

        # first color: transition from the primer base into the adapter's
        # first nucleotide
        transition = colorspace.ENCODE[
            read.primer + self.nucleotide_sequence[0]]
        extended = transition + self.sequence

        if read.sequence.startswith(extended):
            # exact prefix hit: whole artificial adapter at offset 0, no errors
            size = len(extended)
            candidate = AdapterMatch(0, size, 0, size, size, 0,
                                     self._front_flag, self, read)
        else:
            # try approximate matching
            located = align.globalalign_locate(
                extended, read.sequence, self.max_error_rate,
                self.where, self.wildcard_flags)
            candidate = AdapterMatch(
                *(located + (self._front_flag, self, read)))

        # TODO globalalign_locate should be modified so that the error-rate
        # check below could be turned into an assertion.
        if candidate.length < self.min_overlap:
            return None
        # Only reached when length >= min_overlap (the check above returns
        # first).
        if candidate.errors / candidate.length > self.max_error_rate:
            return None
        return candidate

Example #3

0

Show file

    def match(self, read):
        """
        Try to match this adapter to the given read.

        The read sequence is upper-cased, then searched for the adapter
        verbatim unless ``match_adapter_wildcards`` is set. When the verbatim
        search is skipped or fails, ``align.globalalign_locate`` supplies an
        approximate alignment instead.

        Return an AdapterMatch, or None if the minimum overlap length is not
        met or the error rate is too high.
        """
        target = read.sequence.upper()
        adapter_len = len(self.sequence)

        # Offset of an exact occurrence, or -1 if none was found or none was
        # looked for (adapter wildcards rule out plain string comparison).
        offset = -1
        if not self.match_adapter_wildcards:
            if self.where != PREFIX:
                offset = target.find(self.sequence)
            elif target.startswith(self.sequence):
                offset = 0

        if offset >= 0:
            # exact occurrence: full adapter length matched, zero errors
            found = AdapterMatch(
                0, adapter_len, offset, offset + adapter_len, adapter_len, 0,
                self._front_flag, self, read)
        else:
            # try approximate matching
            aligned = align.globalalign_locate(
                self.sequence, target, self.max_error_rate, self.where,
                self.wildcard_flags)
            # TODO line-based profiling tells me that the following line
            # is slow (takes 30% of match()'s running time)
            found = AdapterMatch(*(aligned + (self._front_flag, self, read)))

        # TODO globalalign_locate should be modified so that the error-rate
        # check below could be turned into an assertion.
        if found.length < self.min_overlap:
            return None
        # The division is only evaluated once the length check has passed.
        if found.errors / found.length > self.max_error_rate:
            return None
        return found

Example #4

0

Show file

File: deparse_bedtools_output.py Project: dkoppstein/prime-and-realign

def seqs_align(seq1, seq2, error_rate=0.1):
    '''Report whether sequences 1 and 2 align position for position.

    Sequences of different length never align and yield False immediately.
    Otherwise both are handed to ``globalalign_locate`` together with
    ``error_rate`` (default 0.1), and the answer is True only when the
    alignment starts and stops at the same coordinates in both sequences.
    '''
    if len(seq1) != len(seq2):
        return False
    # the alignment itself is delegated to globalalign_locate (C bindings,
    # per the original author)
    located = globalalign_locate(seq1, seq2, error_rate)
    # start1 == start2 and stop1 == stop2
    same_start = located[0] == located[2]
    return same_start and located[1] == located[3]

Example #5

0

Show file

File: deparse_bedtools_output.py Project: duanjunhyq/influenza-analysis

def seqs_align(seq1, seq2, error_rate=0.1):
    '''Decide whether two equally long sequences align end to end.

    Returns False straight away if the lengths differ. Otherwise
    ``globalalign_locate`` is called with ``error_rate`` as its third
    argument, and the result is True exactly when the reported start and
    stop positions are identical for both sequences.
    '''
    lengths_differ = len(seq1) != len(seq2)
    if lengths_differ:
        return False
    # use C bindings for superfast alignment!
    hit = globalalign_locate(seq1, seq2, error_rate)
    # indices 0/1 are start/stop in seq1, indices 2/3 start/stop in seq2
    starts_agree = hit[0] == hit[2]
    return starts_agree and hit[1] == hit[3]

Example #6

0

Show file

File: adapters.py Project: bitbub/cutadapt

	def match(self, read):
		"""
		Return an AdapterMatch instance for this colorspace adapter, or None.

		Non-PREFIX adapters are handled entirely by the parent class. For a
		PREFIX adapter, an artificial adapter is created that starts with the
		color encoding the transition from the primer base into the adapter.
		It is matched exactly at the start of the read if possible, otherwise
		approximately through ``align.globalalign_locate``.

		None is returned if the match is shorter than ``min_overlap`` or if
		``errors / length`` is greater than ``max_error_rate``.
		"""
		if self.where != PREFIX:
			return super(ColorspaceAdapter, self).match(read)

		# color for the step from the primer base to the adapter's first base
		first_color = colorspace.ENCODE[read.primer + self.nucleotide_sequence[0]]
		artificial = first_color + self.sequence
		artificial_len = len(artificial)

		if not read.sequence.startswith(artificial):
			# try approximate matching
			aligned = align.globalalign_locate(
				artificial, read.sequence,
				self.max_error_rate, self.where, self.wildcard_flags)
			found = AdapterMatch(*(aligned + (self._front_flag, self, read)))
		else:
			# exact hit at read offset 0 covering the whole artificial adapter
			found = AdapterMatch(
				0, artificial_len, 0, artificial_len,
				artificial_len, 0, self._front_flag, self, read)

		# TODO globalalign_locate should be modified so that the error-rate
		# check below could be turned into an assertion.
		if found.length < self.min_overlap:
			return None
		# evaluated only after the length check above has passed
		if found.errors / found.length > self.max_error_rate:
			return None
		return found

Example #7

0

Show file

File: testalign.py Project: qyang13/gnumap

def test_polya():
    """Locate a poly-A query in a target that is poly-A after a 4-base prefix.

    With an error rate of 0.0 and the BACK flag, the full query is expected
    to be reported at target offset 4 with no errors.
    """
    query = 'AAAAAAAAAAAAAAAAA'
    target = 'ACAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
    found = globalalign_locate(query, target, 0.0, BACK)
    n = len(query)
    # (start_s, stop_s, start_t, stop_t, matches, cost)
    expected = (0, n, 4, 4 + n, n, 0)
    assert found == expected

Example #8

0

Show file

File: testalign.py Project: bitbub/cutadapt

def test_polya():
	"""Check that a poly-A query is found in a long poly-A target.

	The target starts with 'ACAG' and no errors are allowed (rate 0.0), so
	with the BACK flag the query should be reported as a full-length,
	zero-cost hit beginning at target position 4.
	"""
	poly_a = 'AAAAAAAAAAAAAAAAA'
	reference = 'ACAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
	alignment = globalalign_locate(poly_a, reference, 0.0, BACK)
	size = len(poly_a)
	# tuple layout: start_s, stop_s, start_t, stop_t, matches, cost
	assert alignment == (0, size, 4, 4 + size, size, 0)