Beispiel #1
0
    def fracSimilar(self, other, similar_pairs):
        """Return the fraction of positions where self[i] resembles other[i].

        similar_pairs is a container of (i, j) tuples: a position counts as
        similar when (self[i], other[i]) is found in it. A dict keyed by such
        tuples works; PairsFromGroups in cogent.util.misc can construct one
        from a list of lists of similar residues.

        The comparison is truncated at the length of the shorter sequence.

        Note: the scoring function is rebuilt with for_seq on every call, so
        this may be expensive compared to building it once with for_seq and
        reusing it.

        Returns 0.0 if either sequence is empty.
        """
        # Nothing to compare when either side is empty.
        if not (self and other):
            return 0.0

        def is_similar(x, y):
            # Membership test only; the stored value (if any) is ignored.
            return (x, y) in similar_pairs

        scorer = for_seq(f=is_similar, normalizer=per_shortest)
        return scorer(self, other)
Beispiel #2
0
(either here or in another module), but they're all general enough that
putting them in SequenceI seems like a reasonable compromise.
"""
from __future__ import division
from random import shuffle
from old_cogent.util.transform import keep_chars, for_seq, per_shortest, per_longest
from old_cogent.parse.record import MappedRecord
from old_cogent.util.misc import Delegator, ConstrainedString, ConstrainedList, \
    ConstrainedContainer, ConstraintError, DistanceFromMatrix
from old_cogent.base.info import Info as InfoClass
from old_cogent.base.alphabet import DnaAlphabet, RnaAlphabet, ProteinAlphabet
from string import maketrans
from operator import eq, ne

# Standard distance functions, kept at module level because they are
# generally useful. Both are built with for_seq: operator.eq / operator.ne as
# the element function, sum as the aggregator, per_shortest as the normalizer.
frac_same = for_seq(
    f=eq,
    aggregator=sum,
    normalizer=per_shortest,
)
frac_diff = for_seq(
    f=ne,
    aggregator=sum,
    normalizer=per_shortest,
)

class SequenceI(Delegator):
    """Sequence object interface.

    SequenceI should be treated as an abstract class (it basically allows for
    implementations of mutable and immutable sequences that inherit from
    different built-in types). Mostly, it delegates sequence methods to that
    sequence's Alphabet, passing in the sequence as data. However, it will not
    raise an exception if you instantiate it directly.
   
    Alphabet is a synonym for Constraint. Cannot set Alphabet in sequence
    init directly (though it can be changed afterwards if necessary): should 
    instead set as class data.
    def test_for_seq(self):
        """for_seq should return the correct function"""
        # Element-wise functions handed to for_seq as its first argument.
        def is_eq(x, y):
            return x == y

        def is_ne(x, y):
            return x != y

        def lt_5(x, y):
            return x + y < 5

        def diff(x, y):
            return x - y

        # Alternative aggregator: sum of squares of the per-position results.
        def sumsq(x):
            return sum([item * item for item in x])

        # Alternative normalizers: called as (aggregate, first_seq, second_seq).
        def long_norm(s, x, y):
            longest = max(len(x), len(y))
            return (s + 0.0) / longest

        def times_two(s, x, y):
            return 2 * s

        # Fixtures.
        empty = []  # not used by any assertion below
        s1 = [1, 2, 3, 4, 5]
        s2 = [1, 3, 2, 4, 5]
        s3 = [1, 1, 1, 1, 1]
        s4 = [5, 5, 5, 5, 5]
        s5 = [3, 3, 3, 3, 3]
        short = [1]

        def check(func, cases):
            # Each case is (first_seq, second_seq, expected_result).
            for first, second, expected in cases:
                self.assertFloatEqual(func(first, second), expected)

        # test behavior with default aggregator and normalizer
        check(for_seq(is_eq), [
            (s1, s1, 1.0),
            (s1, short, 1.0),
            (short, s1, 1.0),
            (short, s4, 0.0),
            (s4, short, 0.0),
            (s1, s2, 0.6),
        ])

        check(for_seq(is_ne), [
            (s1, s1, 0.0),
            (s1, short, 0.0),
            (short, s1, 0.0),
            (short, s4, 1.0),
            (s4, short, 1.0),
            (s1, s2, 0.4),
        ])

        check(for_seq(lt_5), [
            (s3, s3, 1.0),
            (s3, s4, 0.0),
            (s2, s3, 0.6),
        ])

        check(for_seq(diff), [
            (s1, s1, 0.0),
            (s4, s1, 2.0),
            (s1, s4, -2.0),
        ])

        # test behavior with different aggregator
        check(for_seq(diff), [(s1, s5, 0)])
        check(for_seq(diff, aggregator=sum), [(s1, s5, 0)])
        check(for_seq(diff, aggregator=sumsq), [(s1, s5, 2.0)])

        # test behavior with different normalizer
        check(for_seq(diff, aggregator=sumsq, normalizer=None),
              [(s1, s5, 10)])
        check(for_seq(diff, aggregator=sumsq), [(s1, s5, 2.0)])
        check(for_seq(diff, aggregator=sumsq, normalizer=times_two),
              [(s1, s5, 20)])
        check(for_seq(diff, aggregator=sumsq), [(s5, short, 4)])
        check(for_seq(diff, aggregator=sumsq, normalizer=long_norm),
              [(s5, short, 0.8)])