Beispiel #1
0
    def test_alphabet_chr(self):
        """chr() maps an index back to the character stored at that index."""
        generic = generic_alphabet
        # For the generic alphabet, index i must yield the character whose
        # code point is i + 32.
        for index, _ in enumerate(generic):
            self.assertEqual(ord(generic.chr(index)), index + 32)

        small = Alphabet("alph")
        self.assertEqual("h", small.chr(3))
Beispiel #2
0
    def test_normalize(self):
        """normalize() turns 'aBbc' into 'ABBC' and rejects a bad string."""
        alphabet = Alphabet("ABCDE")
        normalized = alphabet.normalize('aBbc')
        self.assertEqual(str(normalized), 'ABBC')

        # This input is expected to be refused with ValueError (presumably
        # because it holds characters outside the alphabet).
        with self.assertRaises(ValueError):
            alphabet.normalize('aslkfdnnr33')
Beispiel #3
0
    def test_alphabet_ord(self):
        """ord() returns the index of a character within the alphabet."""
        generic = generic_alphabet
        # Iterating the alphabet must visit characters in index order.
        for position, char in enumerate(generic):
            self.assertEqual(generic.ord(char), position)

        small = Alphabet("alph")
        self.assertEqual(2, small.ord("p"))
Beispiel #4
0
    def test_normalize(self):
        """Check normalize() on one accepted and one rejected input."""
        letters = Alphabet("ABCDE")
        result = letters.normalize("aBbc")
        self.assertEqual(str(result), "ABBC")

        # The second input must make normalize() raise ValueError.
        with self.assertRaises(ValueError):
            letters.normalize("aslkfdnnr33")
Beispiel #5
0
    def test_alphabet_ords(self):
        """ords() converts a whole string (or alphabet) into indices."""
        small = Alphabet("alph")
        indices = small.ords("alphalph")
        # Position 4 is the second "a", which has index 0.
        self.assertEqual(0, indices[4])

        generic = generic_alphabet
        # Converting the alphabet with itself must give 0, 1, 2, ...
        for position, value in enumerate(generic.ords(generic)):
            self.assertEqual(value, position)
Beispiel #6
0
    def test_create_from_alphabet(self):
        """An Alphabet built from another Alphabet compares equal to it."""
        original = Alphabet("kjdahf")
        rebuilt = Alphabet(original)
        same = original == rebuilt
        self.assertTrue(same)

        # Comparing against a plain string must evaluate to a false value.
        matches_string = original == "not an alphabet"
        self.assertFalse(matches_string)
Beispiel #7
0
    def test_isaligned(self):
        """isaligned() depends on the alphabet the SeqList is created with.

        The same four equal-length sequences are reported as aligned when the
        list shares their alphabet, and as not aligned when the list is given
        a different alphabet.
        """
        alphabet = Alphabet("ABCD")
        rows = [
            Seq(text, alphabet)
            for text in ("ABCDD", "AAAAD", "AAABD", "AAACD")
        ]

        # unittest assertions are used instead of bare ``assert`` so the
        # checks survive ``python -O`` and report a useful failure message.
        seqs = SeqList(list(rows), alphabet)
        self.assertTrue(seqs.isaligned())

        seqs = SeqList(list(rows), Alphabet("ABCDE"))
        self.assertFalse(seqs.isaligned())
Beispiel #8
0
    def test_get_subMatrix(self):
        """reindex() selects and reorders rows/columns by alphabet.

        The new alphabet may be given as a plain string or as an Alphabet.
        """
        ab = Alphabet('ABCD')
        ar = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                         [13, 14, 15, 16]])
        mat = SubMatrix(ab, ar)

        cases = (
            ('ABC', [[1, 2, 3], [5, 6, 7], [9, 10, 11]]),
            ('BA', [[6, 5], [2, 1]]),
            (Alphabet('BA'), [[6, 5], [2, 1]]),
        )
        for new_alphabet, expected in cases:
            reindexed = mat.reindex(new_alphabet)
            # np.array_equal also compares shapes, so a wrongly shaped result
            # cannot pass through broadcasting as ``np.all(a == b)`` could.
            self.assertTrue(
                np.array_equal(reindexed.array, np.asarray(expected)))
Beispiel #9
0
    def test_profile(self):
        """profile() tallies characters per column of the sequence list."""
        alphabet = Alphabet("ABCD")
        aligned = [
            Seq(text, alphabet)
            for text in ("ABCDD", "AAAAD", "AAABD", "AAACD")
        ]
        tally = SeqList(aligned, alphabet).profile()

        # Expected counts of A, B, C, D for each of the five columns.
        expected_columns = (
            [4, 0, 0, 0],
            [3, 1, 0, 0],
            [3, 0, 1, 0],
            [1, 1, 1, 1],
            [0, 0, 0, 4],
        )
        for position, counts in enumerate(expected_columns):
            self.assertEqual(list(tally[position]), counts)

        # The tally can also be indexed by (position, character).
        self.assertEqual(tally[4, 'D'], 4)

        # Sequences of different lengths must be rejected.
        ragged = SeqList([Seq("AAACD", alphabet),
                          Seq("AAACDA", alphabet)], alphabet)
        with self.assertRaises(ValueError):
            ragged.profile()

        # A list created without an alphabet must be rejected as well.
        no_alphabet = SeqList([Seq("AAACD", alphabet),
                               Seq("AAACD", alphabet)])
        with self.assertRaises(ValueError):
            no_alphabet.profile()
Beispiel #10
0
    def test_repr(self):
        """Smoke test: repr() of a SubMatrix must not raise."""
        letters = Alphabet('ABCD')
        # The 4x4 grid holding 1..16 in row-major order.
        grid = np.arange(1, 17).reshape(4, 4)
        matrix = SubMatrix(letters, grid)

        repr(matrix)
Beispiel #11
0
    def __init__(self, alphabets, values=None, dtype=None):
        """
        Build the array and the per-dimension alphabets used to index it.

        Args:
        - alphabets -- one entry per array dimension. A string or Alphabet
                    makes the dimension alphabetic, with a length equal to
                    the alphabet's length. An integer gives the length of a
                    non-alphabetic dimension. None marks a non-alphabetic
                    dimension whose length is taken from the values argument.
        - values -- the data to index, converted with numpy.asarray. If None,
                 a zero-filled array of the requested shape is created.
        - dtype -- an optional numpy type code.

        Raises:
        - ValueError -- if values has a different number of dimensions than
                     alphabets, or a dimension length disagrees with the
                     corresponding entry.
        """

        # Stand-in stored for dimensions given as None, so that indexing such
        # a dimension with a string fails with a meaningful error message.
        class NullAlphabet(object):
            def ord(self, key):
                raise IndexError("This dimension does not have an alphabet"
                                 )  # pragma: no cover

            # Both lookups fail in the same way.
            ords = ord

        dims = []
        indexers = []
        for entry in alphabets:
            if entry is None:
                dims.append(None)
                indexers.append(NullAlphabet())
                continue

            if isinstance(entry, str):
                entry = Alphabet(entry)

            if isinstance(entry, Alphabet):
                dims.append(len(entry))
                indexers.append(entry)
            else:
                # Anything else is treated as an explicit dimension length.
                dims.append(int(entry))  # pragma: no cover
                indexers.append(None)  # pragma: no cover

        dims = tuple(dims)
        if values is None:
            values = np.zeros(shape=dims, dtype=dtype)
        else:
            values = np.asarray(values, dtype=dtype)
            actual = values.shape
            # A None entry in dims accepts any length for that dimension.
            mismatch = len(dims) != len(actual) or any(
                wanted is not None and wanted != got
                for wanted, got in zip(dims, actual))
            if mismatch:
                raise ValueError(
                    "The values array is the wrong shape.")  # pragma: no cover

        self.array = values
        self.alphabets = tuple(indexers)
Beispiel #12
0
    def test_create_alphabet(self):
        """Alphabet() rejects repeated and null characters."""
        # "alphabet" contains the letter "a" twice.
        with self.assertRaises(ValueError):
            Alphabet("alphabet")

        # A null character is not allowed either.
        with self.assertRaises(ValueError):
            Alphabet("alph\x00")

        # With the repeated letter removed, construction succeeds.
        Alphabet("alphbet")
Beispiel #13
0
    def test_parse_prior_equiprobable(self):
        """parse_prior('equiprobable') gives the weighted uniform prior."""
        expected = 20. * equiprobable_distribution(20)
        parsed = parse_prior(
            'equiprobable', unambiguous_protein_alphabet, weight=20.)
        self.assertTrue(all(expected == parsed))

        # Surrounding whitespace and mixed case in the name are accepted.
        expected = 1.2 * equiprobable_distribution(3)
        parsed = parse_prior(' equiprobablE  ', Alphabet('123'), 1.2)
        self.assertTrue(all(expected == parsed))
Beispiel #14
0
    def test_parse_prior_equiprobable(self):
        """The 'equiprobable' prior equals weight * uniform distribution."""
        uniform_20 = 20.0 * equiprobable_distribution(20)
        prior_20 = parse_prior(
            "equiprobable", unambiguous_protein_alphabet, weight=20.0)
        self.assertTrue(all(uniform_20 == prior_20))

        # The name is given here with extra whitespace and mixed case.
        uniform_3 = 1.2 * equiprobable_distribution(3)
        prior_3 = parse_prior(" equiprobablE  ", Alphabet("123"), 1.2)
        self.assertTrue(all(uniform_3 == prior_3))
Beispiel #15
0
    def complement(self):
        """Complement nucleic acid sequence.

        Works in place: ``self.array`` is replaced and ``self.alphabets`` is
        left as ``(None, original alphabet)``.
        """
        from weblogo.seq import Seq, Alphabet
        original = self.alphabet

        # Complement the alphabet's own letters to get the relabelled order.
        complemented_letters = Seq(original, original).complement()

        # Temporarily label the data with the complemented alphabet ...
        self.alphabets = (None, Alphabet(complemented_letters))

        # ... so that reindexing by the original alphabet reorders the data.
        reordered = self.reindex(original)
        self.alphabets = (None, original)
        self.array = reordered.array
Beispiel #16
0
    def test_fail_get(self):
        """Indexing with characters outside the alphabet raises IndexError."""
        letters = Alphabet('ABCD')
        grid = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                           [13, 14, 15, 16]])
        matrix = SubMatrix(letters, grid)

        for bad_key in (('E', 'A'), ('5', '6')):
            self.assertRaises(IndexError, matrix.__getitem__, bad_key)

        # FIXME
        with self.assertRaises(IndexError):
            matrix.index(('E', 'A'))
Beispiel #17
0
    def test_fail_get(self):
        """Lookups with unknown characters must raise IndexError."""
        alphabet = Alphabet("ABCD")
        values = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                             [13, 14, 15, 16]])
        sub = SubMatrix(alphabet, values)

        unknown_keys = (("E", "A"), ("5", "6"))
        for key in unknown_keys:
            self.assertRaises(IndexError, sub.__getitem__, key)

        # FIXME
        with self.assertRaises(IndexError):
            sub.index(("E", "A"))
Beispiel #18
0
 def test_get(self):
     """index() with two strings agrees with element-wise lookups.

     For every pair of characters drawn from the two strings, the value
     found by indexing the matrix directly must equal the corresponding
     entry of the array returned by index().
     """
     ab = Alphabet('ABCD')
     ar = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                      [13, 14, 15, 16]])
     s = SubMatrix(ab, ar)
     s1 = 'DCCBBBAAA'
     s2 = 'BA'
     v = s.index((s1, s2))
     for m, i in enumerate(s1):
         for n, j in enumerate(s2):
             # assertEqual survives ``python -O`` and reports both values
             # on failure, unlike a bare ``assert``.
             self.assertEqual(s[i, j], v[m, n])
Beispiel #19
0
 def __init__(self, alphabet, array=None, typeof=None, name=None,
              description=None, scale=None):
     """Create a matrix whose two dimensions share one alphabet.

     ``alphabet``, ``array`` and ``typeof`` are forwarded to
     AlphabeticArray.__init__ (the alphabet is used for both dimensions);
     ``name``, ``description`` and ``scale`` are stored unchanged.
     """
     both_axes = (alphabet, alphabet)
     AlphabeticArray.__init__(self, both_axes, array, typeof)
     self.alphabet = Alphabet(alphabet)
     self.name, self.description, self.scale = name, description, scale
Beispiel #20
0
    def test_read_alphabets(self):
        """SubMatrix.read() raises ValueError for incompatible alphabets."""
        # Incompatible alphabets: reading test_matrix3 without an explicit
        # alphabet must fail.
        with self.assertRaises(ValueError):
            SubMatrix.read(StringIO(test_matrix3))

        # The same data reads fine once its alphabet is supplied.
        stream = StringIO(test_matrix3)
        SubMatrix.read(stream, alphabet=Alphabet('ARNDCQEGHILKMFPSTWYV'))

        # test_matrix1 must not be readable with the protein alphabet.
        with self.assertRaises(ValueError):
            SubMatrix.read(StringIO(test_matrix1),
                           unambiguous_protein_alphabet)
Beispiel #21
0
    def test_which_alphabet(self):
        """Alphabet.which() picks the expected alphabet for known inputs."""
        a = Alphabet.which(Seq("ARNDCQEGHILKMFPSTWYVX"))
        self.assertEqual(a, unambiguous_protein_alphabet)

        cases = (
            ('cap.fa', unambiguous_dna_alphabet),
            ('cox2.msf', unambiguous_protein_alphabet),
            ('Rv3829c.fasta', unambiguous_protein_alphabet),
            ('chain_B.fasta', unambiguous_protein_alphabet),
        )
        for filename, expected in cases:
            stream = data_stream(filename)
            # Close the stream even when reading or the assertion fails, so
            # a failing case does not leak an open file handle.
            try:
                self.assertEqual(
                    Alphabet.which(seq_io.read(stream)), expected)
            finally:
                stream.close()
Beispiel #22
0
    def test_ords(self):
        """SeqList.ords() works with its own or an explicit alphabet."""
        texts = (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
        members = [Seq(text, nucleic_alphabet) for text in texts]

        with_alphabet = SeqList(list(members), nucleic_alphabet)
        with_alphabet.ords()

        # FIXME?: behaviour for sequences of different lengths is not
        # checked; it is unclear whether ords() should raise ValueError.

        # Alphabet passed explicitly as a positional argument.
        with_alphabet.ords(nucleic_alphabet)

        # A list without its own alphabet accepts one as a keyword ...
        without_alphabet = SeqList(list(members))
        without_alphabet.ords(alphabet=Alphabet("ABC"))

        # ... and fails when none is available at all.
        with self.assertRaises(ValueError):
            without_alphabet.ords()
Beispiel #23
0
 def test_alphabet_alphabetic(self):
     """alphabetic() is true only if every character is in the alphabet."""
     letters = Alphabet("alphbet")
     self.assertTrue(letters.alphabetic("alphbet"))
     # "X" is not part of the alphabet.
     self.assertFalse(letters.alphabetic("alphbetX"))
Beispiel #24
0
    def read_transfac(cls, fin, alphabet=None):
        """Parse a TRANSFAC-format PWM from a file.

        Args:
        - fin -- an iterable of text lines. Blank lines and lines starting
              with "#" are skipped. Parsing starts at the first line whose
              first field is "PO" or "P0" and stops at the first following
              line whose first field is in ``cls._TRANSFAC_DELIM_LINES``.
        - alphabet -- optional alphabet for the result. If omitted, the
              RNA, DNA and protein alphabets are tried in that order, with
              the alphabet found in the file as the fallback.

        Returns:
        ``Motif(defacto_alphabet, matrix).reindex(alphabet)``, where
        ``defacto_alphabet`` is the alphabet read from the file.

        Raises:
        - ValueError -- if fewer than two usable lines are found, the
              header is missing, rows have inconsistent lengths, or a
              header/row label has an unexpected form.
        """
        header_tags = ("PO", "P0")

        items = []

        start = False
        for line in fin:
            # ``not line.strip()`` also covers an empty string, for which
            # ``line[0]`` would raise IndexError.
            if not line.strip() or line.startswith("#"):
                continue  # pragma: no cover

            stuff = line.split()

            if stuff[0] in header_tags:
                start = True

            # 'XX' delimiters may precede the first motif
            if start:
                if stuff[0] in cls._TRANSFAC_DELIM_LINES:
                    break
                items.append(stuff)

        if len(items) < 2:
            raise ValueError("Vacuous file.")

        # Is the first line a header line?
        header = items.pop(0)
        hcols = len(header)
        cols = len(items[0])
        if not (header[0] in header_tags or hcols == cols - 1
                or hcols == cols - 2):
            raise ValueError("Missing header line!")  # pragma: no cover

        # Do all lines (except the first) contain the same number of items?
        for i, row in enumerate(items[1:], start=1):
            if cols != len(row):
                raise ValueError("Inconsistant length, row: {}".format(
                    i))  # pragma: no cover

        # Vertical or horizontal arrangement?
        if header[0] in header_tags:
            header.pop(0)

        # The header holds positions only if every item is an integer;
        # otherwise it is taken to hold the alphabet.
        position_header = True

        for h in header:
            if not ischar(h):
                raise ValueError("Expected a single character per header "
                                 'item, but got "{}" as one item'.format(
                                     h))  # pragma: no cover
            if not isint(h):
                position_header = False

        alphabet_header = not position_header

        # Check row headers, stripping the leading label from every row.
        if alphabet_header:
            for i, r in enumerate(items):
                if not isint(r[0]) and r[0][0] != "P":
                    raise ValueError("Expected position "
                                     "as first item on line {}".format(
                                         i))  # pragma: no cover
                r.pop(0)
            # Loop-invariant, so computed once after the rows are checked.
            defacto_alphabet = "".join(header)
        else:
            a = []  # pragma: no cover
            for i, r in enumerate(items):  # pragma: no cover
                if not ischar(r[0]) and r[0][0] != "P":  # pragma: no cover
                    raise ValueError("Expected position "  # pragma: no cover
                                     "as first item on line {}".format(
                                         i))  # pragma: no cover
                a.append(r.pop(0))  # pragma: no cover
            defacto_alphabet = "".join(a)  # pragma: no cover

        # Check defacto_alphabet
        defacto_alphabet = Alphabet(defacto_alphabet)

        if alphabet:
            alphabet = Alphabet(alphabet)
            if not defacto_alphabet.alphabetic(alphabet):
                # Allow alphabet to be a superset of defacto_alphabet
                alphabet = defacto_alphabet

        else:
            alphabets = (
                unambiguous_rna_alphabet,
                unambiguous_dna_alphabet,
                unambiguous_protein_alphabet,
            )
            for a in alphabets:
                if defacto_alphabet.alphabetic(a):
                    alphabet = a
                    break
            if not alphabet:
                alphabet = defacto_alphabet  # pragma: no cover

        # The last item of each row may be extra cruft. Remove
        if len(items[0]) == len(header) + 1:
            for r in items:
                r.pop()

        # items should now be a list of lists of numbers (as strings)
        matrix = np.array([[float(cell) for cell in row] for row in items],
                          dtype=np.float64)

        if position_header:
            # ndarray.transpose() returns a new view and leaves the original
            # untouched, so the result has to be assigned back.
            matrix = matrix.transpose()  # pragma: no cover

        return Motif(defacto_alphabet, matrix).reindex(alphabet)
Beispiel #25
0
        try:
            return object.__getattr__(self, name)
        except AttributeError:
            return getattr(self.array, name)

    def __setattr__(self, name, value):
        """Set the attribute on this object, else on the wrapped array."""
        try:
            object.__setattr__(self, name, value)
        except AttributeError:  # pragma: no cover
            # The object itself refused the attribute; forward to the array.
            setattr(self.array, name, value)  # pragma: no cover


# End class AlphabeticArray

# TODO: move to seq?
# Module-level Alphabet over the 23 letters "ARNDCQEGHILKMFPSTWYVBZX".
# NOTE(review): presumably the default symbol order for substitution
# matrices, judging by the name only -- confirm against its users.
submatrix_alphabet = Alphabet("ARNDCQEGHILKMFPSTWYVBZX")


class SubMatrix(AlphabeticArray):
    """A two dimensional array indexed by an Alphabet. Used to hold substitution
    matrices and similar information.

    Various standard substitution matrices are available from the data package
    >>> from weblogo import data
    >>> mat = SubMatrix.read(data.data_stream('blosum100'))

    Attr:
    - alphabet     -- An Alphabet
    - array        -- A numpy array
    - name         -- The name of this matrix (if any) as a string.
    - description  -- The description, if any.
Beispiel #26
0
 def test_ords(self):
     """Seq.ords() gives each character's index in the Seq's alphabet."""
     alphabet = Alphabet("ABC")
     observed = list(Seq("ABCCBA", alphabet).ords())
     self.assertEqual(observed, [0, 1, 2, 2, 1, 0])
Beispiel #27
0
 def test_repr(self):
     """Smoke test: repr() and str() of an Alphabet must not raise."""
     alphabet = Alphabet("kjdahf")
     for render in (repr, str):
         render(alphabet)
Beispiel #28
0
 def test_tally_nonalphabetic(self):
     """tally() with a smaller alphabet counts only that alphabet's letters."""
     sequence = Seq("AGTCAGCTACGACGCGC", dna_alphabet)
     counts = sequence.tally(Alphabet("AC"))
     # One count per letter of "AC": four A's and six C's.
     self.assertEqual(2, len(counts))
     self.assertEqual(list(counts), [4, 6])
Beispiel #29
0
 def test_none(self):
     """Alphabet(None) compares equal to generic_alphabet."""
     default = Alphabet(None)
     self.assertEqual(default, generic_alphabet)
Beispiel #30
0
 def test_alphabet_chrs(self):
     """chrs() turns a sequence of indices into the matching Seq."""
     alphabet = Alphabet("alph")
     expected = Seq("ppla", alphabet)
     self.assertEqual(expected, alphabet.chrs((2, 2, 1, 0)))