Python Alphabet Beispiele, weblogo.seq.Alphabet Python Beispiele

Beispiel #1

0

Datei anzeigen

    def test_alphabet_chr(self):
        """Check that ``chr`` turns an index back into the expected character."""
        generic = generic_alphabet
        # Every index of the generic alphabet is asserted to yield the
        # character whose code point is that index plus 32.
        for position, _symbol in enumerate(generic):
            self.assertEqual(ord(generic.chr(position)), position + 32)

        custom = Alphabet("alph")
        self.assertEqual("h", custom.chr(3))

Beispiel #2

0

Datei anzeigen

    def test_normalize(self):
        """Check ``normalize`` on mixed-case input and on invalid input."""
        alphabet = Alphabet("ABCDE")
        normalized = alphabet.normalize('aBbc')
        self.assertEqual(str(normalized), 'ABBC')

        # A string with characters outside the alphabet must be rejected.
        with self.assertRaises(ValueError):
            alphabet.normalize('aslkfdnnr33')

Beispiel #3

0

Datei anzeigen

    def test_alphabet_ord(self):
        """Check that ``ord`` maps each character to its position."""
        generic = generic_alphabet
        for expected_index, symbol in enumerate(generic):
            self.assertEqual(generic.ord(symbol), expected_index)

        custom = Alphabet("alph")
        self.assertEqual(2, custom.ord("p"))

Beispiel #4

0

Datei anzeigen

    def test_normalize(self):
        """Check ``normalize`` on mixed-case input and on invalid input."""
        alphabet = Alphabet("ABCDE")
        normalized = alphabet.normalize("aBbc")
        self.assertEqual(str(normalized), "ABBC")

        # A string with characters outside the alphabet must be rejected.
        with self.assertRaises(ValueError):
            alphabet.normalize("aslkfdnnr33")

Beispiel #5

0

Datei anzeigen

    def test_alphabet_ords(self):
        """Check that ``ords`` converts a whole string into indices."""
        custom = Alphabet("alph")
        indices = custom.ords("alphalph")
        self.assertEqual(0, indices[4])

        # Converting the generic alphabet with itself must give 0, 1, 2, ...
        generic = generic_alphabet
        for position, value in enumerate(generic.ords(generic)):
            self.assertEqual(value, position)

Beispiel #6

0

Datei anzeigen

    def test_create_from_alphabet(self):
        """An Alphabet built from another Alphabet compares equal to it."""
        original = Alphabet("kjdahf")
        rebuilt = Alphabet(original)
        self.assertTrue(original == rebuilt)

        # Comparing against a plain string must not report equality.
        self.assertFalse(original == "not an alphabet")

Beispiel #7

0

Datei anzeigen

    def test_isaligned(self):
        """Check ``SeqList.isaligned`` with matching and mismatching alphabets."""
        alphabet = Alphabet("ABCD")
        members = [
            Seq(text, alphabet)
            for text in ("ABCDD", "AAAAD", "AAABD", "AAACD")
        ]

        aligned = SeqList(list(members), alphabet)
        assert aligned.isaligned()

        # Same sequences, but the list is declared with a different alphabet.
        mismatched = SeqList(list(members), Alphabet("ABCDE"))
        assert not mismatched.isaligned()

Beispiel #8

0

Datei anzeigen

    def test_get_subMatrix(self):
        """Check ``reindex`` with a prefix, a reordered string and an Alphabet."""
        alphabet = Alphabet('ABCD')
        values = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                             [13, 14, 15, 16]])
        full = SubMatrix(alphabet, values)

        leading = full.reindex('ABC')
        expected_leading = np.asarray([[1, 2, 3], [5, 6, 7], [9, 10, 11]])
        assert np.all(leading.array == expected_leading)

        # Reordering by string ...
        swapped = full.reindex('BA')
        expected_swapped = np.asarray([[6, 5], [2, 1]])
        assert np.all(swapped.array == expected_swapped)

        # ... and by Alphabet instance must agree.
        swapped = full.reindex(Alphabet('BA'))
        expected_swapped = np.asarray([[6, 5], [2, 1]])
        assert np.all(swapped.array == expected_swapped)

Beispiel #9

0

Datei anzeigen

    def test_profile(self):
        """Check ``SeqList.profile`` counts and its failure modes."""
        alphabet = Alphabet("ABCD")
        texts = ("ABCDD", "AAAAD", "AAABD", "AAACD")
        seqs = SeqList([Seq(text, alphabet) for text in texts], alphabet)

        tally = seqs.profile()

        # One expected count row per column of the four sequences above.
        expected_rows = (
            [4, 0, 0, 0],
            [3, 1, 0, 0],
            [3, 0, 1, 0],
            [1, 1, 1, 1],
            [0, 0, 0, 4],
        )
        for column, expected in enumerate(expected_rows):
            self.assertEqual(list(tally[column]), expected)

        self.assertEqual(tally[4, 'D'], 4)

        # Sequences of different lengths are asserted to raise ValueError.
        ragged = SeqList([Seq("AAACD", alphabet), Seq("AAACDA", alphabet)],
                         alphabet)
        self.assertRaises(ValueError, ragged.profile)

        # A list created without an alphabet is asserted to raise ValueError.
        unlabelled = SeqList([Seq("AAACD", alphabet), Seq("AAACD", alphabet)])
        self.assertRaises(ValueError, unlabelled.profile)

Beispiel #10

0

Datei anzeigen

    def test_repr(self):
        """Smoke test: ``repr`` of a SubMatrix must not raise."""
        alphabet = Alphabet('ABCD')
        values = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                             [13, 14, 15, 16]])
        matrix = SubMatrix(alphabet, values)

        # Only the absence of an exception is checked; the text is ignored.
        repr(matrix)

Beispiel #11

0

Datei anzeigen

Datei: matrix.py Projekt: cthyge/weblogo

    def __init__(self, alphabets, values=None, dtype=None):
        """Build the array and the per-dimension alphabets.

        Args:
        - alphabets -- a list with one entry per dimension. A string or
                    Alphabet object gives that dimension an alphabet whose
                    length is the expected dimension length. An integer
                    gives a non-alphabetic dimension of that length. None
                    gives a non-alphabetic dimension whose length is not
                    checked against the values argument.
        - values -- An array of values to be indexed. If None a new
                 zero-filled array is created from the dimension lengths.
        - dtype -- An optional numpy type code.

        Raises:
            ValueError: if values has a different number of dimensions, or
                a dimension length that differs from the expected one.
        """

        # A dummy object to be used in place of None in the alphabets list
        # so that we get meaningful error messages if we try to index a
        # nonalphabetic dimension with a string.
        class NullAlphabet(object):
            def ord(self, key):
                raise IndexError("This dimension does not have an alphabet"
                                 )  # pragma: no cover

            def ords(self, key):
                raise IndexError("This dimension does not have an alphabet"
                                 )  # pragma: no cover

        expected_dims = []
        resolved = []
        for entry in alphabets:
            # Plain strings are promoted to Alphabet objects first.
            if isinstance(entry, str):
                entry = Alphabet(entry)

            if entry is None:
                expected_dims.append(None)
                resolved.append(NullAlphabet())
            elif isinstance(entry, Alphabet):
                expected_dims.append(len(entry))
                resolved.append(entry)
            else:
                expected_dims.append(int(entry))  # pragma: no cover
                resolved.append(None)  # pragma: no cover
        expected_shape = tuple(expected_dims)

        if values is None:
            values = np.zeros(shape=expected_shape, dtype=dtype)
        else:
            values = np.asarray(values, dtype=dtype)
            actual_shape = values.shape
            if len(expected_shape) != len(actual_shape):
                raise ValueError(
                    "The values array is the wrong shape.")  # pragma: no cover
            # A None entry matches any length in that dimension.
            mismatch = any(
                want is not None and want != got
                for want, got in zip(expected_shape, actual_shape))
            if mismatch:
                raise ValueError("The values array is the wrong shape."
                                 )  # pragma: no cover

        self.array = values
        self.alphabets = tuple(resolved)

Beispiel #12

0

Datei anzeigen

    def test_create_alphabet(self):
        """Check which letter strings the Alphabet constructor rejects."""
        rejected = (
            "alphabet",  # Alphabet contains repeated character
            "alph\x00",  # Alphabet contains null character
        )
        for letters in rejected:
            self.assertRaises(ValueError, Alphabet, letters)

        # Distinct, non-null characters must be accepted without error.
        Alphabet("alphbet")

Beispiel #13

0

Datei anzeigen

    def test_parse_prior_equiprobable(self):
        """Check that ``parse_prior`` handles the 'equiprobable' keyword."""
        expected = 20. * equiprobable_distribution(20)
        actual = parse_prior(
            'equiprobable', unambiguous_protein_alphabet, weight=20.)
        self.assertTrue(all(expected == actual))

        # The keyword is given here with odd case and surrounding blanks.
        expected = 1.2 * equiprobable_distribution(3)
        actual = parse_prior(' equiprobablE  ', Alphabet('123'), 1.2)
        self.assertTrue(all(expected == actual))

Beispiel #14

0

Datei anzeigen

    def test_parse_prior_equiprobable(self):
        """Check that ``parse_prior`` handles the "equiprobable" keyword."""
        expected = 20.0 * equiprobable_distribution(20)
        actual = parse_prior(
            "equiprobable", unambiguous_protein_alphabet, weight=20.0)
        self.assertTrue(all(expected == actual))

        # The keyword is given here with odd case and surrounding blanks.
        expected = 1.2 * equiprobable_distribution(3)
        actual = parse_prior(" equiprobablE  ", Alphabet("123"), 1.2)
        self.assertTrue(all(expected == actual))

Beispiel #15

0

Datei anzeigen

Datei: matrix.py Projekt: waylandy/HelperBunny

    def complement(self):
        """Complement this nucleic acid matrix in place.

        Replaces ``self.array`` with a reindexed copy and resets
        ``self.alphabets`` to ``(None, self.alphabet)``.
        """
        from weblogo.seq import Seq, Alphabet

        original = self.alphabet
        complemented_letters = Seq(original, original).complement()

        # Temporarily relabel the alphabet dimension with the complemented
        # letters, then reindex by the original alphabet.
        self.alphabets = (None, Alphabet(complemented_letters))
        reordered = self.reindex(original)

        # Restore the original labels and adopt the reindexed values.
        self.alphabets = (None, original)
        self.array = reordered.array

Beispiel #16

0

Datei anzeigen

    def test_fail_get(self):
        """Indexing with characters outside the alphabet raises IndexError."""
        alphabet = Alphabet('ABCD')
        values = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                             [13, 14, 15, 16]])
        matrix = SubMatrix(alphabet, values)

        for bad_key in (('E', 'A'), ('5', '6')):
            self.assertRaises(IndexError, matrix.__getitem__, bad_key)

        # FIXME
        self.assertRaises(IndexError, matrix.index, ('E', 'A'))

Beispiel #17

0

Datei anzeigen

Datei: test_matrix.py Projekt: cthyge/weblogo

    def test_fail_get(self):
        """Indexing with characters outside the alphabet raises IndexError."""
        alphabet = Alphabet("ABCD")
        values = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                             [13, 14, 15, 16]])
        matrix = SubMatrix(alphabet, values)

        for bad_key in (("E", "A"), ("5", "6")):
            self.assertRaises(IndexError, matrix.__getitem__, bad_key)

        # FIXME
        self.assertRaises(IndexError, matrix.index, ("E", "A"))

Beispiel #18

0

Datei anzeigen

 def test_get(self):
     """Check that ``index`` agrees with element-wise ``[]`` lookups."""
     alphabet = Alphabet('ABCD')
     values = np.asarray([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                          [13, 14, 15, 16]])
     matrix = SubMatrix(alphabet, values)
     row_letters = 'DCCBBBAAA'
     col_letters = 'BA'
     indexed = matrix.index((row_letters, col_letters))
     # Every cell of the bulk result must equal the single-pair lookup.
     for row, row_letter in enumerate(row_letters):
         for col, col_letter in enumerate(col_letters):
             assert matrix[row_letter, col_letter] == indexed[row, col]

Beispiel #19

0

Datei anzeigen

Datei: matrix.py Projekt: cthyge/weblogo

 def __init__(self,
              alphabet,
              array=None,
              typeof=None,
              name=None,
              description=None,
              scale=None):
     """Create a matrix indexed by the same alphabet in both dimensions.

     Args:
     - alphabet -- passed twice, as ``(alphabet, alphabet)``, to
                 ``AlphabeticArray.__init__`` and also stored, converted
                 with ``Alphabet(alphabet)``, as ``self.alphabet``.
     - array -- forwarded to ``AlphabeticArray.__init__`` as its second
              positional argument.
     - typeof -- forwarded to ``AlphabeticArray.__init__`` as its third
               positional argument.
     - name -- stored unchanged as ``self.name``.
     - description -- stored unchanged as ``self.description``.
     - scale -- stored unchanged as ``self.scale``.
     """
     AlphabeticArray.__init__(self, (alphabet, alphabet), array, typeof)
     self.alphabet = Alphabet(alphabet)
     self.name = name
     self.description = description
     self.scale = scale

Beispiel #20

0

Datei anzeigen

    def test_read_alphabets(self):
        """Check ``SubMatrix.read`` with compatible and incompatible alphabets."""
        # incompatible alphabets
        stream = StringIO(test_matrix3)
        with self.assertRaises(ValueError):
            SubMatrix.read(stream)

        # The same data is read without error when this alphabet is given.
        stream = StringIO(test_matrix3)
        SubMatrix.read(stream, alphabet=Alphabet('ARNDCQEGHILKMFPSTWYV'))

        other_stream = StringIO(test_matrix1)
        with self.assertRaises(ValueError):
            SubMatrix.read(other_stream, unambiguous_protein_alphabet)

Beispiel #21

0

Datei anzeigen

    def test_which_alphabet(self):
        """Check that ``Alphabet.which`` picks the expected alphabet.

        Covers one in-memory sequence and four sequence files obtained from
        ``data_stream``. The streams are closed in a ``finally`` clause so
        that they are released even when reading or an assertion fails.
        """
        a = Alphabet.which(Seq("ARNDCQEGHILKMFPSTWYVX"))
        assert a == unambiguous_protein_alphabet

        # (data file name, alphabet expected from Alphabet.which)
        cases = (
            ('cap.fa', unambiguous_dna_alphabet),
            ('cox2.msf', unambiguous_protein_alphabet),
            ('Rv3829c.fasta', unambiguous_protein_alphabet),
            ('chain_B.fasta', unambiguous_protein_alphabet),
        )

        streams = []
        try:
            # Open every stream first, then read them all, then assert:
            # the same order of operations as before.
            for filename, _expected in cases:
                streams.append(data_stream(filename))

            parsed = [seq_io.read(stream) for stream in streams]

            for seqs, (_filename, expected) in zip(parsed, cases):
                self.assertEqual(Alphabet.which(seqs), expected)
        finally:
            # Only streams that were actually opened are in the list.
            for stream in streams:
                stream.close()

Beispiel #22

0

Datei anzeigen

    def test_ords(self):
        """Exercise ``SeqList.ords`` with and without an explicit alphabet."""
        texts = (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
        members = [Seq(text, nucleic_alphabet) for text in texts]
        seqs = SeqList(list(members), nucleic_alphabet)
        seqs.ords()
        # self.assertEqual( a.shape, (3, 28) )

        # Fails if seqs are of different lengths
        # FIXME?
        # s3 = Seq("ACGTUR", nucleic_alphabet )
        # seqs2 = SeqList( [ s0,s1,s3,s2],  nucleic_alphabet)
        # self.assertRaises(ValueError, seqs2.ords )

        # Use a different alphabet
        seqs.ords(nucleic_alphabet)

        # No alphabet
        unlabelled = SeqList(list(members))
        unlabelled.ords(alphabet=Alphabet("ABC"))

        # Fail if no alphabet
        self.assertRaises(ValueError, unlabelled.ords)

Beispiel #23

0

Datei anzeigen

 def test_alphabet_alphabetic(self):
     """Check ``alphabetic`` for a valid string and an invalid one."""
     alphabet = Alphabet("alphbet")
     self.assertTrue(alphabet.alphabetic("alphbet"))
     # One character outside the alphabet makes the string non-alphabetic.
     self.assertFalse(alphabet.alphabetic("alphbetX"))

Beispiel #24

0

Datei anzeigen

Datei: matrix.py Projekt: cthyge/weblogo

    def read_transfac(cls, fin, alphabet=None):
        """Parse a TRANSFAC-format PWM from a file.

        Returns a Motif object, representing the provided
        PWM along with an inferred or provided alphabet.

        Args:
        - fin -- an iterable of text lines, e.g. an open file. Parsing
               starts at the first line whose first token is "PO" or "P0"
               and stops at the first following line whose first token is
               in ``cls._TRANSFAC_DELIM_LINES``.
        - alphabet -- optional alphabet (string or Alphabet) to reindex the
                    result by. If it fails the ``alphabetic`` check against
                    the alphabet found in the file, the file's alphabet is
                    used instead. If omitted, the unambiguous RNA, DNA and
                    protein alphabets are tried in that order before
                    falling back to the file's alphabet.

        Raises:
            ValueError: if fewer than two usable lines are found, the
                header line is missing, rows differ in length, or a row or
                column header is malformed.
        """
        po_markers = ("PO", "P0")

        items = []

        start = False
        for line in fin:
            # Skip blank lines and comments. Testing the stripped line also
            # copes with an empty string, which has no first character.
            if not line.strip() or line[0] == "#":
                continue  # pragma: no cover

            stuff = line.split()

            if stuff[0] in po_markers:
                start = True

            # 'XX' delimiters may precede the first motif
            if start:
                if stuff[0] in cls._TRANSFAC_DELIM_LINES:
                    break
                items.append(stuff)

        if len(items) < 2:
            raise ValueError("Vacuous file.")

        # Is the first line a header line?
        header = items.pop(0)
        hcols = len(header)
        cols = len(items[0])
        if not (header[0] in po_markers or hcols == cols - 1
                or hcols == cols - 2):
            raise ValueError("Missing header line!")  # pragma: no cover

        # Do all lines (except the first) contain the same number of items?
        for i, row in enumerate(items[1:], start=1):
            if cols != len(row):
                raise ValueError("Inconsistant length, row: {}".format(
                    i))  # pragma: no cover

        # Vertical or horizontal arrangement?
        if header[0] in po_markers:
            header.pop(0)

        # The header lists positions only if every item is an integer;
        # otherwise it is taken to list the alphabet letters.
        position_header = True

        for h in header:
            if not ischar(h):
                raise ValueError("Expected a single character per header "
                                 'item, but got "{}" as one item'.format(
                                     h))  # pragma: no cover
            if not isint(h):
                position_header = False

        alphabet_header = not position_header

        # Check row headers
        if alphabet_header:
            for i, r in enumerate(items):
                if not isint(r[0]) and r[0][0] != "P":
                    raise ValueError("Expected position "
                                     "as first item on line {}".format(
                                         i))  # pragma: no cover
                r.pop(0)
            # The letters come from the column header; join them once.
            defacto_alphabet = "".join(header)
        else:
            a = []  # pragma: no cover
            for i, r in enumerate(items):  # pragma: no cover
                if not ischar(r[0]) and r[0][0] != "P":  # pragma: no cover
                    raise ValueError("Expected position "  # pragma: no cover
                                     "as first item on line {}".format(
                                         i))  # pragma: no cover
                a.append(r.pop(0))  # pragma: no cover
            defacto_alphabet = "".join(a)  # pragma: no cover

        # Check defacto_alphabet
        defacto_alphabet = Alphabet(defacto_alphabet)

        if alphabet:
            alphabet = Alphabet(alphabet)
            if not defacto_alphabet.alphabetic(alphabet):
                # Allow alphabet to be a superset of defacto_alphabet
                alphabet = defacto_alphabet

        else:
            alphabets = (
                unambiguous_rna_alphabet,
                unambiguous_dna_alphabet,
                unambiguous_protein_alphabet,
            )
            for a in alphabets:
                if defacto_alphabet.alphabetic(a):
                    alphabet = a
                    break
            if not alphabet:
                alphabet = defacto_alphabet  # pragma: no cover

        # The last item of each row may be extra cruft. Remove
        if len(items[0]) == len(header) + 1:
            for r in items:
                r.pop()

        # items should now be a list of lists of numbers (as strings)
        matrix = np.array([[float(value) for value in row] for row in items],
                          dtype=np.float64)

        if position_header:
            # ndarray.transpose() returns a transposed view and leaves the
            # original untouched, so the result has to be kept. Rows are
            # letters here; the Motif below is built from the transpose.
            matrix = matrix.transpose()  # pragma: no cover

        return Motif(defacto_alphabet, matrix).reindex(alphabet)

Beispiel #25

0

Datei anzeigen

Datei: matrix.py Projekt: cthyge/weblogo

        try:
            return object.__getattr__(self, name)
        except AttributeError:
            return getattr(self.array, name)

    def __setattr__(self, name, value):
        """Set an attribute on this object, falling back to ``self.array``.

        The assignment is first attempted on the object itself through
        ``object.__setattr__``. Only if that raises AttributeError is the
        attribute set on ``self.array`` instead.
        """
        try:
            return object.__setattr__(self, name, value)
        except AttributeError:  # pragma: no cover
            return setattr(self.array, name, value)  # pragma: no cover


# End class AlphabeticArray

# TODO: move to seq?
# Module-level Alphabet built from the letters "ARNDCQEGHILKMFPSTWYVBZX".
# NOTE(review): presumably the default letter order for substitution
# matrices -- confirm against the code that uses it.
submatrix_alphabet = Alphabet("ARNDCQEGHILKMFPSTWYVBZX")


class SubMatrix(AlphabeticArray):
    """A two dimensional array indexed by an Alphabet. Used to hold substitution
    matrices and similar information.

    Various standard substitution matrices are available from the data package
    >>> from weblogo import data
    >>> mat = SubMatrix.read(data.data_stream('blosum100'))

    Attr:
    - alphabet     -- An Alphabet
    - array        -- A numpy array
    - name         -- The name of this matrix (if any) as a string.
    - description  -- The description, if any.

Beispiel #26

0

Datei anzeigen

 def test_ords(self):
     """Check that ``Seq.ords`` returns the index of every character."""
     alphabet = Alphabet("ABC")
     seq = Seq("ABCCBA", alphabet)
     indices = list(seq.ords())
     self.assertEqual(indices, [0, 1, 2, 2, 1, 0])

Beispiel #27

0

Datei anzeigen

 def test_repr(self):
     """Smoke test: ``repr`` and ``str`` of an Alphabet must not raise."""
     alphabet = Alphabet("kjdahf")
     # Only the absence of an exception is checked; the text is ignored.
     for render in (repr, str):
         render(alphabet)

Beispiel #28

0

Datei anzeigen

 def test_tally_nonalphabetic(self):
     """Tally a DNA sequence against an alphabet holding only A and C."""
     seq = Seq("AGTCAGCTACGACGCGC", dna_alphabet)
     counts = seq.tally(Alphabet("AC"))
     # One count per letter of the tally alphabet.
     self.assertEqual(2, len(counts))
     self.assertEqual(list(counts), [4, 6])

Beispiel #29

0

Datei anzeigen

 def test_none(self):
     """``Alphabet(None)`` must compare equal to the generic alphabet."""
     self.assertEqual(Alphabet(None), generic_alphabet)

Beispiel #30

0

Datei anzeigen

 def test_alphabet_chrs(self):
     """Check that ``chrs`` turns a tuple of indices into a sequence."""
     alphabet = Alphabet("alph")
     expected = Seq("ppla", alphabet)
     actual = alphabet.chrs((2, 2, 1, 0))
     self.assertEqual(expected, actual)