Esempio n. 1
0
 def test_type(self):
     """VectorFromMatches should build a vector of the requested class"""
     # No constructor given: the result is expected to be immutable.
     default_vec = VectorFromMatches('a', 'a')
     self.assertEqual(default_vec, Bitvector('1'))
     self.assertTrue(isinstance(default_vec, ImmutableBitvector))

     # Explicit constructor: the result must accept item assignment and
     # still be an instance of the class that was asked for.
     mutable_vec = VectorFromMatches('a', 'a', constructor=MutableBitvector)
     self.assertEqual(mutable_vec, Bitvector('1'))
     mutable_vec[0] = 0
     self.assertEqual(mutable_vec, Bitvector('0'))
     self.assertTrue(isinstance(mutable_vec, MutableBitvector))
Esempio n. 2
0
    def testSingleBasePattern(self):
        """VectorFromMatches should mark each char equal to a one-char pattern"""
        # Each row: (sequence, expected bits for 'a', expected bits for 'b').
        cases = [
            ('', '', ''),
            ('a', '1', '0'),
            ('b', '0', '1'),
            ('aaa', '111', '000'),
            ('bbb', '000', '111'),
            ('aba', '101', '010'),
            ('bab', '010', '101'),
        ]

        for seq, expect_a, expect_b in cases:
            self.assertEqual(str(VectorFromMatches(seq, 'a')), expect_a)
            self.assertEqual(str(VectorFromMatches(seq, 'b')), expect_b)
Esempio n. 3
0
 def testMultiBasePattern(self):
     """VectorFromMatches should mark every char covered by a multi-char match"""
     # Each row: (sequence, expected bits with overlapping matches,
     #            expected bits with non-overlapping matches).
     cases = [
         ('', '', ''),
         ('a', '0', '0'),
         ('aba', '111', '111'),
         ('abab', '1110', '1110'),
         ('ababa', '11111', '11100'),
         ('ababab', '111110', '111000'),
         ('abaaba', '111111', '111111'),
         ('aaba', '0111', '0111'),
     ]
     for seq, with_overlap, without_overlap in cases:
         # The default call and an explicit third argument of 1 are both
         # checked against the overlapping expectation.
         default_result = VectorFromMatches(seq, 'aba')
         self.assertEqual(str(default_result), with_overlap)
         overlap_result = VectorFromMatches(seq, 'aba', 1)
         self.assertEqual(str(overlap_result), with_overlap)
         # A third argument of 0 is checked against the discrete expectation.
         discrete_result = VectorFromMatches(seq, 'aba', 0)
         self.assertEqual(str(discrete_result), without_overlap)
Esempio n. 4
0
    def testSingleBaseRegex(self):
        """VectorFromMatches should treat one-char regexes and lists alike"""
        sequences = ['', 'a', 'b', 'aaa', 'bbb', 'aba', 'bab', 'axb', 'xxx']

        # Each spec: (compiled regex, equivalent list of strings,
        #             expected bits for every entry of `sequences`).
        specs = [
            (
                re.compile('a'),
                ['a'],
                ['', '1', '0', '111', '000', '101', '010', '100', '000'],
            ),
            (
                re.compile('b'),
                ['b'],
                ['', '0', '1', '000', '111', '010', '101', '001', '000'],
            ),
            (
                re.compile('a|b'),
                ['a', 'b'],
                ['', '1', '1', '111', '111', '111', '111', '101', '000'],
            ),
        ]

        for idx, seq in enumerate(sequences):
            for regex, alternatives, expected in specs:
                wanted = expected[idx]
                # The regex form and the list form must give the same bits.
                self.assertEqual(str(VectorFromMatches(seq, regex)), wanted)
                # Hand over a fresh list on every call so the spec table is
                # never shared with the function under test.
                from_list = VectorFromMatches(seq, list(alternatives))
                self.assertEqual(str(from_list), wanted)
Esempio n. 5
0
    def testMultiBaseRegex(self):
        """VectorFromMatches should mark all chars covered by multi-char regexes"""
        sequences = ['aaabbb', 'aaaxbbb', 'ababab', 'abaabaabab']
        patterns = ['aaa', 'bbb', 'aba', 'aaa|bbb', 'aaa|aab']

        # pattern -> (expected bits with overlap, expected bits without).
        # Every inner list is ordered like `sequences`:
        #   aaabbb, aaaxbbb, ababab, abaabaabab
        expected = {
            'aaa': (
                ['111000', '1110000', '000000', '0000000000'],
                ['111000', '1110000', '000000', '0000000000'],
            ),
            'bbb': (
                ['000111', '0000111', '000000', '0000000000'],
                ['000111', '0000111', '000000', '0000000000'],
            ),
            'aba': (
                ['000000', '0000000', '111110', '1111111110'],
                ['000000', '0000000', '111000', '1111111110'],
            ),
            'aaa|bbb': (
                ['111111', '1110111', '000000', '0000000000'],
                ['111111', '1110111', '000000', '0000000000'],
            ),
            'aaa|aab': (
                ['111100', '1110000', '000000', '0011111100'],
                ['111000', '1110000', '000000', '0011111100'],
            ),
        }

        # Compile each pattern once, keeping the order given in `patterns`.
        compiled = [(pat, re.compile(pat)) for pat in patterns]

        for idx, seq in enumerate(sequences):
            for pat, regex in compiled:
                with_overlap, without_overlap = expected[pat]
                # Third argument 1: overlapping matches.
                overlapping = VectorFromMatches(seq, regex, 1)
                self.assertEqual(str(overlapping), with_overlap[idx])
                # Third argument 0: non-overlapping matches.
                discrete = VectorFromMatches(seq, regex, 0)
                self.assertEqual(str(discrete), without_overlap[idx])
Esempio n. 6
0
 def testEmptyPattern(self):
     """VectorFromMatches should give one zero per char for an empty pattern"""
     for seq in ('', 'a', 'aa', 'aaaaaaaaaa'):
         # Expect a run of zeroes exactly as long as the input string.
         all_zeroes = '0' * len(seq)
         self.assertEqual(str(VectorFromMatches(seq, '')), all_zeroes)
Esempio n. 7
0
 def testBothEmpty(self):
     """VectorFromMatches should give an empty vector for empty inputs"""
     result = VectorFromMatches('', '')
     self.assertEqual(str(result), '')