Esempio n. 1
0
    def test_dump_load_and_get_position(self):
        """Check get_position() on an index restored from a dump.

        The fixture index ``self.fm`` is serialized into a BinaryOutput,
        read back into a brand-new FMIndex, and each entry of ``self.pd``
        is compared with ``get_position`` at the same row number.
        """
        sink = BinaryOutput()
        self.fm.dump(sink)

        restored = FMIndex()
        restored.load(BinaryInput(sink.result()))

        for row, expected_position in enumerate(self.pd):
            actual_position = restored.get_position(row)
            self.assertEqual(expected_position, actual_position)
Esempio n. 2
0
    def test_dump_load_and_get_substring(self):
        """Check get_substring() on an index restored from a dump.

        For every start offset ``i`` the reloaded index is asked for
        ``size()`` characters starting at ``i``; the answer must equal the
        matching entry of ``self.sd``.
        """
        sink = BinaryOutput()
        self.fm.dump(sink)

        restored = FMIndex()
        restored.load(BinaryInput(sink.result()))

        for start, expected_text in enumerate(self.sd):
            self.assertEqual(
                expected_text,
                restored.get_substring(start, restored.size()),
            )
Esempio n. 3
0
 def test_get_substring2(self):
     """Check get_substring() on three documents appended back to back.

     No separator is appended between documents, so each document starts
     where the previous one ended (offsets 0, 11 and 22).
     """
     documents = (
         "abracadabra",
         "mississippi",
         "abracadabra mississippi",
     )

     self.fm = FMIndex()
     for document in documents:
         self.fm.append(document)
     self.fm.build(3, 256)

     offset = 0
     for document in documents:
         length = len(document)
         self.assertEqual(document, self.fm.get_substring(offset, length))
         offset += length
Esempio n. 4
0
    def test_dump_load_and_get_substring_boundary(self):
        """Check that get_substring() rejects a start position of size().

        The fixture index is dumped and reloaded into a fresh FMIndex; asking
        the reloaded index for a substring starting at ``size()`` must raise.
        """
        dump = BinaryOutput()
        self.fm.dump(dump)
        fm = FMIndex()
        fm.load(BinaryInput(dump.result()))

        # The concrete exception type is not visible from this test, so any
        # Exception subclass is accepted. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate instead of being counted
        # as a passing result.
        with self.assertRaises(Exception):
            fm.get_substring(fm.size(), 0)
Esempio n. 5
0
    def test_dump_load_and_get_rows(self):
        """Check get_rows() on an index restored from a dump.

        Every slice ``self.str[i:j]`` with ``i < j < size()`` is looked up in
        the reloaded index, and the result must equal the count recorded in
        ``self.rd`` for that slice.
        """
        sink = BinaryOutput()
        self.fm.dump(sink)

        restored = FMIndex()
        restored.load(BinaryInput(sink.result()))

        for start in range(restored.size()):
            for stop in range(start + 1, restored.size()):
                needle = self.str[start:stop]
                # Both the original and the reloaded index are queried once
                # with the result discarded before the asserted lookup.
                self.fm.get_rows(needle)
                restored.get_rows(needle)
                expected_rows = self.rd[needle]
                self.assertEqual(expected_rows, restored.get_rows(needle))
Esempio n. 6
0
 def test_get_substring_before_build(self):
     """Check get_substring() on an index that has not been built.

     Each document is followed by a one-character '\\x01' separator, so the
     documents start at offsets 0, 12 and 24. build() is deliberately never
     called here.
     """
     separator = '\x01'
     documents = (
         "abracadabra",
         "mississippi",
         "abracadabra mississippi",
     )

     self.fm = FMIndex()
     for document in documents:
         self.fm.append(document)
         self.fm.append(separator)

     offset = 0
     for document in documents:
         length = len(document)
         self.assertEqual(document, self.fm.get_substring(offset, length))
         # Skip the document itself plus its trailing separator.
         offset += length + len(separator)
Esempio n. 7
0
    def setUp(self):
        """Build the FMIndex fixture and the expected-value tables.

        Attributes set:
            docd: the three source documents.
            didd: the document number of every character, followed by one
                extra entry equal to the number of documents.
            fm:   an FMIndex with every document appended, then built.
            str:  the documents concatenated, plus a trailing chr(0) end
                marker.
            rd:   maps each non-empty slice of ``str`` to the number of
                (start, length) positions that produce it.
            pd:   start offsets of the rotations of ``str``, ordered by the
                sorted rotations.
            sd:   each suffix of ``str`` with the end marker removed.
        """
        self.docd = [
            "abracadabra",
            "mississippi",
            "abracadabra mississippi",
        ]

        self.fm = FMIndex()
        self.didd = []
        for doc_id, document in enumerate(self.docd):
            self.didd.extend([doc_id] * len(document))
            self.fm.append(document)
        self.didd.append(len(self.docd))

        self.fm.build(25, 256)

        # The end marker is added to the reference text only after the index
        # has been built from the plain documents.
        text = "".join(self.docd) + chr(0)
        self.str = text
        text_length = len(text)

        # Count occurrences of every non-empty slice of the reference text.
        self.rd = {}
        for start in range(text_length):
            for stop in range(start + 1, text_length + 1):
                piece = text[start:stop]
                self.rd[piece] = self.rd.get(piece, 0) + 1

        # Sort all rotations and keep only the offset each one started at.
        rotations = sorted(
            (text[shift:] + text[:shift], shift)
            for shift in range(text_length)
        )
        self.pd = [shift for _rotation, shift in rotations]

        self.sd = [
            text[start:].replace(chr(0), '')
            for start in range(text_length)
        ]
Esempio n. 8
0
    def test_get_substring_with_compressed_word(self):
        """Check get_substring() on an index created with rawmode=True.

        Characters are mapped to small integer codes (their position in
        ``alphabet``) before being appended. Each document is followed by
        the single code ``1`` as a separator, so documents start at offsets
        0, 12 and 24. Results are mapped back to text before comparison.
        """
        alphabet = [
            '\x00', '\x01', '\x03', 'a', 'b', 'r', 'c', 'd', 'm', 'i', 's',
            'p', ' '
        ]
        code_of = {symbol: code for code, symbol in enumerate(alphabet)}

        def encode(string):
            # Translate text into the list of integer codes.
            return [code_of[symbol] for symbol in string]

        def decode(rawcodes):
            # Translate a sequence of integer codes back into text.
            return "".join(alphabet[code] for code in rawcodes)

        documents = (
            "abracadabra",
            "mississippi",
            "abracadabra mississippi",
        )

        self.fm = FMIndex(rawmode=True)
        for document in documents:
            self.fm.append(encode(document))
            self.fm.append([1])
        self.fm.build(3, 256)

        offset = 0
        for document in documents:
            length = len(document)
            found = self.fm.get_substring(offset, length)
            self.assertEqual(document, decode(found))
            # Skip the document plus its one-code separator.
            offset += length + 1