def test_matcher_cache(self):
    # Issues several lookups in a row against one Matcher instance and
    # checks every result.  NOTE(review): the test name suggests this is
    # aimed at result caching inside Matcher -- the cache itself is not
    # visible from here, confirm against Matcher.
    one, two, three = pack('I', 1), pack('I', 2), pack('I', 3)
    entries = [
        (u'す'.encode('utf8'), one),
        (u'すも'.encode('utf8'), two),
        (u'すもも'.encode('utf8'), three),
    ]
    # This variant of the API returns a pair; only the second item is used.
    _processed, transducer = fst.create_minimum_transducer(entries)
    matcher = Matcher([fst.compileFST(transducer)])

    # (query, expected outputs).  The queries run in exactly this order,
    # including the repeated u'すもも'.
    lookups = [
        (u'すもも', {one, two, three}),  # matches 'す', 'すも', 'すもも'
        (u'すもうとり', {one, two}),
        (u'すもも', {one, two, three}),
        (u'すもももももももものうち', {one, two, three}),
        (u'す', {one}),
    ]
    for query, outputs in lookups:
        self.assertEqual((True, outputs),
                         matcher.run(query.encode('utf8'), True))
def test_create_minimum_transducer1(self):
    # Builds one transducer from month abbreviations mapped to packed day
    # counts, compiles it, and checks what a Matcher returns for each key.
    # 'feb' is listed twice (28 and 29); the lookup is expected to return
    # both outputs.
    inputs = [
        ('apr'.encode('utf8'), pack('I', 30)),
        ('aug'.encode('utf8'), pack('I', 31)),
        ('dec'.encode('utf8'), pack('I', 31)),
        ('feb'.encode('utf8'), pack('I', 28)),
        ('feb'.encode('utf8'), pack('I', 29)),
        ('jan'.encode('utf8'), pack('I', 31)),
        ('jul'.encode('utf8'), pack('I', 31)),
        ('jun'.encode('utf8'), pack('I', 30)),
        ('may'.encode('utf8'), pack('I', 31)),
    ]
    dictionary = fst.create_minimum_transducer(inputs)
    data = fst.compileFST(dictionary)
    m = Matcher(data)

    # accepted strings
    self.assertEqual((True, {pack('I', 30)}), m.run('apr'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('aug'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('dec'.encode('utf8')))
    self.assertEqual((True, {pack('I', 28), pack('I', 29)}),
                     m.run('feb'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('jan'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('jul'.encode('utf8')))
    self.assertEqual((True, {pack('I', 30)}), m.run('jun'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('may'.encode('utf8')))

    # not accepted string -- encoded to bytes like every other query, so
    # the rejection is tested on the same input type as the keys.
    self.assertEqual((False, set()), m.run('mar'.encode('utf8')))
def test_create_minimum_transducer2(self):
    # Builds one transducer from UTF-8 encoded Japanese keys mapped to
    # UTF-8 encoded outputs and checks what a Matcher returns for them.
    entries = [
        (u'さくら'.encode('utf8'), u'白'.encode('utf8')),
        (u'さくらんぼ'.encode('utf8'), u'赤'.encode('utf8')),
        (u'すもも'.encode('utf8'), u'赤'.encode('utf8')),
        (u'なし'.encode('utf8'), u'茶'.encode('utf8')),
        (u'もも'.encode('utf8'), u'桃'.encode('utf8')),
    ]
    transducer = fst.create_minimum_transducer(entries)
    matcher = Matcher(fst.compileFST(transducer))

    # (query, expected result).  run() is called with its default mode; the
    # expectations show that keys which are prefixes of the query also
    # contribute their outputs (u'さくら' inside u'さくらんぼ').
    checks = [
        # accepted strings
        (u'さくら', (True, {u'白'.encode('utf8')})),
        (u'さくらんぼ',
         (True, {u'白'.encode('utf8'), u'赤'.encode(u'utf8')})),
        (u'さくらさく', (True, {u'白'.encode('utf8')})),
        (u'すもも', (True, {u'赤'.encode('utf8')})),
        (u'なし', (True, {u'茶'.encode('utf8')})),
        (u'もも', (True, {u'桃'.encode('utf8')})),
        # not accepted string
        (u'みかん', (False, set())),
    ]
    for query, expected in checks:
        self.assertEqual(expected, matcher.run(query.encode('utf8')))
def test_create_minimum_transducer2(self):
    # Splits the entries over two transducers, gives both compiled FSTs to
    # a single Matcher, and checks that keys from either one are found.
    first_half = [
        ('さくら'.encode('utf8'), '白'.encode('utf8')),
        ('さくらんぼ'.encode('utf8'), '赤'.encode('utf8')),
        ('すもも'.encode('utf8'), '赤'.encode('utf8')),
    ]
    second_half = [
        ('なし'.encode('utf8'), '茶'.encode('utf8')),
        ('もも'.encode('utf8'), '桃'.encode('utf8')),
    ]
    # Only the second item of each returned pair is used.
    _processed, transducer1 = fst.create_minimum_transducer(first_half)
    _processed, transducer2 = fst.create_minimum_transducer(second_half)
    matcher = Matcher([fst.compileFST(transducer1),
                       fst.compileFST(transducer2)])

    # (query, expected result), run() being called with its default mode.
    checks = [
        # accepted strings
        ('さくら', (True, {'白'.encode('utf8')})),
        ('さくらんぼ', (True, {'白'.encode('utf8'), '赤'.encode('utf8')})),
        ('さくらさく', (True, {'白'.encode('utf8')})),
        ('すもも', (True, {'赤'.encode('utf8')})),
        ('なし', (True, {'茶'.encode('utf8')})),
        ('もも', (True, {'桃'.encode('utf8')})),
        # not accepted string
        ('みかん', (False, set())),
    ]
    for query, expected in checks:
        self.assertEqual(expected, matcher.run(query.encode('utf8')))
def test_common_prefix_match(self):
    # Looks up u'すもも' with the second run() argument set to True and
    # expects the outputs of all three keys, i.e. of the full key and of
    # the two keys that are prefixes of it.
    entries = [
        (u'す'.encode('utf8'), pack('I', 1)),
        (u'すも'.encode('utf8'), pack('I', 2)),
        (u'すもも'.encode('utf8'), pack('I', 3)),
    ]
    transducer = fst.create_minimum_transducer(entries)
    compiled = fst.compileFST(transducer)
    matcher = Matcher(compiled)

    # matches 'す', 'すも', 'すもも'
    wanted = (True, {pack('I', 1), pack('I', 2), pack('I', 3)})
    actual = matcher.run(u'すもも'.encode('utf8'), True)
    self.assertEqual(wanted, actual)
def test_perfect_match(self):
    # Looks up 'すもも' with the second run() argument set to False and
    # expects only the output stored for that exact key, even though 'す'
    # and 'すも' are keys as well.
    entries = [
        ('す'.encode('utf8'), pack('I', 1)),
        ('すも'.encode('utf8'), pack('I', 2)),
        ('すもも'.encode('utf8'), pack('I', 3)),
    ]
    # Only the second item of the returned pair is used.
    _processed, transducer = fst.create_minimum_transducer(entries)
    matcher = Matcher([fst.compileFST(transducer)])

    # matches 'すもも'
    wanted = (True, {pack('I', 3)})
    actual = matcher.run('すもも'.encode('utf8'), False)
    self.assertEqual(wanted, actual)
def test_create_minimum_transducer2(self):
    # Splits the entries over two transducers, gives both compiled FSTs to
    # a single Matcher, and checks that keys from either one are found.
    first_half = [
        (u'さくら'.encode('utf8'), u'白'.encode('utf8')),
        (u'さくらんぼ'.encode('utf8'), u'赤'.encode('utf8')),
        (u'すもも'.encode('utf8'), u'赤'.encode('utf8')),
    ]
    second_half = [
        (u'なし'.encode('utf8'), u'茶'.encode('utf8')),
        (u'もも'.encode('utf8'), u'桃'.encode('utf8')),
    ]
    # Only the second item of each returned pair is used.
    _processed, transducer1 = fst.create_minimum_transducer(first_half)
    _processed, transducer2 = fst.create_minimum_transducer(second_half)
    compiled = [fst.compileFST(transducer1), fst.compileFST(transducer2)]
    matcher = Matcher(compiled)

    # (query, expected result), run() being called with its default mode.
    checks = [
        # accepted strings
        (u'さくら', (True, {u'白'.encode('utf8')})),
        (u'さくらんぼ',
         (True, {u'白'.encode('utf8'), u'赤'.encode(u'utf8')})),
        (u'さくらさく', (True, {u'白'.encode('utf8')})),
        (u'すもも', (True, {u'赤'.encode('utf8')})),
        (u'なし', (True, {u'茶'.encode('utf8')})),
        (u'もも', (True, {u'桃'.encode('utf8')})),
        # not accepted string
        (u'みかん', (False, set())),
    ]
    for query, expected in checks:
        self.assertEqual(expected, matcher.run(query.encode('utf8')))
def test_create_minimum_transducer1(self):
    # Splits the month -> packed day count entries over two transducers,
    # gives both compiled FSTs to a single Matcher, and checks the lookups.
    # 'feb' appears in both halves (28 and 29); the lookup is expected to
    # return both outputs.
    inputs1 = [
        ('apr'.encode('utf8'), pack('I', 30)),
        ('aug'.encode('utf8'), pack('I', 31)),
        ('dec'.encode('utf8'), pack('I', 31)),
        ('feb'.encode('utf8'), pack('I', 28)),
    ]
    inputs2 = [
        ('feb'.encode('utf8'), pack('I', 29)),
        ('jan'.encode('utf8'), pack('I', 31)),
        ('jul'.encode('utf8'), pack('I', 31)),
        ('jun'.encode('utf8'), pack('I', 30)),
        ('may'.encode('utf8'), pack('I', 31)),
    ]
    # Only the second item of each returned pair is used.
    processed, dictionary1 = fst.create_minimum_transducer(inputs1)
    processed, dictionary2 = fst.create_minimum_transducer(inputs2)
    data = [fst.compileFST(dictionary1), fst.compileFST(dictionary2)]
    m = Matcher(data)

    # accepted strings
    self.assertEqual((True, {pack('I', 30)}), m.run('apr'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('aug'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('dec'.encode('utf8')))
    self.assertEqual((True, {pack('I', 28), pack('I', 29)}),
                     m.run('feb'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('jan'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('jul'.encode('utf8')))
    self.assertEqual((True, {pack('I', 30)}), m.run('jun'.encode('utf8')))
    self.assertEqual((True, {pack('I', 31)}), m.run('may'.encode('utf8')))

    # not accepted string -- encoded to bytes like every other query, so
    # the rejection is tested on the same input type as the keys.
    self.assertEqual((False, set()), m.run('mar'.encode('utf8')))
def test_common_prefix_match(self):
    # Looks up u'すもも' with the second run() argument set to True and
    # expects the outputs of all three keys, i.e. of the full key and of
    # the two keys that are prefixes of it.
    out1, out2, out3 = pack('I', 1), pack('I', 2), pack('I', 3)
    entries = [
        (u'す'.encode('utf8'), out1),
        (u'すも'.encode('utf8'), out2),
        (u'すもも'.encode('utf8'), out3),
    ]
    # Only the second item of the returned pair is used.
    _processed, transducer = fst.create_minimum_transducer(entries)
    matcher = Matcher([fst.compileFST(transducer)])

    # matches 'す', 'すも', 'すもも'
    self.assertEqual(
        (True, {out1, out2, out3}),
        matcher.run(u'すもも'.encode('utf8'), True),
    )
def test_perfect_match(self):
    # Looks up u'すもも' with the second run() argument set to False and
    # expects only the output stored for that exact key, even though u'す'
    # and u'すも' are keys as well.
    out1, out2, out3 = pack('I', 1), pack('I', 2), pack('I', 3)
    entries = [
        (u'す'.encode('utf8'), out1),
        (u'すも'.encode('utf8'), out2),
        (u'すもも'.encode('utf8'), out3),
    ]
    transducer = fst.create_minimum_transducer(entries)
    matcher = Matcher(fst.compileFST(transducer))

    # matches 'すもも'
    self.assertEqual(
        (True, {out3}),
        matcher.run(u'すもも'.encode('utf8'), False),
    )
def test_matcher_cache(self):
    # Sends a sequence of queries to one Matcher and verifies each answer.
    # NOTE(review): by its name this test targets caching inside Matcher;
    # nothing here shows the cache directly -- confirm against Matcher.
    first, second, third = pack('I', 1), pack('I', 2), pack('I', 3)
    entries = [
        (u'す'.encode('utf8'), first),
        (u'すも'.encode('utf8'), second),
        (u'すもも'.encode('utf8'), third),
    ]
    transducer = fst.create_minimum_transducer(entries)
    compiled = fst.compileFST(transducer)
    matcher = Matcher(compiled)

    all_three = {first, second, third}
    # Queries are issued in this exact order; u'すもも' is asked twice with
    # a different query in between.
    sequence = (
        (u'すもも', all_three),  # matches 'す', 'すも', 'すもも'
        (u'すもうとり', {first, second}),
        (u'すもも', all_three),
        (u'すもももももももものうち', all_three),
        (u'す', {first}),
    )
    for query, outputs in sequence:
        result = matcher.run(query.encode('utf8'), True)
        self.assertEqual((True, outputs), result)