def test_foo(self):
    """Query 'baz' against {'foo', 'bar'} yields only 'bar'.

    The same result is expected when the second Iterator argument
    (presumably the edit tolerance) is 1 and when it is 2.
    """
    automaton = make_dawg(('foo', 'bar'))
    for tolerance in (1, 2):
        matches = set(Iterator('baz', tolerance, automaton))
        self.assertEqual({'bar'}, matches)
def test_tolerance(self):
    """Query 'mutter' finds nothing at 1 and every word at 2.

    The number is the second Iterator argument (presumably the edit
    tolerance); the dictionary is ('meter', 'otter', 'potter').
    """
    words = ('meter', 'otter', 'potter')
    automaton = make_dawg(words)
    expectations = (
        (1, set()),
        (2, set(words)),
    )
    for tolerance, expected in expectations:
        matches = set(Iterator('mutter', tolerance, automaton))
        self.assertEqual(expected, matches)
def test_this(self):
    """Query 'the' finds nothing at 1 and the whole dictionary at 2.

    The number is the second Iterator argument (presumably the edit
    tolerance); the dictionary is ('this', 'that', 'other').
    """
    words = ('this', 'that', 'other')
    automaton = make_dawg(words)

    # Nothing is expected with the stricter setting.
    self.assertEqual(set(), set(Iterator('the', 1, automaton)))

    # With the looser setting every dictionary word is expected.
    self.assertEqual(set(words), set(Iterator('the', 2, automaton)))
def main():
    """Print the non-zero entries of the child-count totals of a DAWG.

    Expects exactly one command-line argument, the input file path.  The
    dictionary read from that file is turned into a DAWG, count_children
    is called on its root with an initially empty list, and every entry
    of that list that is greater than zero is printed as "index: value".

    Raises:
        Exception: if the number of command-line arguments is not one.
    """
    argv = sys.argv
    if len(argv) != 2:
        raise Exception("usage: %s input-file" % argv[0])

    source_path = os.path.abspath(argv[1])
    dawg = make_dawg(make_test_dict(source_path))

    # count_children is expected to fill this list in place.
    totals = []
    count_children(dawg.root, totals)

    for index, total in enumerate(totals):
        if total > 0:
            print("%d: %d" % (index, total))
def test_initial_final(self):
    """Query 'b' with second Iterator argument 1 finds both '' and 'a'.

    The dictionary contains the empty string alongside 'a'.
    """
    expected = {'', 'a'}
    automaton = make_dawg(('', 'a'))
    matches = set(Iterator('b', 1, automaton))
    self.assertEqual(expected, matches)
def test_binary(self):
    """Query 'abba' with second Iterator argument 3 finds both words.

    The dictionary is ('ababa', 'babab'), built over a two-letter
    alphabet.
    """
    words = ('ababa', 'babab')
    matches = set(Iterator('abba', 3, make_dawg(words)))
    self.assertEqual(set(words), matches)
def test_long_head(self):
    """Query 'abtrtz' with second Iterator argument 1 finds 'abtrbtz'.

    The dictionary holds that single word.
    """
    automaton = make_dawg(('abtrbtz', ))
    matches = set(Iterator('abtrtz', 1, automaton))
    self.assertEqual({'abtrbtz'}, matches)
def _iter_test_cases(dictionary, dd, dawg, single_mode):
    """Yield ``(word, dawg)`` for every word of *dictionary*, sorted.

    Each word is announced on stdout before it is yielded.

    In single-dictionary mode *dd* is left untouched and the *dawg* passed
    in is yielded for every word.  Otherwise, before each yield, the
    current word is removed from *dd*, the previously yielded word is put
    back, and a fresh DAWG is built from *dd*.
    """
    previous = None
    # sorted() snapshots the words up front, so mutating dd inside the loop
    # cannot disturb the iteration.
    for tword in sorted(dictionary):
        print("%s..." % tword)
        if not single_mode:
            dd.remove(tword)
            if previous is not None:
                dd.add(previous)
            dawg = make_dawg(dd)
            previous = tword
        yield tword, dawg


def main():
    """Run the multi-word edit distance test from the command line.

    Command-line arguments:
        input: path of the input file the test dictionary is read from.
        --tolerance / -t: max allowed number of edits (default 1).
        --result / -r: path of the result TSV file (default 'result.tsv').
        --single-dict / -s: keep the tested word in the dictionary.

    If the result file does not exist it is created: a header row
    (input path, tolerance, single-mode flag) is written and every word is
    passed to test_independent together with the CSV writer.  If it does
    exist, its header must match the current inputs and every word is
    passed to test_repeat together with the CSV reader.

    Raises:
        Exception: if the tolerance is not positive, if an existing result
            file has no three-column header, or if that header does not
            match the current inputs.
    """
    parser = argparse.ArgumentParser(
        description='MUlti-word EDit DIstance test')
    parser.add_argument('--tolerance', '-t', type=int, default=1,
                        help='max allowed number of edits')
    parser.add_argument('input', nargs=1, help='input file path')
    # The help text used to be a copy of the input argument's help.
    parser.add_argument('--result', '-r', type=str, default='result.tsv',
                        help='result file path')
    parser.add_argument('--single-dict', '-s', action='store_true',
                        help='include tested word in the dictionary')
    args = parser.parse_args()

    n = args.tolerance
    if n <= 0:
        raise Exception("max allowed number of edits must be positive")

    input_path = os.path.abspath(args.input[0])
    result_path = args.result
    single_mode = args.single_dict

    dictionary = make_test_dict(input_path)
    # Always work on a copy so the leave-one-out mutation performed in
    # non-single mode never touches `dictionary` itself.
    dd = copy.copy(dictionary)
    # In non-single mode a DAWG is built per word inside the loop instead.
    base_dawg = make_dawg(dd) if single_mode else None

    if not os.path.isfile(result_path):
        with open(result_path, 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerow([input_path, str(n), int(single_mode)])
            for tword, dawg in _iter_test_cases(
                    dictionary, dd, base_dawg, single_mode):
                test_independent(tword, n, dd, dawg, writer)
    else:
        with open(result_path, newline='') as f:
            reader = csv.reader(f, delimiter='\t')
            # An empty file yields no header row at all; treat that like
            # any other malformed header instead of leaking StopIteration.
            first_row = next(reader, [])
            if len(first_row) != 3:
                raise Exception("three-column header expected")
            header_matches = (
                first_row[0] == input_path
                and int(first_row[1]) == n
                and int(first_row[2]) == int(single_mode)
            )
            if not header_matches:
                raise Exception("inputs changed")
            for tword, dawg in _iter_test_cases(
                    dictionary, dd, base_dawg, single_mode):
                test_repeat(tword, n, dawg, reader)