Beispiel #1
0
    def test_foo(self):
        # Looking up 'baz' in a DAWG built from 'foo' and 'bar' must yield
        # exactly {'bar'}, whether the second Iterator argument is 1 or 2.
        dawg = make_dawg(('foo', 'bar'))
        for limit in (1, 2):
            matches = set(Iterator('baz', limit, dawg))
            self.assertEqual({'bar'}, matches)
Beispiel #2
0
    def test_tolerance(self):
        # 'mutter' must match nothing when the second Iterator argument is 1
        # and every dictionary word when it is 2.
        words = ('meter', 'otter', 'potter')
        dawg = make_dawg(words)
        expectations = (
            (1, set()),
            (2, set(words)),
        )
        for limit, expected in expectations:
            self.assertEqual(expected, set(Iterator('mutter', limit, dawg)))
Beispiel #3
0
    def test_this(self):
        # Looking up 'the': nothing may be found when the second Iterator
        # argument is 1; all three words must be found when it is 2.
        words = ('this', 'that', 'other')
        dawg = make_dawg(words)

        self.assertEqual(set(), set(Iterator('the', 1, dawg)))
        self.assertEqual(set(words), set(Iterator('the', 2, dawg)))
Beispiel #4
0
def main():
    """Print the positive entries of the child-count totals of a DAWG.

    Expects exactly one command-line argument, the input file path.  The
    file is turned into a dictionary with ``make_test_dict``, a DAWG is
    built from it with ``make_dawg``, and ``count_children`` is run on the
    DAWG root.  Every positive entry of the resulting list is printed as
    ``index: value``.

    Raises:
        Exception: with a usage message when the argument count is wrong.
    """
    if len(sys.argv) != 2:
        raise Exception("usage: %s input-file" % sys.argv[0])

    source_path = os.path.abspath(sys.argv[1])
    dawg = make_dawg(make_test_dict(source_path))

    # The list starts empty and is only read afterwards, so count_children
    # is expected to fill it in place.
    totals = []
    count_children(dawg.root, totals)

    for index, total in enumerate(totals):
        if total > 0:
            print("%d: %d" % (index, total))
Beispiel #5
0
 def test_initial_final(self):
     # The dictionary holds the empty word and 'a'; looking up 'b' with a
     # second Iterator argument of 1 must return both of them.
     expected = {'', 'a'}
     matches = set(Iterator('b', 1, make_dawg(('', 'a'))))
     self.assertEqual(expected, matches)
Beispiel #6
0
 def test_binary(self):
     # Both two-letter-alphabet words must be found for 'abba' when the
     # second Iterator argument is 3.
     words = ('ababa', 'babab')
     matches = set(Iterator('abba', 3, make_dawg(words)))
     self.assertEqual(set(words), matches)
Beispiel #7
0
 def test_long_head(self):
     # 'abtrtz' differs from the only dictionary word by one missing 'b';
     # the lookup with a second Iterator argument of 1 must find that word.
     dawg = make_dawg(('abtrbtz', ))
     matches = set(Iterator('abtrtz', 1, dawg))
     self.assertEqual({'abtrbtz'}, matches)
Beispiel #8
0
def _iter_test_words(dictionary, dd, dawg, single_mode):
    """Yield ``(word, dawg)`` for every word of *dictionary* in sorted order.

    In single-dict mode *dd* and *dawg* are left untouched and the same
    *dawg* is yielded every time.  Otherwise *dd* is updated in place so
    that, compared with its initial content, it lacks only the word about
    to be yielded, and a DAWG rebuilt from it with ``make_dawg`` is yielded.
    """
    previous = None
    # sorted() snapshots the words up front, so mutating dd below is safe
    # even when dd and dictionary are the same object.
    for tword in sorted(dictionary):
        print("%s..." % tword)

        if not single_mode:
            dd.remove(tword)
            if previous is not None:
                # Put back the word that was left out in the previous round.
                dd.add(previous)

            dawg = make_dawg(dd)
            previous = tword

        yield tword, dawg


def main():
    """Command-line driver of the multi-word edit distance test.

    Parses the command line, builds the test dictionary from the input
    file with ``make_test_dict`` and then works in one of two ways:

    * If the result file does not exist, it is created, a header row
      ``[input path, tolerance, single-dict flag]`` is written and
      ``test_independent`` is called for every word with the CSV writer.
    * If the result file exists, its header row must match the current
      inputs; ``test_repeat`` is then called for every word with the CSV
      reader positioned after the header.

    Unless ``--single-dict`` is given, each word is removed from the
    dictionary (and the DAWG rebuilt) before it is tested.

    Raises:
        Exception: if the tolerance is not positive, if the existing result
            file is empty or lacks a three-column header, or if that header
            does not match the current inputs.
    """
    parser = argparse.ArgumentParser(
        description='MUlti-word EDit DIstance test')
    parser.add_argument('--tolerance',
                        '-t',
                        type=int,
                        default=1,
                        help='max allowed number of edits')
    parser.add_argument('input', nargs=1, help='input file path')
    parser.add_argument('--result',
                        '-r',
                        type=str,
                        default='result.tsv',
                        help='result file path')
    parser.add_argument('--single-dict',
                        '-s',
                        action='store_true',
                        help='include tested word in the dictionary')
    args = parser.parse_args()

    n = args.tolerance
    if n <= 0:
        raise Exception("max allowed number of edits must be positive")

    input_path = os.path.abspath(args.input[0])
    result_path = args.result
    single_mode = args.single_dict

    dictionary = make_test_dict(input_path)

    # NOTE(review): the copy is taken only in single-dict mode, although only
    # the other mode mutates dd.  This is harmless here because the words are
    # snapshotted by sorted() and dictionary is not used after the loop.
    dd = copy.copy(dictionary) if single_mode else dictionary
    dawg = make_dawg(dd) if single_mode else None

    if not os.path.isfile(result_path):
        with open(result_path, 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerow([input_path, str(n), int(single_mode)])

            for tword, dawg in _iter_test_words(dictionary, dd, dawg,
                                                single_mode):
                test_independent(tword, n, dd, dawg, writer)
    else:
        with open(result_path, newline='') as f:
            reader = csv.reader(f, delimiter='\t')
            # An empty file has no header at all; report it the same way as
            # a malformed header instead of leaking StopIteration.
            first_row = next(reader, None)
            if first_row is None or len(first_row) != 3:
                raise Exception("three-column header expected")

            if (first_row[0] != input_path) or (int(first_row[1]) != n) or (
                    int(first_row[2]) != int(single_mode)):
                raise Exception("inputs changed")

            for tword, dawg in _iter_test_words(dictionary, dd, dawg,
                                                single_mode):
                test_repeat(tword, n, dawg, reader)