Example #1
0
def learn_prob(trees, reverse_dict, oracle_settings, table):
    """Learn a grammar from example trees and dump it to disk.

    Builds a teacher from ``oracle_settings``, feeds it every entry of
    ``trees``, runs ``learnMultPos`` and converts the learned automaton with
    ``convert_pmta_to_pcfg``.  The grammar is written as LaTeX-style macros
    to ``output_g`` and, in its string form, as JSON to ``grammar.json``.

    Args:
        trees: iterable of argument tuples; each one is unpacked into
            ``teacher.addExample``.
        reverse_dict: passed through to ``convert_pmta_to_pcfg``.
        oracle_settings: mapping read for the keys ``'type'``, ``'args'``
            and ``'equiv_settings'``; ``'comparator'`` is also read when
            ``'type'`` is ``ProbabilityTeacher``.
        table: passed to ``TreeConstructor``; only used when the teacher is
            a ``ProbabilityTeacher``.

    Returns:
        The grammar object returned by ``convert_pmta_to_pcfg``.
    """
    # Productions per \makebox row in the macro output.
    row_size = 4

    def derivation_tex(prod):
        # One \derivation{rhs}{probability}{percent} macro for a production.
        rhs_str = ' '.join(str(symbol) for symbol in prod.rhs())
        return '\\derivation{%s}{%.3f}{%d}' % (
            rhs_str, prod.prob(), int(100 * prod.prob()))

    print(oracle_settings['equiv_settings'])
    uses_probability_teacher = oracle_settings['type'] is ProbabilityTeacher
    if uses_probability_teacher:
        print(oracle_settings)
        comparator = oracle_settings['comparator']()
        teacher = ProbabilityTeacher(comparator, oracle_settings['args'][0],
                                     0.001)
    else:
        teacher = oracle_settings['type'](*oracle_settings['args'])
    for tree in trees:
        teacher.addExample(*tree)
    if uses_probability_teacher:
        con = TreeConstructor(table)
        con.set_concat(True)
        con.set_lambda(1.0)
        teacher.setup_constructor_generator(con,
                                            *oracle_settings['equiv_settings'])
    print('setting')
    print('starting')
    t = time()
    acc = learnMultPos(teacher)
    acc.print_desc()
    g = convert_pmta_to_pcfg(acc, reverse_dict)
    with open('output_g', 'w') as output_file:
        for nt in get_nonterminals(g):
            # Backslashes are escaped explicitly: '\s', '\d' and '\m' are not
            # valid escape sequences and trigger a SyntaxWarning on recent
            # Python versions.  The bytes written are unchanged.
            output_file.write('\\startprod{%s} ' % nt)
            prod_list = get_prod_by_nt(g, nt)
            if len(prod_list) <= row_size:
                # Short lists are written inline, without a \makebox wrapper.
                for prod in prod_list:
                    output_file.write(derivation_tex(prod))
            else:
                # Longer lists are split into rows of ``row_size`` items;
                # slicing already clamps at the end of the list.
                for start in range(0, len(prod_list), row_size):
                    output_file.write('\\makebox[10cm]{')
                    for prod in prod_list[start:start + row_size]:
                        output_file.write(derivation_tex(prod))
                    output_file.write('}\n\n')
            output_file.write('\n\n')
    g_str = convert_pcfg_to_str(g)
    with open('grammar.json', 'w') as json_file:
        json.dump(g_str, json_file)
    print('took {}'.format(time() - t))
    return g
Example #2
0
def dup_learning_test():
    """Learn a grammar with a duplication comparator and write it out.

    Feeds the (tree, probability) pairs from ``get_trees_tests()`` to a
    ``ProbabilityTeacher`` built on ``DuplicationComparator``, runs
    ``learnMultPos`` and writes the string form of the resulting grammar to
    ``grammar_dup.txt`` and to stdout.
    """
    converter = TreesConverter()
    trees = get_trees_tests()
    comparator = DuplicationComparator()
    teacher = ProbabilityTeacher(comparator, 0.2, 0.0000001)
    for tree, prob in trees:
        teacher.addExample(converter.convert_tree(tree), prob)
    teacher.setup_duplications_generator(2)
    print('starting')
    # The original stored a start timestamp here that was never read; the
    # dead timer has been removed.
    acc = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(acc, converter.reverse_dict)
    grammar_str = convert_pcfg_to_str(grammar)
    with open('grammar_dup.txt', 'w') as out_file:
        out_file.write(grammar_str)
    print(grammar_str)
Example #3
0
def simple_learning_test():
    """Learn a grammar with a ``SimpleMultiplicityTeacher`` and write it out.

    Feeds the (tree, probability) pairs from ``get_trees_tests()`` to the
    teacher, runs ``learnMultPos`` and writes the string form of the
    resulting grammar to ``grammar_simple.txt`` and to stdout.
    """
    converter = TreesConverter()
    table = get_table_tests()
    table = {converter.convert_ngram(key): val for key, val in table}
    trees = get_trees_tests()
    # NOTE(review): this constructor is configured but never handed to the
    # teacher in this function.  It is kept because building it (or
    # converting the table above) may have side effects -- confirm and
    # remove if it does not.
    con = TreeConstructor(table)
    con.set_concat(True)
    con.set_lambda(1.0)
    teacher = SimpleMultiplicityTeacher(epsilon=0.000001, default_val=0)
    for tree, prob in trees:
        teacher.addExample(converter.convert_tree(tree), prob)
    print('starting')
    # The original stored a start timestamp here that was never read; the
    # dead timer has been removed.
    acc = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(acc, converter.reverse_dict)
    grammar_str = convert_pcfg_to_str(grammar)
    with open('grammar_simple.txt', 'w') as out_file:
        out_file.write(grammar_str)
    print(grammar_str)
Example #4
0
def swap_learning_test():
    """Learn a grammar with a swap comparator and write it out.

    Builds a ``TreeConstructor`` from the converted test table, feeds the
    (tree, probability) pairs from ``get_trees_tests()`` to a
    ``ProbabilityTeacher`` built on ``SwapComparator``, runs
    ``learnMultPos`` and writes the string form of the resulting grammar to
    ``grammar_swap.txt`` and to stdout.
    """
    converter = TreesConverter()
    table = get_table_tests()
    table = {converter.convert_ngram(key): val for key, val in table}
    trees = get_trees_tests()
    con = TreeConstructor(table)
    con.set_concat(True)
    con.set_lambda(1.0)
    comparator = SwapComparator()
    teacher = ProbabilityTeacher(comparator, 0.25, 0.0000001)
    for tree, prob in trees:
        teacher.addExample(converter.convert_tree(tree), prob)
    teacher.setup_constructor_generator(con, 4, -1)
    print('starting')
    # The original stored a start timestamp here that was never read; the
    # dead timer has been removed.
    acc = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(acc, converter.reverse_dict)
    grammar_str = convert_pcfg_to_str(grammar)
    with open('grammar_swap.txt', 'w') as out_file:
        out_file.write(grammar_str)
    print(grammar_str)
Example #5
0
        def thread_task():
            """Learn a grammar from the selected trees and write it to disk.

            Reads ``self``, ``oracle_settings`` and ``lst`` from the
            enclosing scope.  Builds a teacher from ``oracle_settings``,
            feeds it the selected (tree, probability) pairs, runs
            ``learnMultPos`` and writes the string form of the resulting
            grammar to ``grammar.json`` (JSON-encoded), ``grammar.txt`` and
            stdout.
            """
            table = {
                self._tree_converter.convert_ngram(key): val
                for key, val in self._table.rows
            }
            if oracle_settings['type'] is ProbabilityTeacher:
                print(oracle_settings)
                comparator_instance = oracle_settings['comparator']()
                teacher = ProbabilityTeacher(comparator_instance,
                                             oracle_settings['args'][0],
                                             0.000001)
            else:
                teacher = oracle_settings['type'](*oracle_settings['args'])
            for tree, prob in lst.get_selected_elements():
                teacher.addExample(self._tree_converter.convert_tree(tree),
                                   prob)

            # The non-probability branch above only needs 'type' and 'args',
            # so 'comparator' is looked up with .get() to avoid a KeyError
            # when the settings carry no comparator entry.
            comparator = oracle_settings.get('comparator')
            if comparator is SwapComparator:
                con = TreeConstructor(table)
                con.set_concat(True)
                con.set_lambda(1.0)
                teacher.setup_constructor_generator(
                    con, *oracle_settings['equiv_settings'])
            if comparator is DuplicationComparator:
                teacher.setup_duplications_generator(2)
            print('starting')
            # The original stored a start timestamp here that was never
            # read; the dead timer has been removed.
            acc = learnMultPos(teacher)
            grammar = convert_pmta_to_pcfg(acc,
                                           self._tree_converter.reverse_dict)
            grammar_str = convert_pcfg_to_str(grammar)
            with open('grammar.json', 'w') as json_file:
                json.dump(grammar_str, json_file)
            with open('grammar.txt', 'w') as output:
                output.write(grammar_str)
            print(grammar_str)
Example #6
0
 def get_grammar(self, pmta):
     """Return ``convert_pmta_to_pcfg`` applied to ``pmta`` and this
     object's ``_reverse_dict``."""
     reverse_dict = self._reverse_dict
     grammar = convert_pmta_to_pcfg(pmta, reverse_dict)
     return grammar