Esempio n. 1
0
 def add_constructor_generator(teacher, **kwargs):
     """Attach a tree-constructor based generator to ``teacher``.

     A ``table`` keyword argument is required; it is passed to
     ``TreeConstructor``. The constructor is configured with concatenation
     enabled and lambda set to 1.0, then handed to
     ``teacher.setup_constructor_generator``.

     NOTE(review): ``max_len`` and ``num_examples`` are not defined inside
     this function; presumably they come from an enclosing or module-level
     scope -- confirm against the surrounding code.

     Raises:
         BaseException: with the message "no table" when the ``table``
             keyword argument is absent.
     """
     has_table = 'table' in kwargs
     if not has_table:
         raise BaseException("no table")

     constructor = TreeConstructor(kwargs['table'])
     constructor.set_concat(True)
     constructor.set_lambda(1.0)
     teacher.setup_constructor_generator(constructor, max_len, num_examples)
Esempio n. 2
0
 def construct_tree_from_string(self, string):
     """Build a tree for a space-separated string.

     The rows of ``self._table`` are iterated as ``(key, value)`` pairs and
     re-keyed through ``self._tree_converter.convert_ngram``. The resulting
     mapping drives a ``TreeConstructor`` (concatenation on, lambda 1.0).
     ``string`` is split on single spaces, converted with the same
     converter, and the constructed tree is passed through
     ``make_tree_nodes_int`` before being reverse-converted.

     Returns:
         Whatever ``self._tree_converter.reverse_convert_tree`` returns for
         the constructed tree.
     """
     rows = self._table.rows
     converted_table = {}
     for key, val in rows:
         converted_table[self._tree_converter.convert_ngram(key)] = val

     constructor = TreeConstructor(converted_table)
     constructor.set_concat(True)
     constructor.set_lambda(1.0)

     tokens = string.split(' ')
     ngram = self._tree_converter.convert_ngram(tokens)
     raw_tree = constructor.construct_tree(ngram)
     int_tree = make_tree_nodes_int(raw_tree)
     return self._tree_converter.reverse_convert_tree(int_tree)
Esempio n. 3
0
def _format_derivation(prod):
    """Render one production as a LaTeX ``derivation`` macro invocation.

    The three macro arguments are the space-joined right-hand side, the
    probability with three decimals, and the probability as a truncated
    integer percentage.
    """
    rhs_str = ' '.join(str(r) for r in prod.rhs())
    prob = prod.prob()
    # Raw string: "\d" is not a valid escape sequence in a normal literal.
    return r'\derivation{%s}{%.3f}{%d}' % (rhs_str, prob, int(100 * prob))


def _write_productions(output_file, nt, prod_list, per_row=4):
    """Write the LaTeX fragment for all productions of nonterminal ``nt``.

    Up to ``per_row`` productions are written inline; longer lists are
    split into rows of ``per_row`` productions, each wrapped in a 10cm
    ``makebox`` and followed by a blank line.
    """
    output_file.write(r'\startprod{%s} ' % nt)
    if len(prod_list) <= per_row:
        output_file.write(''.join(_format_derivation(p) for p in prod_list))
    else:
        for start in range(0, len(prod_list), per_row):
            # Slicing clamps at the end of the list, so no min() is needed.
            row = prod_list[start:start + per_row]
            output_file.write(r'\makebox[10cm]{')
            output_file.write(''.join(_format_derivation(p) for p in row))
            output_file.write('}\n\n')
    output_file.write('\n\n')


def learn_prob(trees, reverse_dict, oracle_settings, table):
    """Learn a grammar from example trees and dump it to disk.

    A teacher is built from ``oracle_settings``: when its ``'type'`` entry is
    ``ProbabilityTeacher`` the teacher is created from the ``'comparator'``
    factory and the first element of ``'args'``; otherwise ``'type'`` is
    called with ``'args'`` unpacked. Every element of ``trees`` is unpacked
    into ``teacher.addExample``. For a ``ProbabilityTeacher`` a
    ``TreeConstructor`` over ``table`` (concatenation on, lambda 1.0) is
    registered together with the unpacked ``'equiv_settings'``.

    The result of ``learnMultPos`` is converted with
    ``convert_pmta_to_pcfg`` and written twice into the current working
    directory: as LaTeX-style macros to ``output_g`` and, in the string form
    produced by ``convert_pcfg_to_str``, as JSON to ``grammar.json``.
    Progress messages and the elapsed time are printed to stdout.

    Args:
        trees: iterable of argument tuples for ``teacher.addExample``.
        reverse_dict: passed through to ``convert_pmta_to_pcfg``.
        oracle_settings: mapping with the keys ``'equiv_settings'``,
            ``'type'``, ``'args'`` and, for ``ProbabilityTeacher``,
            ``'comparator'``.
        table: table handed to ``TreeConstructor``.

    Returns:
        The grammar object returned by ``convert_pmta_to_pcfg``.

    Raises:
        KeyError: if a required key is missing from ``oracle_settings``.
    """
    print(oracle_settings['equiv_settings'])
    uses_probability_teacher = oracle_settings['type'] is ProbabilityTeacher
    if uses_probability_teacher:
        print(oracle_settings)
        comparator = oracle_settings['comparator']()
        # NOTE(review): the meaning of the 0.001 constant is not visible
        # here; see the ProbabilityTeacher constructor.
        teacher = ProbabilityTeacher(comparator, oracle_settings['args'][0],
                                     0.001)
    else:
        teacher = oracle_settings['type'](*oracle_settings['args'])

    for tree in trees:
        teacher.addExample(*tree)

    if uses_probability_teacher:
        con = TreeConstructor(table)
        con.set_concat(True)
        con.set_lambda(1.0)
        teacher.setup_constructor_generator(
            con, *oracle_settings['equiv_settings'])

    print('setting')
    print('starting')
    t = time()
    acc = learnMultPos(teacher)
    acc.print_desc()
    g = convert_pmta_to_pcfg(acc, reverse_dict)

    with open('output_g', 'w') as output_file:
        for nt in get_nonterminals(g):
            _write_productions(output_file, nt, get_prod_by_nt(g, nt))

    g_str = convert_pcfg_to_str(g)
    with open('grammar.json', 'w') as json_file:
        json.dump(g_str, json_file)

    print('took {}'.format(time() - t))
    return g
Esempio n. 4
0
def create_trees(sequences,
                 table,
                 contract=False,
                 lambda_val=0.0,
                 key='annot'):
    """Construct one tree per sequence row and normalize the collection.

    Args:
        sequences: iterable of rows; ``None`` entries are skipped. Each
            remaining row is indexed with ``key`` and with ``'instances'``.
        table: table handed to ``TreeConstructor``.
        contract: when true, ``convert_tree_to_cnf`` is applied to every
            constructed tree.
        lambda_val: value passed to ``TreeConstructor.set_lambda``.
        key: row key whose value is fed to ``construct_tree``.

    Returns:
        A list of ``(tree, row['instances'])`` tuples after it has been
        passed to ``normalize_trees``. The list is also printed to stdout.
    """
    constructor = TreeConstructor(table)
    constructor.set_lambda(lambda_val)
    constructor.set_concat(True)

    ans = []
    for row in sequences:
        if row is None:
            continue
        curr_tree = constructor.construct_tree(row[key])
        if contract:
            # NOTE(review): the return value is ignored, so this presumably
            # converts the tree in place -- confirm in convert_tree_to_cnf.
            convert_tree_to_cnf(curr_tree)
        ans.append((curr_tree, row['instances']))

    # NOTE(review): the result of normalize_trees is not used, so it
    # presumably normalizes ``ans`` in place -- confirm.
    normalize_trees(ans)
    print(ans)
    return ans
Esempio n. 5
0
def simple_learning_test():
    """Learn a grammar from the test fixtures with a simple teacher.

    The table from ``get_table_tests`` is re-keyed through a fresh
    ``TreesConverter``, every ``(tree, prob)`` pair from ``get_trees_tests``
    is added to a ``SimpleMultiplicityTeacher``, and the outcome of
    ``learnMultPos`` is converted to a grammar string. That string is
    written to ``grammar_simple.txt`` in the working directory and printed.
    """
    converter = TreesConverter()
    converted_table = {}
    for key, val in get_table_tests():
        converted_table[converter.convert_ngram(key)] = val
    examples = get_trees_tests()

    # The constructor is configured here but is not passed to the teacher
    # anywhere in this function.
    constructor = TreeConstructor(converted_table)
    constructor.set_concat(True)
    constructor.set_lambda(1.0)

    teacher = SimpleMultiplicityTeacher(epsilon=1e-6, default_val=0)
    for example in examples:
        tree, prob = example
        teacher.addExample(converter.convert_tree(tree), prob)

    print('starting')
    started_at = time()  # start timestamp; not read afterwards
    automaton = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(automaton, converter.reverse_dict)
    grammar_text = convert_pcfg_to_str(grammar)
    with open('grammar_simple.txt', 'w') as out_file:
        out_file.write(grammar_text)
    print(grammar_text)
Esempio n. 6
0
def swap_learning_test():
    """Learn a grammar from the test fixtures with a swap comparator.

    The table from ``get_table_tests`` is re-keyed through a fresh
    ``TreesConverter`` and used to configure a ``TreeConstructor``
    (concatenation on, lambda 1.0). A ``ProbabilityTeacher`` built around a
    ``SwapComparator`` receives every ``(tree, prob)`` pair from
    ``get_trees_tests`` and is then given the constructor via
    ``setup_constructor_generator(constructor, 4, -1)``. The grammar string
    derived from ``learnMultPos`` is written to ``grammar_swap.txt`` in the
    working directory and printed.
    """
    converter = TreesConverter()
    converted_table = {}
    for key, val in get_table_tests():
        converted_table[converter.convert_ngram(key)] = val
    examples = get_trees_tests()

    constructor = TreeConstructor(converted_table)
    constructor.set_concat(True)
    constructor.set_lambda(1.0)

    comparator = SwapComparator()
    teacher = ProbabilityTeacher(comparator, 0.25, 1e-7)
    for example in examples:
        tree, prob = example
        teacher.addExample(converter.convert_tree(tree), prob)
    teacher.setup_constructor_generator(constructor, 4, -1)

    print('starting')
    started_at = time()  # start timestamp; not read afterwards
    automaton = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(automaton, converter.reverse_dict)
    grammar_text = convert_pcfg_to_str(grammar)
    with open('grammar_swap.txt', 'w') as out_file:
        out_file.write(grammar_text)
    print(grammar_text)
Esempio n. 7
0
        def thread_task():
            """Learn a grammar from the selected examples and save it.

            Uses ``self``, ``oracle_settings`` and ``lst`` from the
            enclosing scope. The rows of ``self._table`` are re-keyed with
            the tree converter, a teacher is built from ``oracle_settings``
            and fed every ``(tree, prob)`` pair returned by
            ``lst.get_selected_elements()``. Depending on the configured
            comparator a constructor generator or a duplications generator
            is set up. The grammar string derived from ``learnMultPos`` is
            JSON-dumped to ``grammar.json``, written verbatim to
            ``grammar.txt`` and printed.
            """
            rows = self._table.rows
            converted_table = {}
            for key, val in rows:
                converted_key = self._tree_converter.convert_ngram(key)
                converted_table[converted_key] = val

            if oracle_settings['type'] is ProbabilityTeacher:
                print(oracle_settings)
                comparator = oracle_settings['comparator']()
                first_arg = oracle_settings['args'][0]
                teacher = ProbabilityTeacher(comparator, first_arg, 0.000001)
            else:
                teacher_factory = oracle_settings['type']
                teacher = teacher_factory(*oracle_settings['args'])

            for tree, prob in lst.get_selected_elements():
                converted_tree = self._tree_converter.convert_tree(tree)
                teacher.addExample(converted_tree, prob)

            # The two comparator checks are kept as independent tests.
            if oracle_settings['comparator'] is SwapComparator:
                constructor = TreeConstructor(converted_table)
                constructor.set_concat(True)
                constructor.set_lambda(1.0)
                equiv_settings = oracle_settings['equiv_settings']
                teacher.setup_constructor_generator(constructor,
                                                    *equiv_settings)
            if oracle_settings['comparator'] is DuplicationComparator:
                teacher.setup_duplications_generator(2)

            print('starting')
            started_at = time()  # start timestamp; not read afterwards
            automaton = learnMultPos(teacher)
            grammar = convert_pmta_to_pcfg(automaton,
                                           self._tree_converter.reverse_dict)
            grammar_text = convert_pcfg_to_str(grammar)
            with open('grammar.json', 'w') as json_file:
                json.dump(grammar_text, json_file)
            with open('grammar.txt', 'w') as text_file:
                text_file.write(grammar_text)
            print(grammar_text)