def learn_prob(trees, reverse_dict, oracle_settings, table):
    """Learn a grammar from example trees and write it to disk.

    Builds a teacher from ``oracle_settings``, feeds it every entry of
    ``trees``, runs ``learnMultPos`` on it and converts the result with
    ``convert_pmta_to_pcfg``.  The grammar is then written twice: as
    LaTeX-style macros to ``output_g`` and, after ``convert_pcfg_to_str``,
    as JSON to ``grammar.json``.  Both files are created in the current
    working directory and overwritten if they exist.

    Args:
        trees: Iterable of argument tuples; each one is unpacked into
            ``teacher.addExample``.
        reverse_dict: Passed unchanged to ``convert_pmta_to_pcfg``.
        oracle_settings: Mapping read for the keys ``'type'``, ``'args'``
            and ``'equiv_settings'``, plus ``'comparator'`` when
            ``'type'`` is ``ProbabilityTeacher``.
        table: Passed to ``TreeConstructor`` when ``'type'`` is
            ``ProbabilityTeacher``; unused otherwise.

    Returns:
        The grammar object returned by ``convert_pmta_to_pcfg``.
    """
    group_size = 4  # productions per \makebox row in the LaTeX output

    def derivation(prod):
        # One \derivation{rhs}{probability}{percentage} macro call.
        rhs_str = ' '.join(str(symbol) for symbol in prod.rhs())
        prob = prod.prob()
        # Raw string: '\d' is not a valid escape sequence in a normal literal.
        return r'\derivation{%s}{%.3f}{%d}' % (rhs_str, prob, int(100 * prob))

    print(oracle_settings['equiv_settings'])
    uses_probability_teacher = oracle_settings['type'] is ProbabilityTeacher

    if uses_probability_teacher:
        print(oracle_settings)
        comparator = oracle_settings['comparator']()
        teacher = ProbabilityTeacher(comparator, oracle_settings['args'][0], 0.001)
    else:
        teacher = oracle_settings['type'](*oracle_settings['args'])

    for tree in trees:
        teacher.addExample(*tree)

    if uses_probability_teacher:
        con = TreeConstructor(table)
        con.set_concat(True)
        con.set_lambda(1.0)
        teacher.setup_constructor_generator(con, *oracle_settings['equiv_settings'])
    print('setting')
    # set_verbose(LOG_DEBUG)
    print('starting')
    t = time()
    acc = learnMultPos(teacher)
    acc.print_desc()
    g = convert_pmta_to_pcfg(acc, reverse_dict)
    # gui.set_pcfg(g)

    with open('output_g', 'w') as output_file:
        for nt in get_nonterminals(g):
            output_file.write(r'\startprod{%s} ' % nt)
            prod_list = get_prod_by_nt(g, nt)
            if len(prod_list) <= group_size:
                for prod in prod_list:
                    output_file.write(derivation(prod))
            else:
                # Longer lists are emitted in rows of `group_size`, each row
                # wrapped in its own \makebox.
                for start in range(0, len(prod_list), group_size):
                    output_file.write(r'\makebox[10cm]{')
                    for prod in prod_list[start:start + group_size]:
                        output_file.write(derivation(prod))
                    output_file.write('}\n\n')
            output_file.write('\n\n')

    g_str = convert_pcfg_to_str(g)
    with open('grammar.json', 'w') as json_file:
        json.dump(g_str, json_file)
    print('took {}'.format(time() - t))
    return g
def dup_learning_test():
    """Run a learning pass with a ``DuplicationComparator`` teacher.

    Feeds the ``(tree, prob)`` pairs from ``get_trees_tests()`` to a
    ``ProbabilityTeacher``, runs ``learnMultPos``, converts the result to a
    grammar string, writes it to ``grammar_dup.txt`` in the current working
    directory and prints it.  Returns nothing.
    """
    converter = TreesConverter()
    trees = get_trees_tests()
    comparator = DuplicationComparator()
    teacher = ProbabilityTeacher(comparator, 0.2, 0.0000001)
    for tree, prob in trees:
        teacher.addExample(converter.convert_tree(tree), prob)
    teacher.setup_duplications_generator(2)

    print('starting')
    acc = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(acc, converter.reverse_dict)
    grammar_str = convert_pcfg_to_str(grammar)
    with open('grammar_dup.txt', 'w') as out:
        out.write(grammar_str)
    print(grammar_str)
def simple_learning_test():
    """Run a learning pass with a ``SimpleMultiplicityTeacher``.

    Feeds the ``(tree, prob)`` pairs from ``get_trees_tests()`` to the
    teacher, runs ``learnMultPos``, converts the result to a grammar string,
    writes it to ``grammar_simple.txt`` in the current working directory and
    prints it.  Returns nothing.
    """
    converter = TreesConverter()
    table = get_table_tests()
    table = {converter.convert_ngram(key): val for key, val in table}
    trees = get_trees_tests()

    # NOTE(review): `con` is configured but never handed to the teacher in
    # this function.  It is kept because the converter/constructor calls may
    # have side effects (e.g. on `converter`) -- confirm before removing.
    con = TreeConstructor(table)
    con.set_concat(True)
    con.set_lambda(1.0)

    teacher = SimpleMultiplicityTeacher(epsilon=0.000001, default_val=0)
    for tree, prob in trees:
        teacher.addExample(converter.convert_tree(tree), prob)

    print('starting')
    acc = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(acc, converter.reverse_dict)
    grammar_str = convert_pcfg_to_str(grammar)
    with open('grammar_simple.txt', 'w') as out:
        out.write(grammar_str)
    print(grammar_str)
def swap_learning_test():
    """Run a learning pass with a ``SwapComparator`` teacher.

    Builds a ``TreeConstructor`` from the converted ``get_table_tests()``
    rows, feeds the ``(tree, prob)`` pairs from ``get_trees_tests()`` to a
    ``ProbabilityTeacher``, runs ``learnMultPos``, converts the result to a
    grammar string, writes it to ``grammar_swap.txt`` in the current working
    directory and prints it.  Returns nothing.
    """
    converter = TreesConverter()
    table = get_table_tests()
    table = {converter.convert_ngram(key): val for key, val in table}
    trees = get_trees_tests()

    con = TreeConstructor(table)
    con.set_concat(True)
    con.set_lambda(1.0)

    comparator = SwapComparator()
    teacher = ProbabilityTeacher(comparator, 0.25, 0.0000001)
    for tree, prob in trees:
        teacher.addExample(converter.convert_tree(tree), prob)
    teacher.setup_constructor_generator(con, 4, -1)

    print('starting')
    acc = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(acc, converter.reverse_dict)
    grammar_str = convert_pcfg_to_str(grammar)
    with open('grammar_swap.txt', 'w') as out:
        out.write(grammar_str)
    print(grammar_str)
def thread_task():
    """Learn a grammar from the selected examples and write it to disk.

    Takes no arguments: ``self``, ``oracle_settings`` and ``lst`` are free
    variables that must be supplied by the enclosing scope (not visible in
    this block).  The resulting grammar string is written to
    ``grammar.json`` (JSON-encoded) and ``grammar.txt`` (as-is) in the
    current working directory, then printed.  Returns nothing.
    """
    table = self._table.rows
    table = {
        self._tree_converter.convert_ngram(key): val
        for key, val in table
    }

    if oracle_settings['type'] is ProbabilityTeacher:
        print(oracle_settings)
        comparator = oracle_settings['comparator']()
        teacher = ProbabilityTeacher(comparator, oracle_settings['args'][0], 0.000001)
    else:
        teacher = oracle_settings['type'](*oracle_settings['args'])

    for tree, prob in lst.get_selected_elements():
        teacher.addExample(self._tree_converter.convert_tree(tree), prob)

    # The equivalence-query generator depends on which comparator class was
    # selected; any other comparator gets no extra setup.
    if oracle_settings['comparator'] is SwapComparator:
        con = TreeConstructor(table)
        con.set_concat(True)
        con.set_lambda(1.0)
        teacher.setup_constructor_generator(
            con, *oracle_settings['equiv_settings'])
    if oracle_settings['comparator'] is DuplicationComparator:
        teacher.setup_duplications_generator(2)

    # set_verbose(LOG_DEBUG)
    print('starting')
    acc = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(acc, self._tree_converter.reverse_dict)
    grammar_str = convert_pcfg_to_str(grammar)
    with open('grammar.json', 'w') as json_file:
        json.dump(grammar_str, json_file)
    with open('grammar.txt', 'w') as output:
        output.write(grammar_str)
    print(grammar_str)
def get_grammar(self, pmta):
    """Convert ``pmta`` to a grammar via ``convert_pmta_to_pcfg``.

    Args:
        pmta: Object forwarded as the first argument of
            ``convert_pmta_to_pcfg``.

    Returns:
        Whatever ``convert_pmta_to_pcfg`` returns for ``pmta`` and this
        object's ``_reverse_dict``.
    """
    reverse_dict = self._reverse_dict
    return convert_pmta_to_pcfg(pmta, reverse_dict)