def add_constructor_generator(teacher, **kwargs):
    """Attach a tree-constructor based generator to *teacher*.

    A ``TreeConstructor`` is built from the ``table`` keyword argument,
    switched to concat mode with lambda 1.0, and handed to
    ``teacher.setup_constructor_generator``.

    Args:
        teacher: Object exposing ``setup_constructor_generator``.
        **kwargs: Must contain ``table``; other keys are ignored.

    Raises:
        BaseException: If ``table`` is not among the keyword arguments.
    """
    if 'table' not in kwargs:
        raise BaseException("no table")

    constructor = TreeConstructor(kwargs['table'])
    constructor.set_concat(True)
    constructor.set_lambda(1.0)

    # NOTE(review): max_len and num_examples are not bound in this function;
    # they must come from an outer scope that is not visible here -- confirm.
    teacher.setup_constructor_generator(constructor, max_len, num_examples)
def construct_tree_from_string(self, string):
    """Build a tree for a space-separated *string* using the instance table.

    The rows of ``self._table`` are re-keyed through the tree converter,
    a ``TreeConstructor`` (concat mode, lambda 1.0) is built from them, and
    the converted n-gram of the split string is turned into a tree.

    Args:
        string: Text whose tokens are separated by single spaces.

    Returns:
        The result of ``reverse_convert_tree`` applied to the constructed
        tree after it has been passed through ``make_tree_nodes_int``.
    """
    rows = self._table.rows
    converter = self._tree_converter
    encoded_table = dict(
        (converter.convert_ngram(ngram), value) for ngram, value in rows
    )

    builder = TreeConstructor(encoded_table)
    builder.set_concat(True)
    builder.set_lambda(1.0)

    tokens = string.split(' ')
    encoded_ngram = converter.convert_ngram(tokens)
    int_tree = make_tree_nodes_int(builder.construct_tree(encoded_ngram))
    return converter.reverse_convert_tree(int_tree)
def _format_derivation(prod):
    """Render one production as a LaTeX derivation macro call."""
    rhs_str = ' '.join(str(r) for r in prod.rhs())
    prob = prod.prob()
    # Raw string: the leading backslash is literal LaTeX, not a Python escape.
    return r'\derivation{%s}{%.3f}{%d}' % (rhs_str, prob, int(100 * prob))


def _write_latex_productions(g, path, per_row=4):
    """Write every production of grammar *g* to *path* as LaTeX macros.

    Nonterminals with at most *per_row* productions are written inline;
    longer lists are split into makebox rows of *per_row* productions each.
    """
    with open(path, 'w') as output_file:
        for nt in get_nonterminals(g):
            output_file.write(r'\startprod{%s} ' % nt)
            prod_list = get_prod_by_nt(g, nt)
            if len(prod_list) <= per_row:
                for prod in prod_list:
                    output_file.write(_format_derivation(prod))
            else:
                for start in range(0, len(prod_list), per_row):
                    # Slicing clamps at the end of the list by itself.
                    output_file.write(r'\makebox[10cm]{')
                    for prod in prod_list[start:start + per_row]:
                        output_file.write(_format_derivation(prod))
                    output_file.write('}\n\n')
            output_file.write('\n\n')


def learn_prob(trees, reverse_dict, oracle_settings, table):
    """Learn a grammar from example trees and dump it to disk.

    A teacher is built from *oracle_settings*, fed every entry of *trees*,
    and passed to ``learnMultPos``. The learned automaton is converted with
    ``convert_pmta_to_pcfg``; the grammar is written as LaTeX macros to
    ``output_g`` and its string form is JSON-dumped to ``grammar.json``.

    Args:
        trees: Iterable of argument tuples, each unpacked into
            ``teacher.addExample``.
        reverse_dict: Passed through to ``convert_pmta_to_pcfg``.
        oracle_settings: Mapping read for the keys ``'type'``, ``'args'``,
            ``'equiv_settings'`` and, for ``ProbabilityTeacher``,
            ``'comparator'``.
        table: Table handed to ``TreeConstructor`` when the teacher type is
            ``ProbabilityTeacher``.

    Returns:
        The grammar object returned by ``convert_pmta_to_pcfg``.
    """
    print(oracle_settings['equiv_settings'])
    uses_probability_teacher = oracle_settings['type'] is ProbabilityTeacher

    if uses_probability_teacher:
        print(oracle_settings)
        d = oracle_settings['comparator']()
        teacher = ProbabilityTeacher(d, oracle_settings['args'][0], 0.001)
    else:
        teacher = oracle_settings['type'](*oracle_settings['args'])

    for tree in trees:
        teacher.addExample(*tree)

    if uses_probability_teacher:
        con = TreeConstructor(table)
        con.set_concat(True)
        con.set_lambda(1.0)
        teacher.setup_constructor_generator(
            con, *oracle_settings['equiv_settings'])
        print('setting')

    # set_verbose(LOG_DEBUG)
    print('starting')
    t = time()
    acc = learnMultPos(teacher)
    acc.print_desc()
    g = convert_pmta_to_pcfg(acc, reverse_dict)
    # gui.set_pcfg(g)

    _write_latex_productions(g, 'output_g')

    g_str = convert_pcfg_to_str(g)
    with open('grammar.json', 'w') as json_file:
        json.dump(g_str, json_file)

    print('took {}'.format(time() - t))
    return g
def create_trees(sequences, table, contract=False, lambda_val=0.0, key='annot'):
    """Construct one tree per non-``None`` row of *sequences*.

    Args:
        sequences: Iterable of rows (or ``None`` entries, which are skipped);
            each row is indexed with *key* and with ``'instances'``.
        table: Table handed to ``TreeConstructor``.
        contract: When true, ``convert_tree_to_cnf`` is called on each tree.
        lambda_val: Value passed to the constructor's ``set_lambda``.
        key: Row key whose value is given to ``construct_tree``.

    Returns:
        List of ``(tree, row['instances'])`` tuples, after it has been passed
        to ``normalize_trees`` (presumably normalised in place, since that
        call's return value is not used).
    """
    builder = TreeConstructor(table)
    builder.set_lambda(lambda_val)
    builder.set_concat(True)

    weighted_trees = []
    for row in sequences:
        if row is None:
            continue
        tree = builder.construct_tree(row[key])
        if contract:
            # Return value is ignored; presumably converts in place.
            convert_tree_to_cnf(tree)
        weighted_trees.append((tree, row['instances']))

    normalize_trees(weighted_trees)
    print(weighted_trees)
    return weighted_trees
def simple_learning_test():
    """Run the learner with a ``SimpleMultiplicityTeacher`` on the test data.

    The test table and trees are converted with a fresh ``TreesConverter``,
    the trees are added to the teacher as examples, and the learned grammar
    (as a string) is written to ``grammar_simple.txt`` and printed.
    """
    tree_converter = TreesConverter()
    encoded_table = dict(
        (tree_converter.convert_ngram(ngram), value)
        for ngram, value in get_table_tests()
    )
    examples = get_trees_tests()

    # NOTE(review): this constructor is configured but never passed on.
    builder = TreeConstructor(encoded_table)
    builder.set_concat(True)
    builder.set_lambda(1.0)

    teacher = SimpleMultiplicityTeacher(epsilon=1e-06, default_val=0)
    for tree, prob in examples:
        teacher.addExample(tree_converter.convert_tree(tree), prob)

    print('starting')
    started_at = time()  # captured but not reported in this function
    automaton = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(automaton, tree_converter.reverse_dict)
    grammar_text = convert_pcfg_to_str(grammar)

    with open('grammar_simple.txt', 'w') as out:
        out.write(grammar_text)
    print(grammar_text)
def swap_learning_test():
    """Run the learner with a swap-comparator ``ProbabilityTeacher``.

    The test table and trees are converted with a fresh ``TreesConverter``,
    the trees are added as examples, a ``TreeConstructor`` generator is set
    up on the teacher, and the learned grammar (as a string) is written to
    ``grammar_swap.txt`` and printed.
    """
    tree_converter = TreesConverter()
    encoded_table = dict(
        (tree_converter.convert_ngram(ngram), value)
        for ngram, value in get_table_tests()
    )
    examples = get_trees_tests()

    builder = TreeConstructor(encoded_table)
    builder.set_concat(True)
    builder.set_lambda(1.0)

    comparator = SwapComparator()
    teacher = ProbabilityTeacher(comparator, 0.25, 1e-07)
    for tree, prob in examples:
        teacher.addExample(tree_converter.convert_tree(tree), prob)
    teacher.setup_constructor_generator(builder, 4, -1)

    print('starting')
    started_at = time()  # captured but not reported in this function
    automaton = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(automaton, tree_converter.reverse_dict)
    grammar_text = convert_pcfg_to_str(grammar)

    with open('grammar_swap.txt', 'w') as out:
        out.write(grammar_text)
    print(grammar_text)
def thread_task():
    """Learn a grammar from the selected examples and write it to disk.

    Relies on ``self``, ``oracle_settings`` and ``lst`` from the enclosing
    scope (not visible here). The learned grammar string is JSON-dumped to
    ``grammar.json``, written verbatim to ``grammar.txt`` and printed.
    """
    rows = self._table.rows
    encoded_table = dict(
        (self._tree_converter.convert_ngram(ngram), value)
        for ngram, value in rows
    )

    teacher_type = oracle_settings['type']
    if teacher_type is not ProbabilityTeacher:
        teacher = teacher_type(*oracle_settings['args'])
    else:
        print(oracle_settings)
        comparator = oracle_settings['comparator']()
        teacher = ProbabilityTeacher(
            comparator, oracle_settings['args'][0], 1e-06)

    for tree, prob in lst.get_selected_elements():
        teacher.addExample(self._tree_converter.convert_tree(tree), prob)

    comparator_type = oracle_settings['comparator']
    if comparator_type is SwapComparator:
        builder = TreeConstructor(encoded_table)
        builder.set_concat(True)
        builder.set_lambda(1.0)
        teacher.setup_constructor_generator(
            builder, *oracle_settings['equiv_settings'])
    if comparator_type is DuplicationComparator:
        teacher.setup_duplications_generator(2)

    # set_verbose(LOG_DEBUG)
    print('starting')
    started_at = time()  # captured but not reported in this function
    automaton = learnMultPos(teacher)
    grammar = convert_pmta_to_pcfg(automaton, self._tree_converter.reverse_dict)
    grammar_text = convert_pcfg_to_str(grammar)

    with open('grammar.json', 'w') as json_file:
        json.dump(grammar_text, json_file)
    with open('grammar.txt', 'w') as output:
        output.write(grammar_text)
    print(grammar_text)