def print_trees(self, text):
    """Parse *text* and print every parsed sentence followed by its tree.

    For each item returned by ``self.parse_text(text)``, a ``Sentence`` is
    built from the item's second element, the item's first element is
    printed, the sentence's ``print_tree()`` is called, and an empty line
    is printed as a separator.
    """
    for parsed in self.parse_text(text):
        sentence_text, token_seq = parsed[0], parsed[1]
        sentence = Sentence(token_seq)
        print(sentence_text)
        sentence.print_tree()
        print('')
def read_text(self, text, aux_text=None, reset_context=True):
    """Parse *text* and read every resulting sentence.

    The parser and the disambiguation helper are created on first use
    (when ``self.parser`` is ``None``).

    Args:
        text: text handed to ``self.parser.parse_text``.
        aux_text: optional extra text; when truthy it is appended to
            *text* (separated by a newline) to form ``self.aux_text``.
        reset_context: when true, ``self.aux_text`` is overwritten.

    Returns:
        A list of ``(first_item_of_parse, self.read_sentence(...))``
        tuples, one per parse returned by the parser.
    """
    # NOTE(review): the original indentation was lost; the aux_text check
    # is assumed to be nested under the reset_context check -- confirm.
    if self.parser is None:
        self.debug_msg('creating parser...')
        self.parser = Parser()
        self.disamb = Disambiguation(self.hg, self.parser)

    nlp_parses = self.parser.parse_text(text)

    if reset_context:
        context = text
        if aux_text:
            context = '%s\n%s' % (text, aux_text)
        self.aux_text = context

    # Read all sentences first, then emit the debug dump, so that the
    # order of debug messages stays the same as a two-pass implementation.
    parses = []
    for nlp_parse in nlp_parses:
        outcome = self.read_sentence(Sentence(nlp_parse[1]))
        parses.append((nlp_parse[0], outcome))

    for parse in parses:
        self.debug_msg('== extra ==')
        for edge in parse[1].edges:
            self.debug_msg(ed.edge2str(edge))

    return parses
def test(infile, model_type='rf'):
    """Run a Hypergen model over the test parses in *infile* and report.

    For every parse returned by ``read_parses(infile, test_set=True)`` a
    fresh ``Hypergen`` is tested against the expected transformations and
    its ``wrong / total`` score is printed. Afterwards the accumulated
    counts of predicted and true transformations are printed, followed by
    the overall error rate as a percentage.

    Args:
        infile: passed through to ``read_parses``.
        model_type: passed through to ``Hypergen`` (default ``'rf'``).

    Returns:
        None. All results are written to standard output.
    """
    parses = read_parses(infile, test_set=True)

    acc_total = 0
    acc_wrong = 0
    acc_predictions = Counter()
    acc_true_values = Counter()

    for parse in parses:
        # Only fields 1 (sentence JSON) and 3 (comma-separated integer
        # transformations) are used; fields 0 and 2 are ignored here.
        json_str = parse[1].strip()
        sentence = Sentence(json_str=json_str)
        transfs = [int(token) for token in parse[3].split(',')]
        total = len(transfs)

        hgforest = Hypergen(model_type=model_type)
        hgforest.test(sentence, transfs)
        wrong = hgforest.wrong
        print('%s / %s' % (wrong, total))

        acc_total += total
        acc_wrong += wrong
        # In-place Counter addition keeps only positive counts, exactly
        # like the binary ``+`` it replaces.
        acc_predictions += Counter(hgforest.test_predictions)
        acc_true_values += Counter(hgforest.test_true_values)

    print('PREDICTIONS:')
    for transf, count in acc_predictions.items():
        print('%s: %s' % (hgtransf.to_string(transf), count))

    print('TRUE_VALUES:')
    for transf, count in acc_true_values.items():
        print('%s: %s' % (hgtransf.to_string(transf), count))

    # Without any tested transformation the rate is undefined; report that
    # instead of failing with ZeroDivisionError.
    if acc_total == 0:
        print('error rate: n/a (no transformations were tested)')
        return

    error_rate = (float(acc_wrong) / float(acc_total)) * 100.
    print('error rate: %.3f%%' % error_rate)
for i in range(len(token_seq)): token_seq[i].position_in_sentence = i sents.append((sentence_text, token_seq)) return sents def print_trees(self, text): parses = self.parse_text(text) for p in parses: s = Sentence(p[1]) print(p[0]) s.print_tree() print('') if __name__ == '__main__': test_text = u"""Some subspecies of mosquito might be 1st to be genetically wiped out.""" # test_text = u"""Des millions de Français n’ont pas accès à une connexion.""" print('Starting parser...') parser = Parser() print('Parsing...') result = parser.parse_text(test_text) for r in result: sentence = Sentence(r[1]) print(r[0]) sentence.print_tree() print('')
return elem_id, transf def process_sentence(self, sentence): self.tree.root_id = self.process_token(sentence.root())[0] return ParserOutput(sentence, self.tree) def transform(sentence): alpha = AlphaForest() return alpha.process_sentence(sentence) if __name__ == '__main__': # learn('cases.csv', 'alpha_forest.model') test_text = """ Satellites from NASA and other agencies have been tracking sea ice changes since 1979. """ # test_text = 'Telmo is going to the gym.' print('Starting parser...') parser = Parser() print('Parsing...') result = parser.parse_text(test_text) for r in result: s = Sentence(r[1]) t = transform(s) print(t.tree.to_hyperedge_str(with_namespaces=False))