def edge2pattern(edge, root=False, subtype=False):
    """Builds a pattern edge from the type (and optionally root) of an edge.

    The pattern has the form 'ROOT/TYPE', with '.ARGROLES' appended when
    edge.argroles() is not the empty string.

    Keyword arguments:
    root -- keep the root of the edge when it is an atom; in every other
    case the root position is the wildcard '*' (default: False)
    subtype -- use the full string returned by edge.type() instead of only
    its first character (default: False)
    """
    root_part = edge.root() if root and edge.is_atom() else '*'
    full_type = edge.type()
    type_part = full_type if subtype else full_type[0]
    base = '{}/{}'.format(root_part, type_part)

    roles = edge.argroles()
    if roles == '':
        return hedge(base)
    return hedge('{}.{}'.format(base, roles))
def normalize_edge(edge):
    """Returns the edge with its arguments reordered by argument role.

    Atoms are returned unchanged. For a non-atomic edge whose connector has
    argument roles, the (role, argument) pairs are sorted by
    argrole_order[role], the connector atom is rebuilt with the reordered
    role string, and the arguments follow the same order. Every subedge of
    the result is then normalized recursively.
    """
    if edge.is_atom():
        return edge

    connector = edge[0]
    roles = connector.argroles()
    if roles != '':
        # Pair every role with the argument in the same position; the sort
        # is stable, so arguments sharing a role keep their relative order.
        pairs = list(zip(roles, edge[1:]))
        pairs.sort(key=lambda pair: argrole_order[pair[0]])
        roles = ''.join(role for role, _ in pairs)

        old_atom = connector.atom()
        new_atom = hedge(
            '{}/{}.{}'.format(old_atom.root(), old_atom.type(), roles))
        connector = connector.replace_atom(old_atom, new_atom)
        edge = hedge([connector] + [argument for _, argument in pairs])

    return hedge([normalize_edge(child) for child in edge])
def parse_and_add(self, text, hg, sequence=None, set_text=True):
    """Parses the text and adds the resulting edges to the hypergraph.

    For every parse whose 'resolved_corefs' edge is truthy, that edge is
    added to hg, followed by a coreference-resolution edge (only when
    self.corefs is truthy) and the parse's 'extra_edges'. Finally, every
    edge in 'inferred_edges' is added with count=True.

    Keyword arguments:
    sequence -- if truthy, main edges are added through
    hg.add_to_sequence(sequence, edge) instead of hg.add(edge)
    (default: None)
    set_text -- if True, text is attached to the main edge and its subedges
    through _set_edges_text; if False, no text is attached (default: True)

    Returns the dictionary produced by self.parse(text).
    """
    parse_results = self.parse(text)

    for parse in parse_results['parses']:
        main_edge = parse['resolved_corefs']
        unresolved_edge = parse['main_edge'] if self.corefs else None

        if not main_edge:
            continue

        # add main edge
        if sequence:
            hg.add_to_sequence(sequence, main_edge)
        else:
            hg.add(main_edge)

        # attach text to edge and subedges (only when requested)
        if set_text:
            _set_edges_text(main_edge, hg, parse)

        if self.corefs:
            if set_text and unresolved_edge != main_edge:
                # NOTE(review): this repeats the call above with the same
                # arguments; it may have been meant for unresolved_edge.
                # Kept as-is so the default behavior does not change.
                _set_edges_text(main_edge, hg, parse)
            coref_res_edge = hedge(
                (const.coref_res_pred, unresolved_edge, main_edge))
            hg.add(coref_res_edge)

        # add extra edges
        for extra_edge in parse['extra_edges']:
            hg.add(extra_edge)

    for inferred_edge in parse_results['inferred_edges']:
        hg.add(inferred_edge, count=True)

    return parse_results
def test_add_count(self):
    """Each add with count=True raises the 'count' attribute by one."""
    self.hg.destroy()
    counted = hedge('(is/Pd graphbrain/Cp great/C)')
    for expected in (1, 2):
        self.hg.add(counted, count=True)
        self.assertEqual(
            self.hg.get_int_attribute(counted, 'count'), expected)
def test_parser(args):
    """Checks the parser against a file of sentence / expected-edge pairs.

    The file args.infile is read as alternating lines: a sentence, then
    the edge expected for it. Every mismatch between the expected edge and
    the 'main_edge' of the first parse is printed together with the parse
    tree, and a summary line is printed at the end.

    args -- object providing the attributes lang, parser and infile
    """
    parser = create_parser(lang=args.lang, parser_class=args.parser)

    total = 0
    wrong = 0
    pending = None  # sentence still waiting for its expected-edge line
    with open(args.infile) as infile:
        for raw_line in infile:
            stripped = raw_line.strip()
            if not pending:
                pending = stripped
                continue

            total += 1
            expected = hedge(stripped)
            first_parse = parser.parse(pending)['parses'][0]
            result = first_parse['main_edge']
            spacy_sentence = first_parse['spacy_sentence']
            if result != expected:
                wrong += 1
                print_tree(spacy_sentence.root)
                print('expected:')
                print(expected)
                print('result:')
                print(result)
            pending = None

    print('%s wrong out of %s.' % (wrong, total))
def input_defects(sentence, edge):
    """Asks the user, interactively, which subedges of the edge are wrong.

    The answers 's', 'h' and 'i' print the sentence, the colored edge and
    the indented edge respectively, and the question is asked again. Any
    other answer is split on '&' and every piece is parsed as an edge; the
    question repeats until all pieces parse and are contained (deep) in
    the given edge.

    Returns the list of subedges entered by the user.
    """
    options_str = '{}/{}/{}/subedge'.format(
        colored('s', 'magenta'), colored('h', 'cyan'), colored('i', 'yellow'))
    prompt = 'wrong subedge ({}) ? '.format(options_str)

    # Lazily evaluated views, so only the requested one is computed.
    views = {
        's': lambda: sentence,
        'h': lambda: colored_edge(edge),
        'i': lambda: indented(edge),
    }

    while True:
        answer = input(prompt)
        if answer in views:
            print('\n{}\n'.format(views[answer]()))
            continue

        found = []
        all_valid = True
        for edge_str in answer.split('&'):
            candidate = hedge(edge_str)
            if candidate is None:
                error_msg('{} did not parse correctly.'.format(edge_str))
                all_valid = False
            elif edge.contains(candidate, deep=True):
                found.append(candidate)
            else:
                error_msg('{} is not a subedge of {}.'.format(
                    candidate.to_str(), edge.to_str()))
                all_valid = False

        if all_valid and found:
            return found
def process_edge(self, edge, depth):
    """Yields the operations for a conflict expressed by the given edge.

    Nothing is yielded unless all of the following hold: the edge is not
    an atom, its connector type starts with 'Pd', it has more than two
    elements, the root of the deep lemma of its predicate is in
    CONFLICT_PRED_LEMMAS, it has exactly one 's' and one 'o' argument,
    both survive strip_concept and have a proper concept, and the main
    coreferences of both are actors.

    When they do, the generator yields create_op() of a 'conflict/P/.'
    edge, then everything produced by self._topics(), and finally
    increments self.conflicts.

    depth -- not used by this method
    """
    hg = self.system.get_hg(self)

    if edge.is_atom():
        return
    if edge.connector_type()[:2] != 'Pd':
        return

    pred = edge[0]
    if len(edge) <= 2:
        return
    if deep_lemma(hg, pred).root() not in CONFLICT_PRED_LEMMAS:
        return

    subjects = edge.edges_with_argrole('s')
    objects = edge.edges_with_argrole('o')
    if len(subjects) != 1 or len(objects) != 1:
        return

    subject = strip_concept(subjects[0])
    obj = strip_concept(objects[0])
    if not (subject and obj
            and has_proper_concept(subject)
            and has_proper_concept(obj)):
        return

    actor_orig = main_coref(hg, subject)
    actor_targ = main_coref(hg, obj)
    # The conflict edge is built before the actor checks, as in the
    # original statement order.
    conflict_edge = hedge(('conflict/P/.', actor_orig, actor_targ, edge))
    if not (is_actor(hg, actor_orig) and is_actor(hg, actor_targ)):
        return

    yield create_op(conflict_edge)
    for wedge in self._topics(hg, actor_orig, actor_targ, edge):
        yield wedge
    self.conflicts += 1
def test_degrees(self):
    """degree() follows the edges added to and removed from the graph."""
    self.hg.destroy()
    graphbrain = hedge('graphbrain/1')
    great = hedge('great/1')
    is_edge_str = '(is graphbrain/1 great/1)'
    size_edge_str = '(size graphbrain/1 7)'

    self.assertEqual(self.hg.degree(graphbrain), 0)

    self.hg.add(is_edge_str)
    self.assertEqual(self.hg.degree(graphbrain), 1)
    self.assertEqual(self.hg.degree(great), 1)

    self.hg.add(size_edge_str)
    self.assertEqual(self.hg.degree(graphbrain), 2)
    self.assertEqual(self.hg.degree(great), 1)

    self.hg.remove(hedge(is_edge_str))
    self.assertEqual(self.hg.degree(graphbrain), 1)
    self.assertEqual(self.hg.degree(great), 0)

    self.hg.remove(hedge(size_edge_str))
    self.assertEqual(self.hg.degree(graphbrain), 0)
def test_non_primary_to_primary(self):
    """Re-adding a non-primary edge with primary=True makes it primary."""
    self.hg.destroy()
    sunny = hedge('(is/P (the/M sun/C) shining/C)')

    self.hg.add(sunny, primary=False)
    self.assertFalse(self.hg.is_primary(sunny))

    self.hg.add(sunny, primary=True)
    self.assertTrue(self.hg.is_primary(sunny))
def clean_edge(edge):
    """Returns a cleaned-up atom built from the root of an atomic edge.

    Non-atomic edges are returned unchanged. For an atom, underscores are
    removed from its root, the result is passed through unidecode() and a
    new edge is built from that string alone.
    """
    if edge.is_atom():
        cleaned_root = unidecode(edge.root().replace('_', ''))
        return hedge(cleaned_root)
    return edge
def test_counter4(self):
    """With match_roots={'./P'} only patterns keeping the 'is' root count."""
    pc = PatternCounter(match_roots={'./P'})
    pc.count(hedge('((not/M is/P.sc) mary/C (not/M nice/C))'))

    expected_counts = (
        ('(*/P.sc */C */C)', 0),
        ('((*/M */P.sc) */C */C)', 0),
        ('((*/M is/P.sc) */C */C)', 1),
        ('(*/P.sc */C (*/M */C))', 0),
        ('((*/M */P.sc) */C (*/M */C))', 0),
        ('(*/M */P.sc)', 0),
        ('(*/M is/P.sc)', 1),
        ('(*/M */C)', 1),
    )
    for pattern_str, expected in expected_counts:
        self.assertTrue(pc.patterns[hedge(pattern_str)] == expected)
def replace_subject(edge, new_subject):
    """Returns a new edge where every 's'-role argument is new_subject.

    The argument roles are read from the connector (edge[0]); the role at
    position i refers to the element at position i + 1 of the edge.
    """
    connector = edge[0]
    items = list(edge)
    # Start at 1 so the index points at the argument, past the connector.
    for index, role in enumerate(connector.argroles(), start=1):
        if role == 's':
            items[index] = new_subject
    return hedge(items)
def count(self, edge):
    """Counts the patterns of the given edge into self.patterns.

    Atoms are ignored. For a non-atomic edge that satisfies
    self._matches_expansions(), the counter of every pattern returned by
    self._edge2patterns() is incremented. When self.count_subedges is
    truthy, the subedges are counted recursively as well.
    """
    if edge.is_atom():
        return

    if self._matches_expansions(edge):
        for pattern in self._edge2patterns(edge):
            key = hedge(pattern)
            self.patterns[key] += 1

    if self.count_subedges:
        for child in edge:
            self.count(child)
def test_counters3_non_deep_removal(self):
    """Counters after removing a nested edge with deep=False."""
    self.hg.destroy()
    nested_str = '(says mary/C (is graphbrain/C great/C))'
    self.hg.add(nested_str)
    self.hg.remove(hedge(nested_str), deep=False)

    graph = self.hg
    self.assertEqual(graph.atom_count(), 5)
    self.assertEqual(graph.primary_atom_count(), 0)
    self.assertEqual(graph.edge_count(), 6)
    self.assertEqual(graph.primary_edge_count(), 0)
def blocks(edge, subtypes=False, argroles=True, namespaces=False):
    """Displays a block representation of the edge in the notebook.

    The edge is simplified first; the three keyword arguments are
    forwarded unchanged to edge.simplify().

    Keyword arguments:
    subtypes -- passed to simplify() (default: False)
    argroles -- passed to simplify() (default: True)
    namespaces -- passed to simplify() (default: False)
    """
    simplified = hedge(edge).simplify(
        subtypes=subtypes, argroles=argroles, namespaces=namespaces)
    inner_html = _edge2html_blocks(simplified)
    wrapper = ('<div style="background-color:#fcfcfc; padding:50px">{}'
               '</div>')
    display(HTML(wrapper.format(inner_html)))
def _edge2patterns(self, edge): force_subtypes = self._force_subtypes(edge) force_root, _ = self._force_root_expansion(edge) return list( hedge(pattern) for pattern in self._list2patterns(list(edge.normalized()), force_subtypes=force_subtypes, force_root=force_root, force_expansion=False))
def test_search(self):
    """search() returns exactly the edges matching each pattern."""
    self.hg.destroy()
    for edge_str in (
            '(is/Pd graphbrain/Cp great/C)',
            '(says/Pd mary/Cp)',
            '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C))',
            '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C) extra/C)'):
        self.hg.add(edge_str)

    is_great = '(is/Pd graphbrain/Cp great/C)'
    mary_says = '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C))'
    cases = (
        ('(* graphbrain/Cp *)', [is_great]),
        ('(is/Pd graphbrain/Cp *)', [is_great]),
        ('(x * *)', []),
        ('(says/Pd * (is/Pd graphbrain/Cp great/C))', [mary_says]),
        ('(says/Pd * (is/Pd * *))', [mary_says]),
    )
    for pattern, expected_strs in cases:
        found = list(self.hg.search(pattern))
        self.assertEqual(found, [hedge(s) for s in expected_strs])
def test_inc_attributes_does_not_exist(self):
    """inc_attribute() on a missing attribute starts it at 1."""
    self.hg.destroy()
    target = hedge('(is graphbrain/1 great/1)')
    self.hg.add(target)
    self.assertEqual(self.hg.get_int_attribute(target, 'foo'), None)

    for expected in (1, 2):
        self.hg.inc_attribute(target, 'foo')
        self.assertEqual(self.hg.get_int_attribute(target, 'foo'), expected)
def test_batch_adds(self):
    """Edges added inside an hopen() block exist in the hypergraph."""
    self.hg.destroy()
    numbered = [
        hedge('(is/P {}/C number/C)'.format(number)) for number in range(10)
    ]

    with hopen('test.db') as batch_hg:
        for item in numbered:
            batch_hg.add(item)

    for item in numbered:
        self.assertTrue(self.hg.exists(item))
def test_counter1(self):
    """A default PatternCounter counts root-less patterns only."""
    pc = PatternCounter()
    pc.count(hedge('((not/M is/P.sc) mary/C (not/M nice/C))'))

    def counted_once(pattern_str):
        return pc.patterns[hedge(pattern_str)] == 1

    self.assertTrue(counted_once('(*/P.sc */C */C)'))
    self.assertTrue(counted_once('((*/M */P.sc) */C */C)'))
    self.assertTrue(counted_once('(*/P.sc */C (*/M */C))'))
    self.assertTrue(counted_once('((*/M */P.sc) */C (*/M */C))'))
    self.assertTrue(counted_once('(*/M */P.sc)'))
    self.assertFalse(counted_once('(*/M is/P.sc)'))
    self.assertTrue(counted_once('(*/M */C)'))
def show(edge, style='indented'):
    """Displays a representation of the edge in the notebook.

    The edge is converted with hedge() and rendered to HTML, which is then
    displayed.

    Keyword arguments:
    style -- render style ('indented', 'line') (default: 'indented')
    """
    rendered = _edge2html_show(hedge(edge), style=style)
    # The HTML string is the first element of the rendering result.
    display(HTML(rendered[0]))
def test_star_limit(self):
    """star() honors the limit argument and never exceeds the matches."""
    self.hg.destroy()
    for edge_str in ('(is graphbrain/1 great/1)',
                     '(is graphbrain/1 great/2)',
                     '(is graphbrain/1 great/3)'):
        self.hg.add(edge_str)

    center = hedge('graphbrain/1')
    self.assertEqual(len(list(self.hg.star(center))), 3)
    for limit, expected in ((1, 1), (2, 2), (10, 3)):
        limited = list(self.hg.star(center, limit=limit))
        self.assertEqual(len(limited), expected)
def coref_set(hg, edge, corefs=None):
    """Returns the set of coreferences that the given edge belongs to.

    The set always contains the edge itself. It is grown by following,
    recursively, every three-element edge whose connector is coref_pred
    and that contains an edge already in the set.

    Keyword arguments:
    corefs -- set accumulated so far; it is updated in place and used by
    the recursive calls (default: None, start with {edge})
    """
    if corefs is None:
        corefs = {edge}

    for coref_edge in hg.edges_with_edges((hedge(coref_pred), edge)):
        if len(coref_edge) != 3 or coref_edge[0].to_str() != coref_pred:
            continue
        for member in coref_edge[1:]:
            if member in corefs:
                continue
            corefs.add(member)
            coref_set(hg, member, corefs)

    return corefs
def test_deep_degrees(self):
    """degree() and deep_degree() for atoms and subedges of nested edges."""
    self.hg.destroy()
    graph = self.hg

    mary_goes = hedge('((is/M going/P) mary/C (to (the/M gym/C)))')
    graph.add(mary_goes)
    mary = hedge('mary/C')
    gym = hedge('gym/C')
    is_going = hedge('(is/M going/P)')

    self.assertEqual(graph.deep_degree(mary_goes), 0)
    self.assertEqual(graph.degree(mary), 1)
    self.assertEqual(graph.deep_degree(mary), 1)
    self.assertEqual(graph.degree(gym), 0)
    self.assertEqual(graph.deep_degree(gym), 1)
    self.assertEqual(graph.degree(is_going), 1)
    self.assertEqual(graph.deep_degree(is_going), 1)
    self.assertEqual(graph.deep_degree(gym), 1)

    john_goes = hedge('((is/M going/P) john/C (to (the/M gym/C)))')
    graph.add(john_goes)

    self.assertEqual(graph.degree(gym), 0)
    self.assertEqual(graph.deep_degree(gym), 2)
    self.assertEqual(graph.degree(is_going), 2)
    self.assertEqual(graph.deep_degree(is_going), 2)
def test_add_with_attributes2(self):
    """An edge added with attribute 'p' set to 0 is not primary."""
    self.hg.destroy()
    target = hedge('(is graphbrain/1 great/1)')
    attributes = {
        'p': 0,
        'd': 10,
        'dd': 20,
        'foo': 777,
        'bar': -.77,
        'xx': 'def',
    }
    self.hg.add_with_attributes(target, attributes)
    self.assertFalse(self.hg.is_primary(target))
def test_search_star(self):
    """search('*') returns every edge and atom in the hypergraph."""
    self.hg.destroy()
    added = (
        '(is/Pd graphbrain/Cp great/C)',
        '(says/Pd mary/Cp)',
        '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C))',
        '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C) extra/C)',
    )
    for edge_str in added:
        self.hg.add(edge_str)

    found = set(self.hg.search('*'))

    atoms = ('extra/C', 'graphbrain/Cp', 'great/C', 'is/Pd', 'mary/Cp',
             'says/Pd')
    expected = {hedge(edge_str) for edge_str in added + atoms}
    self.assertEqual(found, expected)
def test_counter6(self):
    """Top-level counting only, with subtypes matched for '*/M'."""
    pc = PatternCounter(count_subedges=False, match_subtypes={'*/M'})
    pc.count(hedge('((not/Mn is/Pd.sc) mary/Cp.s (very/M nice/Cc.s))'))

    def counted_once(pattern_str):
        return pc.patterns[hedge(pattern_str)] == 1

    self.assertTrue(counted_once('(*/P.sc */C */C)'))
    self.assertTrue(counted_once('((*/Mn */P.sc) */C */C)'))
    self.assertTrue(counted_once('(*/P.sc */C (*/M */C))'))
    self.assertTrue(counted_once('((*/Mn */P.sc) */C (*/M */C))'))
    self.assertFalse(counted_once('(*/Mn */P.sc)'))
    self.assertFalse(counted_once('(*/M */C)'))
def test_add_with_attributes_search(self):
    """An edge added with attributes is found by search()."""
    self.hg.destroy()
    target = hedge('(is graphbrain/1 great/1)')
    attributes = {
        'p': 1,
        'd': 10,
        'dd': 20,
        'foo': 777,
        'bar': -.77,
        'xx': 'def',
    }
    self.hg.add_with_attributes(target, attributes)

    found = set(self.hg.search('(is * *)'))
    self.assertEqual(found, {target})
def are_corefs(hg, edge1, edge2, corefs=None):
    """Checks if the two given edges are coreferences.

    Starting from edge1, every three-element edge whose connector is
    coref_pred is followed recursively; True is returned as soon as edge2
    is reached, False if the search ends without reaching it.

    Keyword arguments:
    corefs -- set of edges already visited; it is updated in place and
    used by the recursive calls (default: None, start with {edge1})
    """
    if corefs is None:
        corefs = {edge1}

    for coref_edge in hg.edges_with_edges((hedge(coref_pred), edge1)):
        if len(coref_edge) != 3 or coref_edge[0].to_str() != coref_pred:
            continue
        for member in coref_edge[1:]:
            if member in corefs:
                continue
            if member == edge2:
                return True
            corefs.add(member)
            if are_corefs(hg, member, edge2, corefs):
                return True

    return False
def test_primary_2(self):
    """set_primary() affects only the edge it is called on."""
    self.hg.destroy()
    whole = hedge('((is/M going/P) mary/C (to (the/M gym/C)))')
    self.hg.add(whole, primary=False)
    self.assertFalse(self.hg.is_primary(whole))
    self.hg.set_primary(whole, True)
    self.assertTrue(self.hg.is_primary(whole))

    # Subedges stay non-primary until they are set explicitly.
    for part_str in ('(is/M going/P)', 'mary/C'):
        self.assertFalse(self.hg.is_primary(hedge(part_str)))
        self.hg.set_primary(hedge(part_str), True)
        self.assertTrue(self.hg.is_primary(hedge(part_str)))