def get_context_data(self, **kwargs):
    """Build the template context for a single-article rule-application view.

    Renders the selected sentence's dependency tree twice (before and
    after applying this object's rule set) as base64-encoded PNGs, plus
    JSON dumps of the rule set and the SAF document.

    NOTE: this method ends with ``ctx.update(locals())`` — every local
    variable name defined here is a template context key. Do not rename
    locals without checking the template.
    """
    from syntaxrules.syntaxtree import SyntaxTree
    from syntaxrules.soh import SOHServer
    ctx = super(ArticleRuleDetailsView, self).get_context_data(**kwargs)
    # SAF document for the requested article, produced by the configured
    # preprocessing pipeline (fetched from xtas).
    saf = amcatxtas.get_result(int(self.kwargs['article_id']),
                               self.object.preprocessing)
    # Sentence to display; defaults to sentence 1 when ?sid= is absent.
    sid = int(self.request.GET.get("sid", 1))
    sentences = list(self.get_sentences(saf))
    # SOH = SPARQL-over-HTTP; assumes a local Fuseki-style server — TODO
    # confirm the endpoint is configurable rather than hard-coded.
    soh = SOHServer(url="http://localhost:3030/x")
    t = SyntaxTree(soh)
    t.load_saf(saf, sid)
    g = t.get_graphviz()
    # Unmodified dependency tree as a base64 PNG for inline <img> use.
    original_tree = base64.b64encode(g.draw(format='png', prog='dot'))
    ruleset = self.object.get_ruleset()
    t.apply_ruleset(ruleset)
    # grey_rel=True greys out the original syntactic relations so the
    # rule-derived relations stand out.
    g = t.get_graphviz(grey_rel=True)
    processed_tree = base64.b64encode(g.draw(format='png', prog='dot'))
    ruleset_dump = json.dumps(ruleset, indent=2)
    saf_dump = json.dumps(saf, indent=2)
    # Expose all locals (saf, sid, sentences, original_tree, ...) to the
    # template in one go.
    ctx.update(locals())
    return ctx
def test_graph():
    """Graphviz export contains the expected edges and node labels.

    Fix: dropped the stray ``g.draw("/tmp/test.png", ...)`` debug call —
    it polluted /tmp, was non-portable (no /tmp on Windows), and was
    redundant with the NamedTemporaryFile draw immediately before it.
    """
    _check_jena()
    t = SyntaxTree(TEST_SAF, sentence_id=1)
    g = t.get_graphviz()
    assert_equal(
        set(g.edges()),
        {(u"t_3_Mary", u"t_2_marry"),
         (u"t_1_John", u"t_2_marry"),
         (u"t_4_little", u"t_1_John")}
    )
    assert_in("lemma: John", g.get_node("t_1_John").attr["label"])
    # Can we 'gray out' syntax relations after applying rules
    t.apply_ruleset(TEST_RULES)
    t.apply_lexicon([{"lexclass": "man", "lemma": "john"}])
    g = t.get_graphviz()  # triple_args_function=VIS_GREY_REL)
    assert_equal(
        set(g.edges()),
        {
            (u"t_3_Mary", u"t_2_marry"),
            (u"t_1_John", u"t_2_marry"),
            (u"t_4_little", u"t_1_John"),
            (u"t_1_John", u"t_3_Mary"),
        },
    )
    # assert_false(g.get_edge(u't_1_John', u't_3_Mary').attr.get("color"))
    # assert_equal(g.get_edge(u't_1_John', u't_2_marry').attr.get("color"),
    #              "grey")
    # can we draw it? can't check output, but we can check for errors
    with tempfile.NamedTemporaryFile() as f:
        g.draw(f.name, prog="dot")
def test_rules():
    """Applying TEST_RULES yields exactly one John-marry-Mary triple."""
    _check_jena()
    tree = SyntaxTree(TEST_SAF, sentence_id=1)
    tree.apply_ruleset(TEST_RULES)
    marry_triples = [triple for triple in tree.get_triples()
                     if triple.predicate == "marry"]
    assert_equal(len(marry_triples), 1)
    marry = marry_triples[0]
    assert_equal(marry.subject.lemma, "John")
    assert_equal(marry.object.lemma, "Mary")
def get_transformed(self, aid, saf, rules):
    """Apply the rule set named *rules* to every sentence of *saf*.

    Loads each sentence of the SAF document into a (shared) syntax tree,
    applies the rule set, and yields the sentence id once it has been
    processed. *aid* is accepted for interface compatibility but unused
    here — presumably the article id; confirm against callers.

    Fixes: removed the leftover ``print(saf)`` debug statement, and
    hoisted the loop-invariant ``ruleset.get_ruleset()`` call out of the
    per-sentence loop.
    """
    from syntaxrules.soh import SOHServer
    from syntaxrules.syntaxtree import SyntaxTree
    ruleset = RuleSet.objects.get(label=rules)
    # Resolve the rule set once; it does not change between sentences.
    rules_json = ruleset.get_ruleset()
    soh = SOHServer("http://localhost:3030/x")
    t = SyntaxTree(soh)
    for sid in {token['sentence'] for token in saf['tokens']}:
        t.load_saf(saf, sid)
        t.apply_ruleset(rules_json)
        yield sid
def test_get_triples():
    """get_relations reports rule-derived relations with their node sets."""
    _check_jena()
    tree = SyntaxTree(TEST_SAF, sentence_id=1)
    tree.apply_ruleset(TEST_RULES)
    marry = {"predicate": "marry",
             "object": 3, "object_nodes": [3],
             "subject": 1, "subject_nodes": [1, 4]}
    assert_equal(list(tree.get_relations()), [marry])
    # add new relation, see if percolation is 'blocked' by relation
    tree.apply_rule({"condition": "?x :rel_dobj ?y", "insert": "?y :test ?x"})
    relations = sorted(tree.get_relations(),
                       key=lambda relation: relation["predicate"])
    assert_equal(relations[0], marry)
    assert_equal(relations[1],
                 {"predicate": "test",
                  "object": 3, "object_nodes": [3],
                  "subject": 2, "subject_nodes": [2]})
def test_rules_multiple():
    """Rules applied to the whole document match per-sentence application."""
    _check_jena()
    sentence_ids = {token["sentence"] for token in TEST_SAF["tokens"]}
    # Apply the rules one sentence at a time and pool the triples.
    per_sentence = set()
    for sid in sentence_ids:
        tree = SyntaxTree(TEST_SAF, sentence_id=sid)
        tree.apply_ruleset(TEST_RULES)
        per_sentence |= {(s.id, p, o.id) for (s, p, o) in tree.get_triples()}
    # Now apply them to all sentences in a single tree.
    whole_doc = SyntaxTree(TEST_SAF)
    whole_doc.apply_ruleset(TEST_RULES)
    combined = {(s.id, p, o.id) for (s, p, o) in whole_doc.get_triples()}
    assert_equal(per_sentence, combined)
def get_context_data(self, **kwargs):
    """Build the template context for the rule-details view of one article.

    Renders the selected sentence's dependency tree at several stages:
    original, optionally after a preprocessing rule set (?preprocess=pk),
    after each rule flagged ``display`` (intermediate trees), and fully
    processed — each as a base64-encoded PNG.

    NOTE: this method ends with ``ctx.update(locals())`` — every local
    variable name defined here is a template context key. Do not rename
    locals without checking the template.
    """
    from syntaxrules.syntaxtree import SyntaxTree
    ctx = super(ArticleRuleDetailsView, self).get_context_data(**kwargs)
    # SAF document for the requested article from xtas.
    saf = amcatxtas.get_result(int(self.kwargs['article_id']),
                               self.object.preprocessing)
    # Sentence to display; defaults to sentence 1 when ?sid= is absent.
    sid = int(self.request.GET.get("sid", 1))
    sentences = list(self.get_sentences(saf))
    t = SyntaxTree(saf, sid)
    g = t.get_graphviz()
    # Unmodified dependency tree as a base64 PNG for inline <img> use.
    original_tree = base64.b64encode(g.draw(format='png', prog='dot'))
    if 'preprocess' in self.request.GET:
        # Optional preprocessing rule set, selected by primary key.
        prep = RuleSet.objects.get(pk=int(self.request.GET['preprocess']))
        t.apply_ruleset(prep.get_ruleset())
        g = t.get_graphviz(grey_rel=True)
        preprocessed_tree = base64.b64encode(g.draw(format='png', prog='dot'))
    trees = []  # [(name, tree), ] for intermediate trees
    ruleset = self.object.get_ruleset()
    # Updates are batched: the lexicon update first, then rules, flushed
    # whenever a rule is flagged for display so an intermediate tree can
    # be rendered at that point.
    updates = [t._get_lexicon_update(ruleset['lexicon'])]
    for rule in ruleset['rules']:
        updates.append(rule)
        if rule.get('display'):
            t.apply_updates(updates)
            updates = []
            g = t.get_graphviz(grey_rel=True)
            png = base64.b64encode(g.draw(format='png', prog='dot'))
            trees.append(('After '+rule['label'], png))
    # Apply any updates remaining after the last display checkpoint.
    t.apply_updates(updates)
    g = t.get_graphviz(grey_rel=True)
    processed_tree = base64.b64encode(g.draw(format='png', prog='dot'))
    ruleset_dump = json.dumps(ruleset, indent=2)
    saf_dump = json.dumps(saf, indent=2)
    # Expose all locals to the template in one go.
    ctx.update(locals())
    return ctx
def get_context_data(self, **kwargs):
    """Build the template context for the rule-details view of one article.

    Renders the selected sentence's dependency tree at several stages:
    original, optionally after a preprocessing rule set (?preprocess=pk),
    after each rule flagged ``display`` (intermediate trees), and fully
    processed — each as a base64-encoded PNG.

    NOTE(review): this appears to duplicate another get_context_data
    definition in this file, differing only in formatting — confirm
    whether both are needed (if defined in the same class, the later one
    silently shadows the earlier).

    NOTE: this method ends with ``ctx.update(locals())`` — every local
    variable name defined here is a template context key. Do not rename
    locals without checking the template.
    """
    from syntaxrules.syntaxtree import SyntaxTree
    ctx = super(ArticleRuleDetailsView, self).get_context_data(**kwargs)
    # SAF document for the requested article from xtas.
    saf = amcatxtas.get_result(int(self.kwargs['article_id']),
                               self.object.preprocessing)
    # Sentence to display; defaults to sentence 1 when ?sid= is absent.
    sid = int(self.request.GET.get("sid", 1))
    sentences = list(self.get_sentences(saf))
    t = SyntaxTree(saf, sid)
    g = t.get_graphviz()
    # Unmodified dependency tree as a base64 PNG for inline <img> use.
    original_tree = base64.b64encode(g.draw(format='png', prog='dot'))
    if 'preprocess' in self.request.GET:
        # Optional preprocessing rule set, selected by primary key.
        prep = RuleSet.objects.get(pk=int(self.request.GET['preprocess']))
        t.apply_ruleset(prep.get_ruleset())
        g = t.get_graphviz(grey_rel=True)
        preprocessed_tree = base64.b64encode(
            g.draw(format='png', prog='dot'))
    trees = []  # [(name, tree), ] for intermediate trees
    ruleset = self.object.get_ruleset()
    # Updates are batched: the lexicon update first, then rules, flushed
    # whenever a rule is flagged for display so an intermediate tree can
    # be rendered at that point.
    updates = [t._get_lexicon_update(ruleset['lexicon'])]
    for rule in ruleset['rules']:
        updates.append(rule)
        if rule.get('display'):
            t.apply_updates(updates)
            updates = []
            g = t.get_graphviz(grey_rel=True)
            png = base64.b64encode(g.draw(format='png', prog='dot'))
            trees.append(('After ' + rule['label'], png))
    # Apply any updates remaining after the last display checkpoint.
    t.apply_updates(updates)
    g = t.get_graphviz(grey_rel=True)
    processed_tree = base64.b64encode(g.draw(format='png', prog='dot'))
    ruleset_dump = json.dumps(ruleset, indent=2)
    saf_dump = json.dumps(saf, indent=2)
    # Expose all locals to the template in one go.
    ctx.update(locals())
    return ctx
def get_sources(saf, ruleset_name):
    """Apply the named rule set to *saf* and return the quote structures.

    Builds a syntax tree from the SAF document, applies the rule set
    resolved by ``_get_ruleset(ruleset_name)``, and returns the list of
    result structures that contain a 'quote' key.

    Fix: membership is tested directly on the dict — ``'quote' in
    struct.keys()`` was a redundant (and, on Python 2, list-building)
    detour.
    """
    ruleset = _get_ruleset(ruleset_name)
    tree = SyntaxTree(saf)
    tree.apply_ruleset(ruleset)
    return [struct for struct in tree.get_structs() if 'quote' in struct]