# Example #1 (score: 0)
def test_load():
    """Loading sentence 2 of the test SAF should yield its single nsubj triple."""
    _check_jena()
    tree = SyntaxTree(TEST_SAF, sentence_id=2)
    found = list(tree.get_triples())
    assert_equal(len(found), 1)
    triple = found[0]
    assert_equal(triple.predicate, "rel_nsubj")
    assert_equal(triple.subject.lemma, "it")
    assert_equal(triple.object.lemma, "rain")
# Example #2 (score: 0)
def test_rules():
    """Applying the test ruleset should produce exactly one 'marry' triple."""
    _check_jena()
    tree = SyntaxTree(TEST_SAF, sentence_id=1)
    tree.apply_ruleset(TEST_RULES)

    marry_triples = [triple for triple in tree.get_triples()
                     if triple.predicate == "marry"]
    assert_equal(len(marry_triples), 1)
    assert_equal(marry_triples[0].subject.lemma, "John")
    assert_equal(marry_triples[0].object.lemma, "Mary")
# Example #3 (score: 0)
def test_lexicon():
    """Applying the lexicon should tag each token with its lexical class.

    Tokens not covered by the lexicon get a ``lexclass`` of None.
    """
    _check_jena()
    t = SyntaxTree(TEST_SAF, sentence_id=1)
    t.apply_lexicon(TEST_RULES["lexicon"])

    # FIX: use .items() instead of the Python-2-only .iteritems() so this test
    # also runs on Python 3 (.items() is valid on both versions).
    classes = {k.replace(NS_AMCAT, ":"): v.get("lexclass") for (k, v) in t.get_tokens().items()}
    assert_equal(
        classes, {":t_2_marry": ["marry"], ":t_3_Mary": ["person"], ":t_1_John": ["person"], ":t_4_little": None}
    )
# Example #4 (score: 0)
 def get_transformed(self, aid, saf, rules):
     """Apply the ruleset named *rules* to every sentence of *saf*.

     Yields each sentence id after the ruleset has been applied to it.
     """
     print(saf)
     ruleset = RuleSet.objects.get(label=rules)
     from syntaxrules.soh import SOHServer
     from syntaxrules.syntaxtree import SyntaxTree
     server = SOHServer("http://localhost:3030/x")
     tree = SyntaxTree(server)
     sentence_ids = {token['sentence'] for token in saf['tokens']}
     for sentence_id in sentence_ids:
         tree.load_saf(saf, sentence_id)
         tree.apply_ruleset(ruleset.get_ruleset())
         yield sentence_id
    def get_context_data(self, **kwargs):
        """Add original and rule-processed syntax-tree renderings to the context.

        Renders the requested sentence of the article's SAF parse as a PNG
        (base64-encoded) before and after applying the view's ruleset.
        """
        from syntaxrules.syntaxtree import SyntaxTree
        from syntaxrules.soh import SOHServer

        ctx = super(ArticleRuleDetailsView, self).get_context_data(**kwargs)
        # Fetch the parsed (SAF) representation of the requested article.
        saf = amcatxtas.get_result(int(self.kwargs['article_id']),
                                   self.object.preprocessing)
        sid = int(self.request.GET.get("sid", 1))  # sentence to display; defaults to 1
        sentences = list(self.get_sentences(saf))

        # NOTE(review): SOH server URL is hard-coded — consider moving to settings.
        soh = SOHServer(url="http://localhost:3030/x")
        t = SyntaxTree(soh)
        t.load_saf(saf, sid)
        g = t.get_graphviz()
        original_tree = base64.b64encode(g.draw(format='png', prog='dot'))

        # Apply the view's ruleset and render the transformed tree (grey_rel
        # presumably greys out the original syntax relations — confirm).
        ruleset = self.object.get_ruleset()
        t.apply_ruleset(ruleset)
        g = t.get_graphviz(grey_rel=True)
        processed_tree = base64.b64encode(g.draw(format='png', prog='dot'))

        ruleset_dump = json.dumps(ruleset, indent=2)
        saf_dump = json.dumps(saf, indent=2)
        # locals() exports every local (saf, sentences, original_tree, ...) as a
        # template-context key, so the variable names above are significant.
        ctx.update(locals())
        return ctx
# Example #6 (score: 0)
def test_graph():
    """Graphviz rendering should expose the expected edges and node labels."""
    _check_jena()
    tree = SyntaxTree(TEST_SAF, sentence_id=1)
    graph = tree.get_graphviz()
    expected_edges = {
        (u"t_3_Mary", u"t_2_marry"),
        (u"t_1_John", u"t_2_marry"),
        (u"t_4_little", u"t_1_John"),
    }
    assert_equal(set(graph.edges()), expected_edges)
    assert_in("lemma: John", graph.get_node("t_1_John").attr["label"])

    # Can we 'gray out' syntax relations after applying rules
    tree.apply_ruleset(TEST_RULES)
    tree.apply_lexicon([{"lexclass": "man", "lemma": "john"}])
    graph = tree.get_graphviz()  # triple_args_function=VIS_GREY_REL)
    expected_edges = expected_edges | {(u"t_1_John", u"t_3_Mary")}
    assert_equal(set(graph.edges()), expected_edges)
    # assert_false(g.get_edge(u't_1_John', u't_3_Mary').attr.get("color"))
    # assert_equal(g.get_edge(u't_1_John', u't_2_marry').attr.get("color"),
    # "grey")

    # can we draw it? can't check output, but we can check for errors
    with tempfile.NamedTemporaryFile() as outfile:
        graph.draw(outfile.name, prog="dot")
        graph.draw("/tmp/test.png", prog="dot")
# Example #7 (score: 0)
def test_rules_multiple():
    """Can we apply rules to multiple sentences at once"""
    _check_jena()
    sentence_ids = {token["sentence"] for token in TEST_SAF["tokens"]}

    # Apply the ruleset one sentence at a time and collect the triples.
    per_sentence = set()
    for sentence_id in sentence_ids:
        tree = SyntaxTree(TEST_SAF, sentence_id=sentence_id)
        tree.apply_ruleset(TEST_RULES)
        per_sentence.update((s.id, p, o.id) for (s, p, o) in tree.get_triples())

    # Apply the ruleset to all sentences in a single pass.
    tree = SyntaxTree(TEST_SAF)
    tree.apply_ruleset(TEST_RULES)
    combined = {(s.id, p, o.id) for (s, p, o) in tree.get_triples()}

    assert_equal(per_sentence, combined)
# Example #8 (score: 0)
def test_get_triples():
    """Relations are extracted; an inserted relation blocks percolation."""
    _check_jena()
    tree = SyntaxTree(TEST_SAF, sentence_id=1)
    tree.apply_ruleset(TEST_RULES)
    expected_marry = {"predicate": "marry", "object": 3, "object_nodes": [3],
                      "subject": 1, "subject_nodes": [1, 4]}
    assert_equal(list(tree.get_relations()), [expected_marry])

    # add new relation, see if percolation is 'blocked' by relation
    tree.apply_rule({"condition": "?x :rel_dobj ?y", "insert": "?y :test ?x"})
    by_predicate = sorted(tree.get_relations(), key=lambda rel: rel["predicate"])
    assert_equal(by_predicate[0], expected_marry)
    expected_test = {"predicate": "test", "object": 3, "object_nodes": [3],
                     "subject": 2, "subject_nodes": [2]}
    assert_equal(by_predicate[1], expected_test)
# Example #9 (score: 0)
 def get_transformed(self, aid, saf, rules):
     """Apply the ruleset named *rules* to each sentence of *saf*.

     Yields the sentence id after each sentence has been transformed.
     NOTE(review): only the id is yielded; callers presumably read results
     back from the SOH server — confirm.
     """
     print(saf)  # NOTE(review): looks like a leftover debug print
     ruleset = RuleSet.objects.get(label=rules)
     from syntaxrules.soh import SOHServer
     from syntaxrules.syntaxtree import SyntaxTree
     soh = SOHServer("http://localhost:3030/x")
     t = SyntaxTree(soh)
     # One tree instance is reused across sentences — assumes load_saf
     # replaces any previously loaded sentence; confirm.
     for sid in {token['sentence'] for token in saf['tokens']}:
         t.load_saf(saf, sid)
         t.apply_ruleset(ruleset.get_ruleset())
         yield sid
# Example #10 (score: 0)
    def get_context_data(self, **kwargs):
        """Add syntax-tree renderings (original, preprocessed, intermediate,
        final) to the template context as base64-encoded PNGs.
        """
        from syntaxrules.syntaxtree import SyntaxTree

        ctx = super(ArticleRuleDetailsView, self).get_context_data(**kwargs)
        # Fetch the parsed (SAF) representation of the requested article.
        saf = amcatxtas.get_result(int(self.kwargs['article_id']),
                                   self.object.preprocessing)
        sid = int(self.request.GET.get("sid", 1))  # sentence to display; defaults to 1
        sentences = list(self.get_sentences(saf))

        t = SyntaxTree(saf, sid)
        g = t.get_graphviz()
        original_tree = base64.b64encode(g.draw(format='png', prog='dot'))

        # Optionally apply a preprocessing ruleset selected via ?preprocess=<pk>.
        if 'preprocess' in self.request.GET:
            prep = RuleSet.objects.get(pk=int(self.request.GET['preprocess']))
            t.apply_ruleset(prep.get_ruleset())
            g = t.get_graphviz(grey_rel=True)
            preprocessed_tree = base64.b64encode(
                g.draw(format='png', prog='dot'))

        trees = []  # [(name, tree), ] for intermediate trees
        ruleset = self.object.get_ruleset()

        # Apply lexicon + rules in batches, rendering an intermediate tree
        # after every rule flagged with 'display'.
        updates = [t._get_lexicon_update(ruleset['lexicon'])]
        for rule in ruleset['rules']:
            updates.append(rule)
            if rule.get('display'):
                t.apply_updates(updates)
                updates = []
                g = t.get_graphviz(grey_rel=True)
                png = base64.b64encode(g.draw(format='png', prog='dot'))
                trees.append(('After ' + rule['label'], png))

        # Flush any remaining updates and render the final tree.
        t.apply_updates(updates)
        g = t.get_graphviz(grey_rel=True)
        processed_tree = base64.b64encode(g.draw(format='png', prog='dot'))

        ruleset_dump = json.dumps(ruleset, indent=2)
        saf_dump = json.dumps(saf, indent=2)
        # locals() exports every local as a template-context key, so the
        # variable names above are significant.
        ctx.update(locals())
        return ctx
# Example #11 (score: 0)
def get_sources(saf, ruleset_name):
    """Apply the named ruleset to *saf* and return the structs with a 'quote'."""
    ruleset = _get_ruleset(ruleset_name)
    tree = SyntaxTree(saf)
    tree.apply_ruleset(ruleset)
    structs = tree.get_structs()
    return [struct for struct in structs if 'quote' in struct.keys()]
# Example #12 (score: 0)
def _get_tree(sid=1):
    """Return a SyntaxTree for sentence *sid* of TEST_SAF via the SOH server."""
    _check_soh()
    server = SOHServer(url="http://localhost:3030/x")
    tree = SyntaxTree(server)
    tree.load_saf(TEST_SAF, sid)
    return tree
# Example #13 (score: 0)
    def get_context_data(self, **kwargs):
        """Add syntax-tree renderings (original, preprocessed, intermediate,
        final) to the template context as base64-encoded PNGs.
        """
        from syntaxrules.syntaxtree import SyntaxTree


        ctx = super(ArticleRuleDetailsView, self).get_context_data(**kwargs)
        # Fetch the parsed (SAF) representation of the requested article.
        saf = amcatxtas.get_result(int(self.kwargs['article_id']),
                                   self.object.preprocessing)
        sid = int(self.request.GET.get("sid", 1))  # sentence to display; defaults to 1
        sentences = list(self.get_sentences(saf))


        t = SyntaxTree(saf, sid)
        g = t.get_graphviz()
        original_tree = base64.b64encode(g.draw(format='png', prog='dot'))

        # Optionally apply a preprocessing ruleset selected via ?preprocess=<pk>.
        if 'preprocess' in self.request.GET:
            prep = RuleSet.objects.get(pk=int(self.request.GET['preprocess']))
            t.apply_ruleset(prep.get_ruleset())
            g = t.get_graphviz(grey_rel=True)
            preprocessed_tree = base64.b64encode(g.draw(format='png', prog='dot'))


        trees = []  # [(name, tree), ] for intermediate trees
        ruleset = self.object.get_ruleset()

        # Apply lexicon + rules in batches, rendering an intermediate tree
        # after every rule flagged with 'display'.
        updates = [t._get_lexicon_update(ruleset['lexicon'])]
        for rule in ruleset['rules']:
            updates.append(rule)
            if rule.get('display'):
                t.apply_updates(updates)
                updates = []
                g = t.get_graphviz(grey_rel=True)
                png = base64.b64encode(g.draw(format='png', prog='dot'))
                trees.append(('After '+rule['label'], png))

        # Flush any remaining updates and render the final tree.
        t.apply_updates(updates)
        g = t.get_graphviz(grey_rel=True)
        processed_tree = base64.b64encode(g.draw(format='png', prog='dot'))

        ruleset_dump = json.dumps(ruleset, indent=2)
        saf_dump = json.dumps(saf, indent=2)
        # locals() exports every local as a template-context key, so the
        # variable names above are significant.
        ctx.update(locals())
        return ctx