def test_load(self):
    """Load a sentence holding one 'su' triple into the test transformer.

    Checks the single triple exposed right after loading, and again after an
    update that sets ``bla`` on the subject of the ``rel_su`` relation.
    """
    from amcat.models import Token, Triple, Pos, Relation

    sentence = amcattest.create_test_analysis_sentence()
    # Three words are created; only the first two are attached to tokens.
    words = [amcattest.create_test_word(word=letter) for letter in "abc"]
    word_a, word_b = words[0], words[1]

    pos = Pos.objects.create(major="x", minor="y", pos="p")
    parent_token = Token.objects.create(sentence=sentence, position=1,
                                        word=word_a, pos=pos)
    child_token = Token.objects.create(sentence=sentence, position=2,
                                       word=word_b, pos=pos)
    su = Relation.objects.create(label="su")
    Triple.objects.create(parent=parent_token, child=child_token, relation=su)

    transformer = get_test_transformer()
    transformer.load_sentence(sentence.id)

    loaded = list(transformer.get_triples())
    self.assertEqual(len(loaded), 1)
    subject, predicate, _object = loaded[0]
    self.assertEqual(predicate, "rel_su")
    self.assertEqual(subject.label, "b")

    transformer.update("?a :rel_su []", "?a :bla 'piet'")

    updated = list(transformer.get_triples())
    self.assertEqual(len(updated), 1)
    subject, _predicate, _object = updated[0]
    self.assertEqual(subject.bla, "piet")
def profile_store_triples():
    """Print a table of the queries issued while storing tokens.

    Builds a test analysis article with five sentences of 27 token values
    each, stores them inside a managed transaction, prints the collected
    queries and always rolls the transaction back afterwards.
    """
    transaction.enter_transaction_management()
    transaction.managed(True)
    try:
        article = amcattest.create_test_analysis_article()
        log.info("Created test article %i" % article.id)

        token_values = []
        for index in range(5):
            log.info("Creating test sentence %i" % index)
            sentence = amcattest.create_test_analysis_sentence(article)
            log.info("Created test sentence %i" % sentence.id)
            for character in "123456789" * 3:
                token_values.append(amcattest.create_tokenvalue(
                    analysis_sentence=sentence.id,
                    word=character,
                    lemma=character))

        log.info("Storing %i tokens" % len(token_values))
        with djangotoolkit.list_queries() as queries:
            article.store_analysis(tokens=token_values)

        djangotoolkit.query_list_to_table(
            queries, maxqlen=150, output=print, encoding="utf-8")
    finally:
        # Nothing created above should survive the profiling run.
        transaction.rollback()
        transaction.leave_transaction_management()
def test_nqueries(self):
    """Bound the number of queries used to fetch tokens and statements.

    Fetching the tokens and computing the statements may each use at most one
    query; converting the statements to a string may use none.
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = [amcattest.create_test_token(sentence=sentence, position=position)
              for position in range(1, 10)]
    jan, moest, piet, slaan, volgens, kees, omte, marie, helpen = tokens

    with self.checkMaxQueries(1):
        sentence._get_tokens(get_words=True)

    # (child, parent, relation label) for each dependency
    dependencies = [
        (jan, moest, "su"),
        (jan, slaan, "su"),
        (moest, slaan, "vc"),
        (piet, slaan, "obj1"),
        (volgens, slaan, "mod"),
        (kees, volgens, "obj1"),
        (omte, slaan, "om"),
        (helpen, omte, "body"),
        (marie, helpen, "obj1"),
    ]
    for child, parent, label in dependencies:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    roles = (
        (jan.position, "su", slaan.position),
        (piet.position, "obj", slaan.position),
        (kees.position, "quote", moest.position),
    )

    from amcat.tools.djangotoolkit import list_queries

    with self.checkMaxQueries(1):
        statements = set(get_statements(sentence, roles))

    with self.checkMaxQueries(0):
        _rendered = str(statements)
def test_create_triples(self):
    """Store two token values and one 'su' triple, then inspect the result.

    The stored triple must carry the relation label of the triple value and
    have the token with word "a" as its child; every returned token must
    agree with its token value on position and lemma.
    """
    from amcat.models.token import TripleValues, TokenValues

    sentence = amcattest.create_test_analysis_sentence()
    shared = dict(lemma="l", pos="p", major="major", minor="minor",
                  namedentity=None)
    token_values = [
        TokenValues(sentence.id, position, word=word, **shared)
        for position, word in enumerate("ab")
    ]
    triple_value = TripleValues(sentence.id, 0, 1, "su")

    result_tokens, _result_triples, _corefsets = store_analysis(
        sentence.analysed_article, token_values, [triple_value])

    stored_triple, = Triple.objects.filter(parent__sentence=sentence)
    self.assertEqual(stored_triple.relation.label, triple_value.relation)
    self.assertEqual(stored_triple.child.word.word, "a")

    for token_value, token in result_tokens.items():
        self.assertEqual(token_value.position, token.position)
        self.assertEqual(token_value.lemma, token.word.lemma.lemma)
def test_long_strings(self):
    """Check handling of overly long lemmata, words, and pos values.

    A token value with the two-character pos "pp" must raise when its tokens
    are created; very long word, lemma, major, minor and relation strings
    must end up in objects that pass django's validation.
    """
    from amcat.models.token import TokenValues, TripleValues

    sentence = amcattest.create_test_analysis_sentence()

    longpos = TokenValues(sentence.id, 0, word="a", lemma="l", pos="pp",
                          major="m", minor="m", namedentity=None)
    # The call is made outside the assertion; only consuming it should raise.
    pending = create_tokens([longpos])
    with self.assertRaises(Exception):
        list(pending)

    short_values = TokenValues(sentence.id, 0, word="a", lemma="l", pos="p",
                               major="m", minor="m", namedentity=None)
    long_values = TokenValues(sentence.id, 1, word="a" * 9999,
                              lemma="l" * 9999, pos="p", major="m" * 9999,
                              minor="m" * 9999, namedentity=None)
    triple_value = TripleValues(sentence.id, 0, 1, "x" * 9999)
    create_triples([short_values, long_values], [triple_value])

    # Let django validate field lengths on everything reachable from the triple
    stored, = Triple.objects.filter(parent__sentence=sentence)
    stored.full_clean()
    stored.relation.full_clean()
    for token in (stored.parent, stored.child):
        token.full_clean()
        token.word.full_clean()
        token.word.lemma.full_clean()
        token.pos.full_clean()
def todo_test_load(self):
    """Load a one-triple sentence into the test transformer and update it.

    Disabled (``todo_`` prefix): see the TODO below about fuseki.
    """
    # TODO: do something useful when fuseki is not installed!
    from amcat.models import Token, Triple, Pos, Relation

    sentence = amcattest.create_test_analysis_sentence()
    # A third word ("c") is created as well but never attached to a token.
    first_word, second_word, _third_word = [
        amcattest.create_test_word(word=letter) for letter in "abc"
    ]
    pos = Pos.objects.create(major="x", minor="y", pos="p")

    token_args = dict(sentence=sentence, pos=pos)
    parent_token = Token.objects.create(position=1, word=first_word,
                                        **token_args)
    child_token = Token.objects.create(position=2, word=second_word,
                                       **token_args)
    relation = Relation.objects.create(label="su")
    Triple.objects.create(parent=parent_token, child=child_token,
                          relation=relation)

    transformer = get_test_transformer()
    transformer.load_sentence(sentence.id)

    before = list(transformer.get_triples())
    self.assertEqual(len(before), 1)
    subject, predicate, _object = before[0]
    self.assertEqual(predicate, "rel_su")
    self.assertEqual(subject.label, "b")

    transformer.update("?a :rel_su []", "?a :bla 'piet'")

    after = list(transformer.get_triples())
    self.assertEqual(len(after), 1)
    subject, _predicate, _object = after[0]
    self.assertEqual(subject.bla, "piet")
def test_nqueries(self):
    """Bound the number of queries used to fetch tokens and statements.

    Fetching the tokens may use at most one query, computing the statements
    at most three, and converting the statements to a string none.
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = []
    for position in range(1, 10):
        tokens.append(
            amcattest.create_test_token(sentence=sentence, position=position))
    jan, moest, piet, slaan, volgens, kees, omte, marie, helpen = tokens

    with self.checkMaxQueries(1):
        sentence._get_tokens(get_words=True)

    # Each entry is (child, parent, relation label).
    edges = [
        (jan, moest, "su"),
        (jan, slaan, "su"),
        (moest, slaan, "vc"),
        (piet, slaan, "obj1"),
        (volgens, slaan, "mod"),
        (kees, volgens, "obj1"),
        (omte, slaan, "om"),
        (helpen, omte, "body"),
        (marie, helpen, "obj1"),
    ]
    for child, parent, label in edges:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    roles = (
        (jan.position, "su", slaan.position),
        (piet.position, "obj", slaan.position),
        (kees.position, "quote", moest.position),
    )

    from amcat.tools.djangotoolkit import list_queries

    with self.checkMaxQueries(3):
        statements = set(get_statements(sentence, roles))

    with self.checkMaxQueries(0):
        _rendered = str(statements)
def test_long_strings(self):
    """Check handling of overly long lemmata, words, and pos values.

    A token value with the two-character pos "pp" must raise when its tokens
    are created; an analysis with very long word, lemma, major, minor and
    relation strings must be stored as objects that pass django's validation.
    """
    from amcat.models.token import TokenValues, TripleValues

    sentence = amcattest.create_test_analysis_sentence()

    longpos = TokenValues(sentence.id, 0, word="a", lemma="l", pos="pp",
                          major="m", minor="m", namedentity=None)
    # The call is made outside the assertion; only consuming it should raise.
    pending = create_tokens([longpos])
    with self.assertRaises(Exception):
        list(pending)

    short_values = TokenValues(sentence.id, 0, word="a", lemma="l", pos="p",
                               major="m", minor="m", namedentity=None)
    long_values = TokenValues(sentence.id, 1, word="a" * 9999,
                              lemma="l" * 9999, pos="p", major="m" * 9999,
                              minor="m" * 9999, namedentity=None)
    triple_value = TripleValues(sentence.id, 0, 1, "x" * 9999)
    store_analysis(sentence.analysed_article,
                   [short_values, long_values],
                   [triple_value])

    # Let django validate field lengths on everything reachable from the triple
    stored, = Triple.objects.filter(parent__sentence=sentence)
    stored.full_clean()
    stored.relation.full_clean()
    for token in (stored.parent, stored.child):
        token.full_clean()
        token.word.full_clean()
        token.word.lemma.full_clean()
        token.pos.full_clean()
def test_fill_out_and_predicate(self):
    """Check fill_out and get_predicate_structure on a small dependency tree.

    The token names spell the Dutch example sentence
    "de liberale premier moest piet een klap geven".
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = [amcattest.create_test_token(sentence=sentence, position=position)
              for position in range(1, 9)]
    de, liberale, premier, moest, piet, een, klap, geven = tokens

    # (child, parent, relation label) for each dependency
    dependencies = [
        (premier, geven, "su"),
        (premier, moest, "su"),
        (geven, moest, "vc"),
        (de, premier, "det"),
        (liberale, premier, "mod"),
        (piet, geven, "obj2"),
        (klap, geven, "obj1"),
        (een, klap, "det"),
    ]
    for child, parent, label in dependencies:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    roles = (
        (premier.position, "su", geven.position),
        (piet.position, "obj", geven.position),
    )

    subject = fill_out(sentence, [premier], roles)
    self.assertEqual(subject, {de, liberale, premier})

    predicate = fill_out(sentence, [moest, geven], roles)
    self.assertEqual(predicate, {moest, een, klap, geven})

    paths = list(get_predicate_structure(sentence, predicate))
    self.assertEqual(paths, [[een, klap, geven, moest]])
def test_store_stanford(self):
    """Store the analysis parsed from the stanford test XML and read it back.

    Checks the lemmata of the first sentence, the triples of the second
    sentence and the coreference sets of the article.
    """
    # Token values, triples and coreference sets come from the stanford test case
    article = amcattest.create_test_analysed_article()
    first = amcattest.create_test_analysis_sentence(analysed_article=article)
    second = amcattest.create_test_analysis_sentence(analysed_article=article)

    from amcat.nlp import stanford

    sentence_ids = [first.id, second.id]
    xml = stanford.TestStanford._get_test_xml()
    tokens, triples, corefsets = stanford.interpret_xml(sentence_ids, xml)
    store_analysis(article, tokens, triples, corefsets)

    lemmata = {str(token.word.lemma) for token in first.tokens.all()}
    self.assertEqual(lemmata, {"Mary", "meet", "John"})

    dependencies = {
        (str(triple.parent.word), str(triple.child.word), str(triple.relation))
        for triple in second.triples
    }
    self.assertEqual(dependencies,
                     {('likes', 'She', 'nsubj'), ('likes', 'him', 'dobj')})

    coreferences = {
        frozenset(str(token.word.lemma) for token in corefset.tokens.all())
        for corefset in article.coreferencesets.all()
    }
    self.assertEqual(coreferences,
                     {frozenset(["Mary", "she"]), frozenset(["John", "he"])})
def test_get_tokens_order(self):
    """Tokens created at positions 2, 1, 3 must be listed in position order."""
    sentence = amcattest.create_test_analysis_sentence()

    by_position = {}
    for position in (2, 1, 3):
        by_position[position] = amcattest.create_test_token(
            sentence=sentence, position=position)

    expected = [by_position[1], by_position[2], by_position[3]]
    self.assertEqual(list(sentence.tokens.all()), expected)
def test_store_tokens(self):
    """Store one token value and check the article and token afterwards.

    After storing, the analysis article must be flagged as done and hold
    exactly one token whose word matches the token value. Storing the same
    analysis a second time is expected to raise.
    """
    s = amcattest.create_test_analysis_sentence()
    t1 = amcattest.create_tokenvalue(analysis_sentence=s)
    s.analysis_article.store_analysis(tokens=[t1])

    # Re-fetch the article so the `done` flag is read from the database.
    aa = AnalysisArticle.objects.get(pk=s.analysis_article.id)
    self.assertEqual(aa.done, True)

    token, = list(Token.objects.filter(sentence__analysis_article=aa))
    self.assertEqual(token.word.word, t1.word)

    # assertRaises takes the expected exception class first; the callable and
    # its arguments follow. Without the class the second store is never
    # actually exercised by the assertion.
    self.assertRaises(Exception, aa.store_analysis, tokens=[t1])
def _get_test_tokens(self, aa, words):
    """Create one test token per item of *words* in a new sentence of *aa*.

    When *words* is empty or None the letters of "abcde" are used instead.
    Positions follow the enumeration index, starting at 0. Returns the list
    of created tokens.
    """
    sentence = amcattest.create_test_analysis_sentence(analysed_article=aa)
    words = words or "abcde"

    tokens = []
    for position, text in enumerate(words):
        word = amcattest.create_test_word(word=text)
        tokens.append(amcattest.create_test_token(
            sentence=sentence, position=position, word=word))
    return tokens
def todo_test_process_sentence(self):
    """Frog should lemmatise and tag the sentence "de groenste huizen".

    Expected lemmata are "de", "groen", "huis" with pos "D", "A", "N".
    """
    raw_sentence = amcattest.create_test_sentence(sentence="de groenste huizen")
    analysis_sentence = amcattest.create_test_analysis_sentence(
        sentence=raw_sentence)

    frog = Frog(None)
    tokens, _triples = frog.process_sentence(analysis_sentence)
    tokens = list(tokens)

    self.assertEqual([token.lemma for token in tokens],
                     ["de", "groen", "huis"])
    self.assertEqual([token.pos for token in tokens], ["D", "A", "N"])
def todo_test_process_sentence(self):
    """Process "de groenste huizen" with Frog and check lemmata and pos.

    Expected lemmata are "de", "groen", "huis" with pos "D", "A", "N".
    """
    text = amcattest.create_test_sentence(sentence="de groenste huizen")
    analysis_sentence = amcattest.create_test_analysis_sentence(sentence=text)

    processed, _triples = Frog(None).process_sentence(analysis_sentence)
    processed = list(processed)

    found_lemmata = []
    for token in processed:
        found_lemmata.append(token.lemma)
    self.assertEqual(found_lemmata, ["de", "groen", "huis"])

    found_pos = []
    for token in processed:
        found_pos.append(token.pos)
    self.assertEqual(found_pos, ["D", "A", "N"])
def todo_test_triples(self):
    """FrogTriples should yield the expected triples for the test sentence.

    Example sentence (Dutch): "hij gaf hem een boek" -- "he gave him a book".
    """
    raw_sentence = amcattest.create_test_sentence(
        sentence="hij gaf hem een boek")
    analysis_sentence = amcattest.create_test_analysis_sentence(
        sentence=raw_sentence)

    parser = FrogTriples(None)
    found = set(parser.get_triples(analysis_sentence))

    expected_arguments = [
        (0, 1, 'su'),
        (2, 1, 'obj2'),
        (3, 4, 'det'),
        (4, 1, 'obj1'),
    ]
    expected = {
        TripleValues(analysis_sentence, first, second, label)
        for first, second, label in expected_arguments
    }
    self.assertEqual(found, expected)
def todo_test_statements(self):
    """Check the statements extracted for a sentence as roles are added.

    Example sentence (Dutch): "jan moest piet slaan, volgens kees, om marie
    te helpen" -- "Jan had to hit Piet, according to Kees, in order to help
    Marie".
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = [amcattest.create_test_token(sentence=sentence, position=position)
              for position in range(1, 10)]
    jan, moest, piet, slaan, volgens, kees, omte, marie, helpen = tokens

    # (child, parent, relation label) for each dependency
    dependencies = [
        (jan, moest, "su"),
        (jan, slaan, "su"),
        (moest, slaan, "vc"),
        (piet, slaan, "obj1"),
        (volgens, slaan, "mod"),
        (kees, volgens, "obj1"),
        (omte, slaan, "om"),
        (helpen, omte, "body"),
        (marie, helpen, "obj1"),
    ]
    for child, parent, label in dependencies:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    roles = (
        (jan.position, "su", slaan.position),
        (piet.position, "obj", slaan.position),
        (kees.position, "quote", moest.position),
    )
    direct = {
        Statement(sentence, {jan}, {moest, slaan}, {piet}, source={kees}),
    }
    self.assertEqual(set(get_statements(sentence, roles)), direct)

    # "om marie te helpen" ("in order to help Marie"): adding only the
    # object role is expected to leave the statements unchanged.
    roles += ((marie.position, "obj", helpen.position),)
    self.assertEqual(set(get_statements(sentence, roles)), direct)

    roles += ((moest.position, "om", helpen.position),)
    affective = Statement(sentence, {jan}, {helpen}, {marie},
                          type={"Affective"}, source={kees})
    causal = Statement(sentence, {piet}, {helpen}, {marie},
                       source={jan, kees}, condition={moest, slaan},
                       type={"Causal"})
    om = {affective, causal}
    self.assertEqual(set(get_statements(sentence, roles)), direct | om)
def test_create_triples(self):
    """Store two token values plus a 'su' triple and verify what was stored.

    The single stored triple must have the relation label of the triple
    value and the token with word "a" as child; each returned token must
    match its token value in position and lemma.
    """
    from amcat.models.token import TripleValues, TokenValues

    sentence = amcattest.create_test_analysis_sentence()
    first_value = TokenValues(sentence.id, 0, word="a", lemma="l", pos="p",
                              major="major", minor="minor", namedentity=None)
    second_value = TokenValues(sentence.id, 1, word="b", lemma="l", pos="p",
                               major="major", minor="minor", namedentity=None)
    triple_value = TripleValues(sentence.id, 0, 1, "su")

    stored = store_analysis(sentence.analysed_article,
                            [first_value, second_value],
                            [triple_value])
    stored_tokens, _stored_triples, _corefsets = stored

    triple, = Triple.objects.filter(parent__sentence=sentence)
    self.assertEqual(triple.relation.label, triple_value.relation)
    self.assertEqual(triple.child.word.word, "a")

    for value, token in stored_tokens.items():
        self.assertEqual(value.position, token.position)
        self.assertEqual(value.lemma, token.word.lemma.lemma)
def todo_test_triples(self):
    """Compare the triples FrogTriples yields against the expected set.

    Example sentence (Dutch): "hij gaf hem een boek" -- "he gave him a book".
    """
    text = amcattest.create_test_sentence(sentence="hij gaf hem een boek")
    analysis_sentence = amcattest.create_test_analysis_sentence(sentence=text)

    found = set(FrogTriples(None).get_triples(analysis_sentence))

    expected = set()
    for first, second, label in ((0, 1, "su"), (2, 1, "obj2"),
                                 (3, 4, "det"), (4, 1, "obj1")):
        expected.add(TripleValues(analysis_sentence, first, second, label))

    self.assertEqual(found, expected)
def test_create_tokens(self):
    """create_tokens must yield one token whose lemma matches the token value."""
    from amcat.models.token import TokenValues

    sentence = amcattest.create_test_analysis_sentence()
    value = TokenValues(sentence.id, 2, word="w", lemma="l", pos="p",
                        major="major", minor="minor", namedentity=None)

    created = dict(create_tokens([value]))
    token, = created.values()
    self.assertEqual(token.word.lemma.lemma, "l")
def test_predicates(self):
    """get_predicates should group the verbs linked by a 'vc' relation.

    Example sentence (Dutch): "jan moest piet slaan" -- "Jan had to hit Piet".
    Both verbs are expected to map to the same predicate {moest, slaan}.
    """
    from amcat.models import Triple, Relation

    s = amcattest.create_test_analysis_sentence()
    # The second token must be bound to `moest`: the dependencies and the
    # expected result below refer to it by that name.
    jan, moest, piet, slaan = [
        amcattest.create_test_token(sentence=s, position=i)
        for i in range(1, 5)
    ]

    # (child, parent, relation label) for each dependency
    dependencies = [
        (jan, moest, "su"),
        (jan, slaan, "su"),
        (moest, slaan, "vc"),
        (piet, slaan, "obj1"),
    ]
    for child, parent, label in dependencies:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    preds = get_predicates(s)
    self.assertEqual(preds, {moest: {moest, slaan}, slaan: {moest, slaan}})
def test_store_stanford(self):
    """Store the stanford test analysis and verify tokens, triples, corefs.

    Checks the lemmata of the first sentence, the triples of the second
    sentence and the coreference sets of the article.
    """
    # Token values, triples and coreference sets come from the stanford test case
    article = amcattest.create_test_analysed_article()
    sentences = []
    for _i in range(2):
        sentences.append(
            amcattest.create_test_analysis_sentence(analysed_article=article))
    first, second = sentences

    from amcat.nlp import stanford

    parsed = stanford.interpret_xml([first.id, second.id],
                                    stanford.TestStanford._get_test_xml())
    tokens, triples, corefsets = parsed
    store_analysis(article, tokens, triples, corefsets)

    stored_lemmata = {str(token.word.lemma) for token in first.tokens.all()}
    self.assertEqual(stored_lemmata, {"Mary", "meet", "John"})

    stored_triples = {
        (str(triple.parent.word), str(triple.child.word), str(triple.relation))
        for triple in second.triples
    }
    self.assertEqual(stored_triples,
                     {('likes', 'She', 'nsubj'), ('likes', 'him', 'dobj')})

    stored_corefs = {
        frozenset(str(token.word.lemma) for token in corefset.tokens.all())
        for corefset in article.coreferencesets.all()
    }
    self.assertEqual(stored_corefs,
                     {frozenset(["Mary", "she"]), frozenset(["John", "he"])})
def todo_test_predicates(self):
    """get_predicates should map both verbs to the predicate {moest, slaan}.

    Example sentence (Dutch): "jan moest piet slaan" -- "Jan had to hit Piet".
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = [amcattest.create_test_token(sentence=sentence, position=position)
              for position in range(1, 5)]
    jan, moest, piet, slaan = tokens

    # (child, parent, relation label) for each dependency
    dependencies = [
        (jan, moest, "su"),
        (jan, slaan, "su"),
        (moest, slaan, "vc"),
        (piet, slaan, "obj1"),
    ]
    for child, parent, label in dependencies:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    predicates = get_predicates(sentence)
    expected = {moest: {moest, slaan}, slaan: {moest, slaan}}
    self.assertEqual(predicates, expected)
def todo_test_statements(self):
    """Check get_statements as roles are added step by step.

    Example sentence (Dutch): "jan moest piet slaan, volgens kees, om marie
    te helpen" -- "Jan had to hit Piet, according to Kees, in order to help
    Marie".
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = []
    for position in range(1, 10):
        tokens.append(
            amcattest.create_test_token(sentence=sentence, position=position))
    jan, moest, piet, slaan, volgens, kees, omte, marie, helpen = tokens

    # Each entry is (child, parent, relation label).
    edges = [
        (jan, moest, "su"),
        (jan, slaan, "su"),
        (moest, slaan, "vc"),
        (piet, slaan, "obj1"),
        (volgens, slaan, "mod"),
        (kees, volgens, "obj1"),
        (omte, slaan, "om"),
        (helpen, omte, "body"),
        (marie, helpen, "obj1"),
    ]
    for child, parent, label in edges:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    roles = (
        (jan.position, "su", slaan.position),
        (piet.position, "obj", slaan.position),
        (kees.position, "quote", moest.position),
    )
    direct = {
        Statement(sentence, {jan}, {moest, slaan}, {piet}, source={kees}),
    }
    found = set(get_statements(sentence, roles))
    self.assertEqual(found, direct)

    # "om marie te helpen" ("in order to help Marie"): the object role on its
    # own is expected to leave the statements unchanged.
    roles += ((marie.position, "obj", helpen.position),)
    found = set(get_statements(sentence, roles))
    self.assertEqual(found, direct)

    roles += ((moest.position, "om", helpen.position),)
    om = {
        Statement(sentence, {jan}, {helpen}, {marie},
                  type={"Affective"}, source={kees}),
        Statement(sentence, {piet}, {helpen}, {marie},
                  source={jan, kees}, condition={moest, slaan},
                  type={"Causal"}),
    }
    found = set(get_statements(sentence, roles))
    self.assertEqual(found, direct | om)
def test_reality(self):
    """A 'su' role without a token should give one "Reality" statement.

    Example sentence (Dutch): "VVD stijgt" -- "VVD rises" (i.e. in the polls).
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = [amcattest.create_test_token(sentence=sentence, position=position)
              for position in range(1, 3)]
    vvd, stijgt = tokens

    # Single dependency: vvd is the 'su' child of stijgt.
    relation = Relation.objects.create(label="su")
    Triple.objects.create(parent=stijgt, child=vvd, relation=relation)

    roles = (
        (None, "su", stijgt.position),
        (vvd.position, "obj", stijgt.position),
    )
    expected = {Statement(sentence, {None}, {stijgt}, {vvd}, type={"Reality"})}

    found = set(get_statements(sentence, roles))
    self.assertEqual(found, expected)
def profile_store_triples():
    """Profile the queries needed to store the tokens of a test article.

    Five test sentences with 27 token values each are created and stored
    inside a managed transaction; the collected queries are printed as a
    table and the transaction is rolled back in every case.
    """
    transaction.enter_transaction_management()
    transaction.managed(True)
    try:
        article = amcattest.create_test_analysis_article()
        log.info("Created test article %i" % article.id)

        characters = "123456789" * 3
        token_values = []
        for number in range(5):
            log.info("Creating test sentence %i" % number)
            sentence = amcattest.create_test_analysis_sentence(article)
            log.info("Created test sentence %i" % sentence.id)
            token_values.extend(
                amcattest.create_tokenvalue(analysis_sentence=sentence.id,
                                            word=character, lemma=character)
                for character in characters)

        log.info("Storing %i tokens" % len(token_values))
        with djangotoolkit.list_queries() as queries:
            article.store_analysis(tokens=token_values)

        djangotoolkit.query_list_to_table(
            queries, maxqlen=150, output=print, encoding="utf-8")
    finally:
        # Leave the database as it was before profiling.
        transaction.rollback()
        transaction.leave_transaction_management()
def test_reality(self):
    """A 'su' role without a token should give one "Reality" statement.

    Example sentence (Dutch): "VVD stijgt" -- "VVD rises" (i.e. in the polls).
    """
    from amcat.models import Triple, Relation

    s = amcattest.create_test_analysis_sentence()
    vvd, stijgt = [
        amcattest.create_test_token(sentence=s, position=i)
        for i in range(1, 3)
    ]

    # (child, parent, relation label) for each dependency
    for child, parent, label in [(vvd, stijgt, "su")]:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    roles = (
        (None, "su", stijgt.position),
        (vvd.position, "obj", stijgt.position),
    )
    # Statement is given the sentence as its first argument, matching the
    # other Statement(...) constructions in this file.
    # NOTE(review): confirm against the Statement signature in use.
    rea = {Statement(s, {None}, {stijgt}, {vvd}, type={"Reality"})}

    statements = set(get_statements(s, roles))
    self.assertEqual(statements, rea)
def test_process(self):
    """process_sentence on a tokens-only script yields tokens and no triples.

    A minimal AnalysisScript subclass splits the sentence on whitespace and
    yields one TokenValues per word, numbered from 1.
    """
    from amcat.models.token import TokenValues

    class X(AnalysisScript):
        """Script that produces tokens only, one per whitespace-separated word."""

        def __init__(self):
            super(X, self).__init__(analysis=None, tokens=True, triples=False)

        def get_tokens(self, analysis_sentence, memo=None):
            words = analysis_sentence.sentence.sentence.split()
            for position, word in enumerate(words, 1):
                yield TokenValues(analysis_sentence, position, word,
                                  None, None, None, None)

    raw_sentence = amcattest.create_test_sentence(sentence="dit is een test")
    analysis_sentence = amcattest.create_test_analysis_sentence(
        sentence=raw_sentence)

    result = list(X().process_sentence(analysis_sentence))
    tokens, triples = result
    print(tokens)

    self.assertIsNone(triples)
    expected_first = TokenValues(analysis_sentence, 1, "dit",
                                 None, None, None, None)
    self.assertEqual(list(tokens)[0], expected_first)
def test_fill_out_and_predicate(self):
    """Exercise fill_out and get_predicate_structure on one sentence.

    The token names spell the Dutch example sentence
    "de liberale premier moest piet een klap geven".
    """
    from amcat.models import Triple, Relation

    sentence = amcattest.create_test_analysis_sentence()
    tokens = []
    for position in range(1, 9):
        tokens.append(
            amcattest.create_test_token(sentence=sentence, position=position))
    de, liberale, premier, moest, piet, een, klap, geven = tokens

    # Each entry is (child, parent, relation label).
    edges = [
        (premier, geven, "su"),
        (premier, moest, "su"),
        (geven, moest, "vc"),
        (de, premier, "det"),
        (liberale, premier, "mod"),
        (piet, geven, "obj2"),
        (klap, geven, "obj1"),
        (een, klap, "det"),
    ]
    for child, parent, label in edges:
        relation = Relation.objects.create(label=label)
        Triple.objects.create(parent=parent, child=child, relation=relation)

    subject_role = (premier.position, "su", geven.position)
    object_role = (piet.position, "obj", geven.position)
    roles = (subject_role, object_role)

    filled_subject = fill_out(sentence, [premier], roles)
    self.assertEqual(filled_subject, {de, liberale, premier})

    predicate = fill_out(sentence, [moest, geven], roles)
    self.assertEqual(predicate, {moest, een, klap, geven})

    predicate_paths = list(get_predicate_structure(sentence, predicate))
    self.assertEqual(predicate_paths, [[een, klap, geven, moest]])
def test_create_tokens(self):
    """A single token value must produce one token with the same lemma."""
    from amcat.models.token import TokenValues

    sentence = amcattest.create_test_analysis_sentence()
    fields = dict(word="w", lemma="l", pos="p", major="major", minor="minor",
                  namedentity=None)
    token_values = [TokenValues(sentence.id, 2, **fields)]

    by_value = dict(create_tokens(token_values))
    token, = by_value.values()
    self.assertEqual(token.word.lemma.lemma, "l")
def test_get_tokens_order(self):
    """The tokens of a sentence are listed by position, not creation order."""
    sentence = amcattest.create_test_analysis_sentence()

    # Created out of order on purpose: positions 2, 1, 3.
    second = amcattest.create_test_token(sentence=sentence, position=2)
    first = amcattest.create_test_token(sentence=sentence, position=1)
    third = amcattest.create_test_token(sentence=sentence, position=3)

    listed = list(sentence.tokens.all())
    self.assertEqual(listed, [first, second, third])
def _get_test_tokens(self, aa, words):
    """Return test tokens, one per item of *words*, in a new sentence of *aa*.

    An empty or None *words* is replaced by the letters of "abcde". Each
    token gets its enumeration index (starting at 0) as position.
    """
    sentence = amcattest.create_test_analysis_sentence(analysed_article=aa)
    if words:
        texts = words
    else:
        texts = "abcde"

    def make_token(position, text):
        # The word is created first, then the token that refers to it.
        word = amcattest.create_test_word(word=text)
        return amcattest.create_test_token(
            sentence=sentence, position=position, word=word)

    return [make_token(position, text)
            for position, text in enumerate(texts)]