def test_store_stanford(self):
    """Store the interpreted Stanford test XML and check what was saved.

    Two analysis sentences are created on one analysed article, the
    Stanford test XML is interpreted against their ids, and the result
    is passed to store_analysis. The lemmas of the first sentence, the
    triples of the second sentence, and the article's coreference sets
    are then compared with the expected values.
    """
    # Token values etc. are taken from the stanford test case.
    article = amcattest.create_test_analysed_article()
    first = amcattest.create_test_analysis_sentence(analysed_article=article)
    second = amcattest.create_test_analysis_sentence(analysed_article=article)

    from amcat.nlp import stanford
    sentence_ids = [first.id, second.id]
    xml = stanford.TestStanford._get_test_xml()
    tokens, triples, corefsets = stanford.interpret_xml(sentence_ids, xml)
    store_analysis(article, tokens, triples, corefsets)

    # Lemmas stored for the first sentence.
    lemmas = {str(tok.word.lemma) for tok in first.tokens.all()}
    self.assertEqual(lemmas, {"Mary", "meet", "John"})

    # (parent word, child word, relation) for each triple of the second sentence.
    relations = set()
    for triple in second.triples:
        relations.add((str(triple.parent.word),
                       str(triple.child.word),
                       str(triple.relation)))
    self.assertEqual(relations,
                     {('likes', 'She', 'nsubj'), ('likes', 'him', 'dobj')})

    # Each coreference set is reduced to the set of its token lemmas.
    coref_lemmas = {
        frozenset(str(tok.word.lemma) for tok in coref.tokens.all())
        for coref in article.coreferencesets.all()
    }
    self.assertEqual(coref_lemmas,
                     {frozenset(["Mary", "she"]), frozenset(["John", "he"])})
def _get_test_script(self, words=None, codes=None, window_size=5):
    """Build a WindowedSNAScript over a freshly created test article.

    :param words: forwarded to ``self._get_test_tokens`` together with the
                  new analysed article.
    :param codes: forwarded to ``self._get_test_codebook``; defaults to a
                  new empty dict on every call.
    :param window_size: passed through as the script's ``window_size``.
    :return: a WindowedSNAScript constructed from the ids of the created
             article set, the article's plugin, the codebook and the
             lexicon language.
    """
    # Use a None sentinel rather than a ``{}`` default: a dict literal in the
    # signature is created once and shared between all calls.
    if codes is None:
        codes = {}

    aa = amcattest.create_test_analysed_article()
    # The return value was never used by this method; the call is kept
    # because it presumably creates the tokens for the article -- confirm.
    self._get_test_tokens(aa, words)
    aset = amcattest.create_test_set(articles=[aa.article])
    lexicon_lang = Language.objects.get(pk=2)
    cb = self._get_test_codebook(lexicon_lang, codes)
    return WindowedSNAScript(
        articleset=aset.id,
        plugin=aa.plugin.id,
        codebook=cb.id,
        lexicon_language=lexicon_lang.id,
        window_size=window_size,
    )
def test_store_stanford(self):
    """Check that store_analysis saves the Stanford test-case output.

    Interprets the Stanford test XML for two newly created analysis
    sentences, stores the resulting tokens, triples and coreference sets
    on the analysed article, and asserts on the stored lemmas, triples
    and coreference sets.
    """
    # Token values etc. come from the stanford test case.
    analysed = amcattest.create_test_analysed_article()
    sentences = [
        amcattest.create_test_analysis_sentence(analysed_article=analysed)
        for _ in range(2)
    ]
    sent_a, sent_b = sentences

    from amcat.nlp import stanford
    parsed = stanford.interpret_xml(
        [sent_a.id, sent_b.id],
        stanford.TestStanford._get_test_xml(),
    )
    tokens, triples, corefsets = parsed
    store_analysis(analysed, tokens, triples, corefsets)

    # First sentence: compare the set of token lemmas.
    self.assertEqual(
        set(str(token.word.lemma) for token in sent_a.tokens.all()),
        {"Mary", "meet", "John"},
    )

    # Second sentence: compare (parent, child, relation) string triples.
    self.assertEqual(
        set(
            (str(rel.parent.word), str(rel.child.word), str(rel.relation))
            for rel in sent_b.triples
        ),
        {('likes', 'She', 'nsubj'), ('likes', 'him', 'dobj')},
    )

    # Article level: one frozenset of lemmas per stored coreference set.
    stored_corefs = set()
    for coref_set in analysed.coreferencesets.all():
        members = frozenset(
            str(token.word.lemma) for token in coref_set.tokens.all()
        )
        stored_corefs.add(members)
    self.assertEqual(
        stored_corefs,
        {frozenset(["Mary", "she"]), frozenset(["John", "he"])},
    )