def handle_split(form, project, article, sentences):
    """Split ``article`` into new articles and apply the requested set changes.

    The form is validated before anything is created, so an invalid form
    cannot leave newly created articles or sentences behind.

    :param form: form whose ``cleaned_data`` holds the set options read below
    :param project: project whose article sets are queried and created
    :param article: the original article being split
    :param sentences: passed on to ``get_articles`` together with ``article``
    :returns: ``locals()`` -- every local name of this function, used as
        context variables for the template
    :raises ValueError: if ``form`` is not valid
    """
    if not form.is_valid():
        raise ValueError("Form invalid: {form.errors}".format(form=form))

    articles = list(get_articles(article, sentences))

    # We won't use bulk_create yet, as it bypasses save() and doesn't
    # insert ids
    Article.create_articles(articles)
    for art in articles:
        sbd.get_or_create_sentences(art)

    # Context variables for template
    form_data = form.cleaned_data
    all_sets = list(project.all_articlesets().filter(articles=article))

    # Add the split articles to existing sets
    for aset in form_data["add_splitted_to_sets"]:
        aset.add_articles(articles)

    # Add the split articles to the sets wherein the original article lives
    if form_data["add_splitted_to_all"]:
        asets = project.all_articlesets().filter(articles=article).only("id")
        for aset in asets:
            aset.add_articles(articles)

    if form_data["remove_from_sets"]:
        for aset in form_data["remove_from_sets"]:
            aset.remove_articles([article])

    if form_data["remove_from_all_sets"]:
        for aset in ArticleSet.objects.filter(project=project, articles=article).distinct():
            aset.remove_articles([article])

    if form_data["add_splitted_to_new_set"]:
        new_splitted_set = ArticleSet.create_set(
            project, form_data["add_splitted_to_new_set"], articles)

    if form_data["add_to_sets"]:
        for articleset in form_data["add_to_sets"]:
            articleset.add_articles([article])

    if form_data["add_to_new_set"]:
        new_set = ArticleSet.create_set(project, form_data["add_to_new_set"], [article])

    # NOTE: the local names above are the keys of the returned mapping;
    # renaming a local changes what the caller receives.
    return locals()
def get_context_data(self, **kwargs):
    """Add the sentences of ``self.article`` to the template context.

    The first item produced by ``_get_sentences`` (the headline) is
    consumed here, so the template only iterates over what follows it.

    :returns: the parent context extended with a ``"sentences"`` entry
    """
    ctx = super(ArticleSplitView, self).get_context_data(**kwargs)
    sentences = sbd.get_or_create_sentences(self.article).only("sentence", "parnr")
    ctx["sentences"] = _get_sentences(sentences)
    # Use the next() builtin: iterators have no .next() method on Python 3.
    next(ctx["sentences"])  # skip headline
    return ctx
def test_nqueries_sentence_codings(self):
    """Run an export of article and sentence codings inside checkMaxQueries(9)."""
    aschema, _, astrf, aintf, _, _, _ = amcattest.create_test_schema_with_fields(
        isarticleschema=True)
    sschema, _, sstrf, sintf, _, _, _ = amcattest.create_test_schema_with_fields(
        isarticleschema=False)
    cjob = amcattest.create_test_job(10, articleschema=aschema, unitschema=sschema)

    for article in cjob.articleset.articles.all():
        # One article-level coding per article ...
        article_coding = create_test_coding(codingjob=cjob, article=article)
        article_coding.update_values({astrf: "blas", aintf: 20})

        # ... and one sentence-level coding for each of its sentences.
        for sentence in get_or_create_sentences(article):
            sentence_coding = create_test_coding(
                codingjob=cjob, article=article, sentence=sentence)
            sentence_coding.update_values({sstrf: "bla", sintf: 10})

    fields = {field: {} for field in (sstrf, sintf, astrf, aintf)}
    script = self._get_results_script([cjob], fields, export_level=CODING_LEVEL_BOTH)

    with self.checkMaxQueries(9):
        output = script.run()
        list(csv.reader(StringIO(output)))
def test_include_uncoded_sentences(self):
    """Check the exported row count with and without uncoded sentences."""
    aschema, _, astrf, aintf, _, _, _ = amcattest.create_test_schema_with_fields(
        isarticleschema=True)
    sschema, _, sstrf, sintf, _, _, _ = amcattest.create_test_schema_with_fields(
        isarticleschema=False)

    a1 = amcattest.create_test_article(text="Zin 1. Zin 2.")
    a2 = amcattest.create_test_article(text="Zin 1. Zin 2.")
    aset = amcattest.create_test_set([a1, a2])
    cjob = amcattest.create_test_job(
        articleset=aset, articleschema=aschema, unitschema=sschema)

    # Code only the sentence at index 1 of the first article.
    coded_sentence = list(get_or_create_sentences(a1))[1]
    coding = create_test_coding(codingjob=cjob, article=a1, sentence=coded_sentence)
    coding.update_values({sstrf: "bla", sintf: 10})

    fields = {field: {} for field in (sstrf, sintf, astrf, aintf)}

    # We expect 1 sentence if we only export codings
    coded_only = self._get_results(
        [cjob], fields, include_uncoded_sentences=False, export_level=CODING_LEVEL_BOTH)
    self.assertEqual(1, len(coded_only))

    with_uncoded = self._get_results(
        [cjob], fields, include_uncoded_sentences=True, export_level=CODING_LEVEL_BOTH)
    self.assertEqual(3, len(with_uncoded))
def handle_split(form, project, article, sentences):
    """Split ``article`` into new articles and apply the requested set changes.

    The form is validated before anything is created, so an invalid form
    cannot leave newly created articles or sentences behind.

    :param form: form whose ``cleaned_data`` holds the set options read below
    :param project: project whose article sets are queried and created
    :param article: the original article being split
    :param sentences: passed on to ``get_articles`` together with ``article``
    :returns: ``locals()`` -- every local name of this function, used as
        context variables for the template
    :raises ValueError: if ``form`` is not valid
    """
    if not form.is_valid():
        raise ValueError("Form invalid: {form.errors}".format(form=form))

    articles = list(get_articles(article, sentences))

    # We won't use bulk_create yet, as it bypasses save() and doesn't
    # insert ids
    Article.create_articles(articles)
    for art in articles:
        sbd.get_or_create_sentences(art)

    # Context variables for template
    form_data = form.cleaned_data
    all_sets = list(project.all_articlesets().filter(articles=article))

    # Add the split articles to existing sets
    for aset in form_data["add_splitted_to_sets"]:
        aset.add_articles(articles)

    # Add the split articles to the sets wherein the original article lives
    if form_data["add_splitted_to_all"]:
        asets = project.all_articlesets().filter(articles=article).only("id")
        for aset in asets:
            aset.add_articles(articles)

    if form_data["remove_from_sets"]:
        for aset in form_data["remove_from_sets"]:
            aset.remove_articles([article])

    if form_data["remove_from_all_sets"]:
        for aset in ArticleSet.objects.filter(project=project, articles=article).distinct():
            aset.remove_articles([article])

    if form_data["add_splitted_to_new_set"]:
        new_splitted_set = ArticleSet.create_set(
            project, form_data["add_splitted_to_new_set"], articles)

    if form_data["add_to_sets"]:
        for articleset in form_data["add_to_sets"]:
            articleset.add_articles([article])

    if form_data["add_to_new_set"]:
        new_set = ArticleSet.create_set(project, form_data["add_to_new_set"], [article])

    # NOTE: the local names above are the keys of the returned mapping;
    # renaming a local changes what the caller receives.
    return locals()
def test_nqueries_sentence_codings(self):
    """Run an export of article and sentence codings inside checkMaxQueries(9)."""
    article_schema_parts = amcattest.create_test_schema_with_fields(isarticleschema=True)
    aschema, _, astrf, aintf, _, _, _ = article_schema_parts
    sentence_schema_parts = amcattest.create_test_schema_with_fields(isarticleschema=False)
    sschema, _, sstrf, sintf, _, _, _ = sentence_schema_parts

    cjob = amcattest.create_test_job(10, articleschema=aschema, unitschema=sschema)

    for article in cjob.articleset.articles.all():
        # Article-level coding first, then one coding per sentence.
        article_coding = create_test_coding(codingjob=cjob, article=article)
        article_coding.update_values({astrf: "blas", aintf: 20})

        for sentence in get_or_create_sentences(article):
            sentence_coding = create_test_coding(
                codingjob=cjob, article=article, sentence=sentence)
            sentence_coding.update_values({sstrf: "bla", sintf: 10})

    fields = {field: {} for field in (sstrf, sintf, astrf, aintf)}
    script = self._get_results_script([cjob], fields, export_level=CODING_LEVEL_BOTH)

    with self.checkMaxQueries(9):
        exported = StringIO(script.run())
        list(csv.reader(exported))
def test_include_uncoded_sentences(self):
    """Check the exported row count with and without uncoded sentences."""
    article_schema_parts = amcattest.create_test_schema_with_fields(isarticleschema=True)
    aschema, _, astrf, aintf, _, _, _ = article_schema_parts
    sentence_schema_parts = amcattest.create_test_schema_with_fields(isarticleschema=False)
    sschema, _, sstrf, sintf, _, _, _ = sentence_schema_parts

    a1 = amcattest.create_test_article(text="Zin 1. Zin 2.")
    a2 = amcattest.create_test_article(text="Zin 1. Zin 2.")
    aset = amcattest.create_test_set([a1, a2])
    cjob = amcattest.create_test_job(
        articleset=aset, articleschema=aschema, unitschema=sschema)

    # Code only the sentence at index 1 of the first article.
    coded_sentence = list(get_or_create_sentences(a1))[1]
    coding = create_test_coding(codingjob=cjob, article=a1, sentence=coded_sentence)
    coding.update_values({sstrf: "bla", sintf: 10})

    fields = {field: {} for field in (sstrf, sintf, astrf, aintf)}

    # We expect 1 sentence if we only export codings
    coded_only = self._get_results(
        [cjob], fields, include_uncoded_sentences=False, export_level=CODING_LEVEL_BOTH)
    self.assertEqual(1, len(coded_only))

    with_uncoded = self._get_results(
        [cjob], fields, include_uncoded_sentences=True, export_level=CODING_LEVEL_BOTH)
    self.assertEqual(3, len(with_uncoded))
def filter_queryset(self, sentences):
    """Narrow the parent's filtered queryset to the sentences of ``self.article``.

    The allowed ids are those of ``sbd.get_or_create_sentences(self.article)``.
    """
    filtered = super(CodingJobArticleSentenceViewSet, self).filter_queryset(sentences)
    article_sentences = sbd.get_or_create_sentences(self.article)
    return filtered.filter(id__in=article_sentences)
def setUp(self):
    """Create five articles, two coding schemas and codings spread over two jobs."""
    self.s1 = amcattest.create_test_set(5)
    self.a1, self.a2, self.a3, self.a4, self.a5 = self.s1.articles.all()
    articles = [self.a1, self.a2, self.a3, self.a4, self.a5]

    self.m1, self.m2, self.m3 = "Telegraaf", "NRC", "AD"
    media = [self.m1, self.m2, self.m2, self.m3, self.m3]
    for art, medium in zip(articles, media):
        art.set_property("medium", medium)

    # Only the first three articles get a new text.
    for art, text in zip(articles, ["aap.", "aap. noot.", "aap. noot. mies."]):
        art.text = text

    dates = [(2015, 1, 1), (2015, 1, 1), (2015, 2, 1), (2016, 1, 1), (2016, 1, 1)]
    for art, (year, month, day) in zip(articles, dates):
        art.date = datetime.datetime(year, month, day)

    for art in articles:
        art.save()

    # Uncomment if ever using elastic :)
    # self.s1.refresh_index(full_refresh=True)

    (self.schema, self.codebook, self.strf, self.intf,
     self.codef, self.boolf, self.qualf) = (
        amcattest.create_test_schema_with_fields(isarticleschema=True))
    (self.sschema, self.scodebook, self.sstrf, self.sintf,
     self.scodef, self.sboolf, self.squalf) = (
        amcattest.create_test_schema_with_fields(isarticleschema=False))

    def single_code(codes, label):
        # Unpacking fails unless exactly one code carries this label.
        (code,) = [c for c in codes if c.label == label]
        return code

    # Article
    self.codes = self.codebook.get_codes()
    self.code_A = single_code(self.codes, "A")
    self.code_B = single_code(self.codes, "B")
    self.code_A1 = single_code(self.codes, "A1")

    # Sentence
    # NOTE(review): these are read from self.codebook, not self.scodebook --
    # confirm that this is intended.
    self.scodes = self.codebook.get_codes()
    self.scode_A = single_code(self.scodes, "A")
    self.scode_B = single_code(self.scodes, "B")
    self.scode_A1 = single_code(self.scodes, "A1")

    # Does not get fired in unit test?
    for art in articles:
        get_or_create_sentences(art)

    self.job = amcattest.create_test_job(
        articleset=self.s1, articleschema=self.schema, unitschema=self.sschema)

    self.c1 = amcattest.create_test_coding(codingjob=self.job, article=self.a1)
    self.c1.update_values({self.codef: self.code_A.id, self.intf: 4, self.qualf: 4})

    self.c2 = amcattest.create_test_coding(codingjob=self.job, article=self.a2)
    self.c2.update_values({self.codef: self.code_A.id, self.intf: 2, self.qualf: 1})

    self.c3 = amcattest.create_test_coding(codingjob=self.job, article=self.a3)
    self.c3.update_values({self.codef: self.code_B.id, self.intf: 1, self.qualf: 2})

    self.c4 = amcattest.create_test_coding(codingjob=self.job, article=self.a4)
    self.c4.update_values({self.codef: self.code_A1.id, self.intf: 1})

    self.sentence_coding = amcattest.create_test_coding(
        codingjob=self.job,
        article=self.a1,
        sentence=self.a1.sentences.all()[0],
    )
    self.sentence_coding.update_values({self.scodef: self.scode_A1.id, self.sintf: 1})

    # Try to confuse aggregator by inserting multiple codingjobs
    other_job = amcattest.create_test_job(articleset=self.s1, articleschema=self.schema)
    other_coding = amcattest.create_test_coding(codingjob=other_job, article=self.a3)
    other_coding.update_values({self.codef: self.code_B.id, self.intf: 10, self.qualf: 8})
def get_context_data(self, **kwargs):
    """Add the sentences of ``self.article`` to the template context.

    The first item produced by ``_get_sentences`` (the headline) is
    consumed here, so the template only iterates over what follows it.
    """
    context = super(ArticleSplitView, self).get_context_data(**kwargs)
    queryset = sbd.get_or_create_sentences(self.article)
    sentence_iter = _get_sentences(queryset.only("sentence", "parnr"))
    context["sentences"] = sentence_iter
    next(sentence_iter)  # skip headline
    return context
def filter_queryset(self, queryset):
    """Narrow the parent's filtered queryset to the coded article's sentences.

    The article is looked up by ``self.coded_article.article_id``; the
    allowed ids are those of ``sbd.get_or_create_sentences`` for it.
    """
    filtered = super(CodedArticleSentenceViewSet, self).filter_queryset(queryset)
    coded = Article.objects.get(id=self.coded_article.article_id)
    return filtered.filter(id__in=sbd.get_or_create_sentences(coded))
def filter_queryset(self, sentences):
    """Narrow the parent's filtered queryset to the sentences of ``self.article``.

    The allowed ids are those of ``sbd.get_or_create_sentences(self.article)``.
    """
    filtered = super(CodingJobArticleSentenceViewSet, self).filter_queryset(sentences)
    article_sentences = sbd.get_or_create_sentences(self.article)
    return filtered.filter(id__in=article_sentences)
def filter_queryset(self, queryset):
    """Narrow the parent's filtered queryset to the sentences of ``self.article``.

    Rows must both reference ``self.article`` and have an id among
    ``sbd.get_or_create_sentences(self.article)``.
    """
    filtered = super(SentenceViewSet, self).filter_queryset(queryset)
    article_sentences = sbd.get_or_create_sentences(self.article)
    return filtered.filter(article=self.article, id__in=article_sentences)
def setUp(self):
    """Create five articles, two coding schemas and codings spread over two jobs."""
    self.s1 = amcattest.create_test_set(5)
    self.a1, self.a2, self.a3, self.a4, self.a5 = self.s1.articles.all()
    articles = [self.a1, self.a2, self.a3, self.a4, self.a5]

    self.m1, self.m2, self.m3, self.m4 = [art.medium for art in articles[:4]]

    # Give the third article the same medium as the second one.
    self.a3.medium = self.m2
    self.a3.save()

    # Only the first three articles get a new text.
    for art, text in zip(articles, ["aap.", "aap. noot.", "aap. noot. mies."]):
        art.text = text

    dates = [(2015, 1, 1), (2015, 1, 1), (2015, 2, 1), (2016, 1, 1), (2016, 1, 1)]
    for art, (year, month, day) in zip(articles, dates):
        art.date = datetime.datetime(year, month, day)

    for art in articles:
        art.save()

    # Uncomment if ever using elastic :)
    # self.s1.refresh_index(full_refresh=True)

    (self.schema, self.codebook, self.strf, self.intf,
     self.codef, self.boolf, self.qualf) = (
        amcattest.create_test_schema_with_fields(isarticleschema=True))
    (self.sschema, self.scodebook, self.sstrf, self.sintf,
     self.scodef, self.sboolf, self.squalf) = (
        amcattest.create_test_schema_with_fields(isarticleschema=False))

    def single_code(codes, label):
        # Unpacking fails unless exactly one code carries this label.
        (code,) = [c for c in codes if c.label == label]
        return code

    # Article
    self.codes = self.codebook.get_codes()
    self.code_A = single_code(self.codes, "A")
    self.code_B = single_code(self.codes, "B")
    self.code_A1 = single_code(self.codes, "A1")

    # Sentence
    # NOTE(review): these are read from self.codebook, not self.scodebook --
    # confirm that this is intended.
    self.scodes = self.codebook.get_codes()
    self.scode_A = single_code(self.scodes, "A")
    self.scode_B = single_code(self.scodes, "B")
    self.scode_A1 = single_code(self.scodes, "A1")

    # Does not get fired in unit test?
    for art in articles:
        get_or_create_sentences(art)

    self.job = amcattest.create_test_job(
        articleset=self.s1, articleschema=self.schema, unitschema=self.sschema)

    self.c1 = amcattest.create_test_coding(codingjob=self.job, article=self.a1)
    self.c1.update_values({self.codef: self.code_A.id, self.intf: 4, self.qualf: 4})

    self.c2 = amcattest.create_test_coding(codingjob=self.job, article=self.a2)
    self.c2.update_values({self.codef: self.code_A.id, self.intf: 2, self.qualf: 1})

    self.c3 = amcattest.create_test_coding(codingjob=self.job, article=self.a3)
    self.c3.update_values({self.codef: self.code_B.id, self.intf: 1, self.qualf: 2})

    self.c4 = amcattest.create_test_coding(codingjob=self.job, article=self.a4)
    self.c4.update_values({self.codef: self.code_A1.id, self.intf: 1})

    self.sentence_coding = amcattest.create_test_coding(
        codingjob=self.job,
        article=self.a1,
        sentence=self.a1.sentences.all()[0],
    )
    self.sentence_coding.update_values({self.scodef: self.scode_A1.id, self.sintf: 1})

    # Try to confuse aggregator by inserting multiple codingjobs
    other_job = amcattest.create_test_job(articleset=self.s1, articleschema=self.schema)
    other_coding = amcattest.create_test_coding(codingjob=other_job, article=self.a3)
    other_coding.update_values({self.codef: self.code_B.id, self.intf: 10, self.qualf: 8})
def setUp(self):
    """Create five id-ordered articles, two schemas and codings over two jobs."""
    self.s1 = amcattest.create_test_set(5)
    self.a = list(self.s1.articles.all().order_by('id'))
    self.ids = [a.id for a in self.a]
    # Exactly the five articles addressed by index below.
    articles = [self.a[index] for index in range(5)]

    self.m1, self.m2, self.m3 = "Telegraaf", "NRC", "AD"
    media = [self.m1, self.m2, self.m2, self.m3, self.m3]
    for art, medium in zip(articles, media):
        art.set_property("medium", medium)

    # Only the first three articles get a new text.
    for art, text in zip(articles, ["aap.", "aap. noot.", "aap. noot. mies."]):
        art.text = text

    dates = [(2015, 1, 1), (2015, 1, 1), (2015, 2, 1), (2016, 1, 1), (2016, 1, 1)]
    for art, (year, month, day) in zip(articles, dates):
        art.date = datetime.datetime(year, month, day)

    for art in articles:
        art.save()

    # Uncomment if ever using elastic :)
    # self.s1.refresh_index(full_refresh=True)

    (self.schema, self.codebook, self.strf, self.intf,
     self.codef, self.boolf, self.qualf) = (
        amcattest.create_test_schema_with_fields(isarticleschema=True))
    (self.sschema, self.scodebook, self.sstrf, self.sintf,
     self.scodef, self.sboolf, self.squalf) = (
        amcattest.create_test_schema_with_fields(isarticleschema=False))

    def single_code(codes, label):
        # Unpacking fails unless exactly one code carries this label.
        (code,) = [c for c in codes if c.label == label]
        return code

    # Article
    self.codes = self.codebook.get_codes()
    self.code_A = single_code(self.codes, "A")
    self.code_B = single_code(self.codes, "B")
    self.code_A1 = single_code(self.codes, "A1")

    # Sentence
    # NOTE(review): these are read from self.codebook, not self.scodebook --
    # confirm that this is intended.
    self.scodes = self.codebook.get_codes()
    self.scode_A = single_code(self.scodes, "A")
    self.scode_B = single_code(self.scodes, "B")
    self.scode_A1 = single_code(self.scodes, "A1")
    self.scode_A1b = single_code(self.scodes, "A1b")

    # Does not get fired in unit test?
    for art in articles:
        get_or_create_sentences(art)

    self.job = amcattest.create_test_job(
        articleset=self.s1, articleschema=self.schema, unitschema=self.sschema)

    self.c1 = amcattest.create_test_coding(codingjob=self.job, article=self.a[0])
    self.c1.update_values({self.codef: self.code_A.id, self.intf: 4, self.qualf: 4})

    self.c2 = amcattest.create_test_coding(codingjob=self.job, article=self.a[1])
    self.c2.update_values({self.codef: self.code_A.id, self.intf: 2, self.qualf: 1})

    self.c3 = amcattest.create_test_coding(codingjob=self.job, article=self.a[2])
    self.c3.update_values({self.codef: self.code_B.id, self.intf: 1, self.qualf: 2})

    self.c4 = amcattest.create_test_coding(codingjob=self.job, article=self.a[3])
    self.c4.update_values({self.codef: self.code_A1.id, self.intf: 1})

    # Three sentence codings are created in turn; self.sentence_coding
    # ends up referring to the last one.
    self.sentence_coding = amcattest.create_test_coding(
        codingjob=self.job,
        article=self.a[0],
        sentence=self.a[0].sentences.all()[0],
    )
    self.sentence_coding.update_values({self.scodef: self.scode_A1.id, self.sintf: 1})

    self.sentence_coding = amcattest.create_test_coding(
        codingjob=self.job,
        article=self.a[2],
        sentence=self.a[2].sentences.all()[0],
    )
    self.sentence_coding.update_values({self.scodef: self.scode_A1.id, self.sintf: 1})

    self.sentence_coding = amcattest.create_test_coding(
        codingjob=self.job,
        article=self.a[2],
        sentence=self.a[2].sentences.all()[0],
    )
    self.sentence_coding.update_values({self.scodef: self.scode_A1b.id, self.sintf: 1})

    # Try to confuse aggregator by inserting multiple codingjobs
    other_job = amcattest.create_test_job(articleset=self.s1, articleschema=self.schema)
    other_coding = amcattest.create_test_coding(codingjob=other_job, article=self.a[2])
    other_coding.update_values({self.codef: self.code_B.id, self.intf: 10, self.qualf: 8})