def set_up(self):
    """Create three articles with properties, spread them over two indexed sets."""
    article_specs = (
        ("a1", "aap noot mies", (2010, 1, 1), {"author": "De Bas", "length_int": 5}),
        ("a2", "aap noot geit", (2010, 1, 1), {"author": "Het Martijn", "length_int": 5}),
        ("a3", "lamp", (2010, 1, 2), {"author": "Het Martijn", "length_int": 15}),
    )
    for attr, text, ymd, props in article_specs:
        article = amcattest.create_test_article()
        setattr(self, attr, article)
        article.text = text
        article.date = datetime.datetime(*ymd)
        article.properties = props
        article.save()

    # aset1 holds a1 and a2, aset2 holds only a3.
    for attr, members in (("aset1", [self.a1, self.a2]), ("aset2", [self.a3])):
        article_set = amcattest.create_test_set()
        setattr(self, attr, article_set)
        article_set.add_articles(members)
        article_set.refresh_index(True)

    ES().refresh()
def setup(self):
    """Build two sets: s1 with four bulk-created articles, s2 with one article.

    Returns the tuple ``(s1, s2, a, b, c, d, e)``.
    """
    first_set = amcattest.create_test_set()
    second_set = amcattest.create_test_set()

    # (text, title, date) for the articles that are instantiated but not yet saved.
    unsaved_specs = (
        ('aap noot mies', 'm1', '2001-01-01'),
        ('noot mies wim zus', 'm2', '2001-02-01'),
        ('mies bla bla bla wim zus jet', 'm2', '2002-01-01'),
        ('noot mies wim zus', 'm2', '2001-02-03'),
    )
    unsaved = [
        amcattest.create_test_article(text=text, title=title, date=date, create=False)
        for text, title, date in unsaved_specs
    ]
    in_second_set = amcattest.create_test_article(
        text='aap noot mies', title='m3', articleset=second_set)

    Article.create_articles(unsaved, articleset=first_set)
    ES().refresh()
    return (first_set, second_set) + tuple(unsaved) + (in_second_set,)
def test_used_properties(self):
    """Check which property names ES reports as used for given article sets."""
    a1 = amcattest.create_test_article(properties={"p1": "test", "p2_date": "2001-01-01"})
    a2 = amcattest.create_test_article(properties={"p1": "test", "p3_num": 15})
    a3 = amcattest.create_test_article(properties={"p1": "test", "p4": "test"})

    s1 = amcattest.create_test_set(articles=[a1])
    s2 = amcattest.create_test_set(articles=[a2])
    s3 = amcattest.create_test_set(articles=[a1, a3])
    ES().refresh()

    expectations = (
        ((s1.id,), {"p1", "p2_date"}),
        ((s1.id, s2.id), {"p1", "p2_date", "p3_num"}),
        ((s3.id,), {"p1", "p2_date", "p4"}),
    )
    # Every lookup is asserted twice in a row.
    # NOTE(review): presumably the second pass exercises a cached path - confirm.
    for _pass in range(2):
        for set_ids, expected in expectations:
            self.assertEqual(set(ES().get_used_properties(list(set_ids))), expected)
def test_aggregation(self):
    """Can we list and aggregate a project's articles per medium?"""
    project = amcattest.create_test_project()
    medium_a, medium_b = [amcattest.create_test_medium() for _ in [1, 2]]
    few = {amcattest.create_test_article(project=project, medium=medium_a) for _ in range(5)}
    many = {amcattest.create_test_article(project=project, medium=medium_b) for _ in range(15)}

    article_set = amcattest.create_test_set(project=project)
    article_set.add_articles(few | many)
    article_set.refresh_index()

    # Selecting on project, and on project + medium id.
    self.assertEqual(self.list(projects=[project.id]), self.pks(few | many))
    self.assertEqual(self.list(projects=[project.id], mediums=[medium_a.id]), self.pks(few))

    # Aggregating per medium gives one row per medium with its article count.
    table = self.aggr(projects=[project.id], xAxis='medium')
    self.assertEqual(set(table), {(5,), (15,)})

    # A second project that contains the first project's articles through a set.
    other_project = amcattest.create_test_project()
    other_set = amcattest.create_test_set(project=other_project)
    other_set.add(*(few | many))
    # NOTE(review): the result is not asserted here; the call only has to succeed.
    table = self.aggr(projects=[other_project.id], articlesets=[other_set.id], xAxis='medium')
def test_relative_date_hash(self):
    """A form with a relative date must hash differently when 'now' moves."""
    first_set = amcattest.create_test_set(2)
    second_set = amcattest.create_test_set(2, project=first_set.project)
    project = first_set.project
    nine_days_back = datetime.timedelta(-9).total_seconds()

    def fresh_form():
        # get_form returns a 3-tuple; only the form (last item) is needed.
        _, _, form = self.get_form(
            project=project,
            articlesets=[first_set.id, second_set.id],
            relative_date=nine_days_back,
        )
        return form

    hash_now = fresh_form().get_hash()

    tomorrow = datetime.datetime.now() + datetime.timedelta(1)
    with mock_datetime_now(tomorrow):
        hash_tomorrow = fresh_form().get_hash()
    self.assertNotEqual(hash_now, hash_tomorrow)

    # Sanity-check the mock itself: active inside the block, gone afterwards.
    with mock_datetime_now(datetime.datetime.now()):
        hash_mocked_now = fresh_form().get_hash()
        self.assertEqual(datetime.datetime.__name__, "MockDateTimeToday")
    self.assertNotEqual(datetime.datetime.__name__, "MockDateTimeToday")
    self.assertEqual(hash_now, hash_mocked_now)
def set_up(self):
    """Create three dated articles (remembering their media) in two indexed sets."""
    article_specs = (
        ("aap noot mies", (2010, 1, 1)),
        ("aap noot geit", (2010, 1, 1)),
        ("lamp", (2010, 1, 2)),
    )
    for number, (text, ymd) in enumerate(article_specs, 1):
        article = amcattest.create_test_article()
        setattr(self, "a%d" % number, article)
        article.text = text
        article.date = datetime.datetime(*ymd)
        article.save()
        # Keep the article's medium as self.m1 .. self.m3.
        setattr(self, "m%d" % number, article.medium)

    for attr, members in (("aset1", [self.a1, self.a2]), ("aset2", [self.a3])):
        article_set = amcattest.create_test_set()
        setattr(self, attr, article_set)
        article_set.add_articles(members)
        article_set.refresh_index(True)

    ES().flush()
def test_hash(self):
    """Hashes ignore the order of ids, and ignore fields listed in ignore_fields."""
    first_set = amcattest.create_test_set(2)
    second_set = amcattest.create_test_set(2, project=first_set.project)
    project = first_set.project

    # Same sets and article ids in a different order; only the query really differs.
    _, _, form_a = self.get_form(
        project=project,
        articlesets=[first_set.id, second_set.id],
        article_ids="1\n2\n3",
        query="abc\ndefg",
    )
    _, _, form_b = self.get_form(
        project=project,
        articlesets=[second_set.id, first_set.id],
        article_ids="1\n3\n2",
        query="abc\ndifferent\nquery",
    )
    form_a.full_clean()
    form_b.full_clean()

    hash_a_no_query = form_a.get_hash(ignore_fields=("query",))
    hash_b_no_query = form_b.get_hash(ignore_fields=("query",))
    self.assertEqual(hash_a_no_query, hash_b_no_query)

    hash_a_full = form_a.get_hash()
    hash_b_full = form_b.get_hash()
    self.assertNotEqual(hash_a_full, hash_b_full)
def setUp(self):
    """Create a project with sets "foo" and "bar" and store its articleset API URL."""
    project = amcattest.create_test_project()
    for set_name in ("foo", "bar"):
        amcattest.create_test_set(name=set_name, project=project)
    url_template = "/api/v4/projects/{project.id}/articlesets/?format=json"
    self.url = url_template.format(project=project)
def test_filters(self):
    """Do the medium and date filters work, alone and combined?"""
    m1, m2 = [amcattest.create_test_medium() for _ in range(2)]
    a = amcattest.create_test_article(text='aap noot mies', medium=m1, date="2001-01-01")
    b = amcattest.create_test_article(text='noot mies wim zus', medium=m2, date="2002-01-01")
    c = amcattest.create_test_article(text='mies bla bla bla wim zus jet', medium=m2,
                                      date="2003-01-01")
    s1 = amcattest.create_test_set(articles=[a, b, c])
    s2 = amcattest.create_test_set(articles=[a, b])
    ES().flush()

    def matching_ids(**filters):
        return set(ES().query_ids(filters=filters))

    # Medium filter
    self.assertEqual(matching_ids(mediumid=m2.id), {b.id, c.id})

    # Date filters
    self.assertEqual(matching_ids(sets=s1.id, start_date='2001-06-01'), {b.id, c.id})
    # start is inclusive
    self.assertEqual(
        matching_ids(sets=s1.id, start_date='2002-01-01', end_date="2002-06-01"),
        {b.id})
    # end is exclusive
    self.assertEqual(
        matching_ids(sets=s1.id, start_date='2001-01-01', end_date="2003-01-01"),
        {a.id, b.id})

    # Combinations
    self.assertEqual(matching_ids(sets=s2.id, start_date='2001-06-01'), {b.id})
    self.assertEqual(matching_ids(end_date='2002-06-01', mediumid=m2.id), {b.id})
def test_delete(self):
    """Deleting a set removes it and its exclusive articles from DB and index."""
    doomed = amcattest.create_test_set()
    doomed_id = doomed.id
    survivor = amcattest.create_test_set()
    arts = [amcattest.create_test_article() for _ in range(10)]
    # arts[6] and arts[7] end up in both sets.
    doomed.add_articles(arts[:8])
    survivor.add_articles(arts[6:])
    ES().flush()

    doomed.delete()
    ES().flush()

    # The set itself and the articles that were only in that set are gone.
    with self.assertRaises(ArticleSet.DoesNotExist):
        ArticleSet.objects.get(pk=doomed_id)
    with self.assertRaises(Article.DoesNotExist):
        Article.objects.get(pk=arts[0].id)

    # Shared articles are kept.
    shared = arts[6]
    self.assertEqual(Article.objects.get(pk=shared.id).id, shared.id)
    remaining_pks = set(survivor.articles.values_list("pk", flat=True))
    self.assertEqual(remaining_pks, {art.id for art in arts[6:]})

    # The index reflects the deletion.
    self.assertEqual(ES().count(filters={"sets": doomed_id}), 0)
    self.assertEqual(ES().count(filters={"sets": survivor.id}), 4)
    with self.assertRaises(elasticsearch.NotFoundError):
        ES().get(arts[0].id)
    self.assertEqual(ES().get(shared.id)['id'], shared.id)
def test_filters(self):
    """Do the title and date filters work, alone and combined?"""
    a = amcattest.create_test_article(text='aap noot mies', title='m1', date="2001-01-01")
    b = amcattest.create_test_article(text='noot mies wim zus', title='m2', date="2002-01-01")
    c = amcattest.create_test_article(text='mies bla bla bla wim zus jet', title='m2',
                                      date="2003-01-01")
    s1 = amcattest.create_test_set(articles=[a, b, c])
    s2 = amcattest.create_test_set(articles=[a, b])
    ES().refresh()

    def ids_for(**filters):
        return set(ES().query_ids(filters=filters))

    # Title filter
    self.assertEqual(ids_for(title='m2'), {b.id, c.id})

    # Date filters
    self.assertEqual(ids_for(sets=s1.id, start_date='2001-06-01'), {b.id, c.id})
    # start is inclusive
    self.assertEqual(
        ids_for(sets=s1.id, start_date='2002-01-01', end_date="2002-06-01"),
        {b.id})
    # end is exclusive
    self.assertEqual(
        ids_for(sets=s1.id, start_date='2001-01-01', end_date="2003-01-01"),
        {a.id, b.id})

    # Combinations
    self.assertEqual(ids_for(sets=s2.id, start_date='2001-06-01'), {b.id})
    self.assertEqual(ids_for(end_date='2002-06-01', title='m2'), {b.id})
def test_aggregate(self):
    """Can we aggregate per medium and per date interval, and get set statistics?"""
    from amcat.models import Article

    m1 = amcattest.create_test_medium(name="De Nep-Krant")
    m2, m3 = [amcattest.create_test_medium() for _ in range(2)]
    s1 = amcattest.create_test_set()
    s2 = amcattest.create_test_set()
    # An article in another set/medium that must not show up in s1's results.
    unused = amcattest.create_test_article(text='aap noot mies', medium=m3, articleset=s2)

    specs = (
        ('aap noot mies', m1, '2001-01-01'),
        ('noot mies wim zus', m2, '2001-02-01'),
        ('mies bla bla bla wim zus jet', m2, '2002-01-01'),
        ('noot mies wim zus', m2, '2001-02-03'),
    )
    pending = [
        amcattest.create_test_article(text=text, medium=medium, date=date, create=False)
        for text, medium, date in specs
    ]
    Article.create_articles(pending, articleset=s1, check_duplicate=False, create_id=True)
    ES().flush()

    def aggregate(**options):
        return dict(ES().aggregate_query(filters=dict(sets=s1.id), **options))

    self.assertEqual(aggregate(group_by="mediumid"), {m1.id: 1, m2.id: 3})
    self.assertEqual(
        aggregate(group_by="date", date_interval="year"),
        {datetime(2001, 1, 1): 3, datetime(2002, 1, 1): 1})
    self.assertEqual(
        aggregate(group_by="date", date_interval="month"),
        {datetime(2001, 1, 1): 1, datetime(2002, 1, 1): 1, datetime(2001, 2, 1): 2})

    # Set statistics
    stats = ES().statistics(filters=dict(sets=s1.id))
    self.assertEqual(stats.n, 4)
    self.assertEqual(stats.start_date, datetime(2001, 1, 1))
    self.assertEqual(stats.end_date, datetime(2002, 1, 1))

    # Media list
    listed_media = set(ES().list_media(filters=dict(sets=s1.id)))
    self.assertEqual(listed_media, {m1.id, m2.id})
def test_query_args_from_form(self):
    """query_args_from_form turns form data into start/rows/filters arguments."""
    m = amcattest.create_test_medium()
    s1 = amcattest.create_test_set()
    s2 = amcattest.create_test_set()

    columns = [u'article_id', u'date', u'medium_id', u'medium_name', u'headline']
    form = dict(
        sortColumn='',
        useSolr=True,
        start=100,
        length=100,
        articleids=[],
        articlesets=[s1, s2],
        mediums=[m],
        projects=[],
        columns=columns,
        highlight=False,
        columnInterval='month',
        datetype='all',
        sortOrder='',
    )

    args = query_args_from_form(form)

    medium_filter = u'mediumid:{m.id}'.format(m=m)
    sets_filter = u'sets:{s1.id} OR sets:{s2.id}'.format(s1=s1, s2=s2)
    expected = dict(start=100, rows=100, filters=[medium_filter, sets_filter])
    self.assertEqual(args, expected)
def test_list_media(self):
    """Listing media must also work when there are more than 10 media."""
    from amcat.models import Article

    media = [amcattest.create_test_medium() for _ in range(20)]
    # One unsaved article per medium.
    arts = [amcattest.create_test_article(medium=medium, create=False) for medium in media]

    # The first five articles go into s1 ...
    s1 = amcattest.create_test_set()
    Article.create_articles(arts[:5], articleset=s1, check_duplicate=False, create_id=True)
    ES().flush()
    self.assertEqual(set(s1.get_mediums()), set(media[:5]))

    # ... and the remaining fifteen into s2, in the same project.
    s2 = amcattest.create_test_set(project=s1.project)
    Article.create_articles(arts[5:], articleset=s2, check_duplicate=False, create_id=True)
    ES().flush()
    self.assertEqual(set(s2.get_mediums()), set(media[5:]))

    # The project as a whole reports all twenty media.
    self.assertEqual(set(s1.project.get_mediums()), set(media))
def test_filters(self):
    """Do the medium and date filters work, alone and combined?"""
    m1, m2 = [amcattest.create_test_medium() for _ in range(2)]
    a = amcattest.create_test_article(text='aap noot mies', medium=m1, date="2001-01-01")
    b = amcattest.create_test_article(text='noot mies wim zus', medium=m2, date="2002-01-01")
    c = amcattest.create_test_article(text='mies bla bla bla wim zus jet', medium=m2,
                                      date="2003-01-01")
    s1 = amcattest.create_test_set(articles=[a, b, c])
    s2 = amcattest.create_test_set(articles=[a, b])
    ES().flush()

    def found(**filters):
        return set(ES().query_ids(filters=filters))

    # Medium filter
    self.assertEqual(found(mediumid=m2.id), {b.id, c.id})

    # Date filters
    self.assertEqual(found(sets=s1.id, start_date='2001-06-01'), {b.id, c.id})
    # start is inclusive
    self.assertEqual(
        found(sets=s1.id, start_date='2002-01-01', end_date="2002-06-01"),
        {b.id})
    # end is exclusive
    self.assertEqual(
        found(sets=s1.id, start_date='2001-01-01', end_date="2003-01-01"),
        {a.id, b.id})

    # Combinations
    self.assertEqual(found(sets=s2.id, start_date='2001-06-01'), {b.id})
    self.assertEqual(found(end_date='2002-06-01', mediumid=m2.id), {b.id})
def test_project_duplicates(self):
    """Project triples stay unique, also after the article joins another set."""

    def assert_no_duplicate_triples():
        triples = list(get_triples_project(self.project))
        self.assertEqual(len(triples), len(set(triples)), "Duplicate triples!")

    assert_no_duplicate_triples()
    # Put the article in an additional set of the same project and check again.
    extra_set = amcattest.create_test_set(project=self.project)
    extra_set.add(self.article)
    assert_no_duplicate_triples()
def test_analyses_per_article(self):
    """Analyses per article follow the active projects an article belongs to."""
    # p1 and p2 are active, p3 is not.
    p1, p2, p3 = [amcattest.create_test_project(active=x < 2) for x in range(3)]
    a1, a2, a3, a4 = [
        amcattest.create_test_article(project=project)
        for project in (p1, p2, p2, p3)
    ]
    articles = {a1.id, a2.id, a3.id, a4.id}

    def analyses_per_article():
        # At most 3 queries: 2 for projects/article, 1 for analyses/project.
        with self.checkMaxQueries(n=3):
            return multidict(_get_analyses_per_article(articles))

    # Baseline: no article has any analysis.
    self.assertEqual(analyses_per_article(), {})

    # Attach analyses to the active projects.
    n1, n2, n3 = [amcattest.create_test_analysis() for _ in range(3)]
    for project, analysis in ((p1, n1), (p1, n2), (p2, n2), (p2, n3)):
        AnalysisProject.objects.create(project=project, analysis=analysis)
    expected_active = {
        a1.id: {n1.id, n2.id},
        a2.id: {n2.id, n3.id},
        a3.id: {n2.id, n3.id},
    }
    self.assertEqual(analyses_per_article(), expected_active)

    # An analysis on an inactive project changes nothing.
    AnalysisProject.objects.create(project=p3, analysis=n3)
    self.assertEqual(analyses_per_article(), expected_active)

    # Adding an article to a project through a set does have an effect.
    s1 = amcattest.create_test_set(project=p1)
    s2 = amcattest.create_test_set(project=p2)
    s1.add(a4)
    s1.add(a2)
    AnalysisProject.objects.create(project=p3, analysis=n2)
    self.assertEqual(
        analyses_per_article(),
        {
            a1.id: {n1.id, n2.id},
            a2.id: {n1.id, n2.id, n3.id},
            a3.id: {n2.id, n3.id},
            a4.id: {n1.id, n2.id},
        })
def set_up(self):
    """Create a project with two favourite article sets (2 and 3 articles)."""
    super(TestQueryView, self).setUp()
    project = amcattest.create_test_project()
    self.project = project
    self.user = project.insert_user
    self.aset1 = amcattest.create_test_set(2, project=project)
    self.aset2 = amcattest.create_test_set(3, project=project)
    for article_set in (self.aset1, self.aset2):
        project.favourite_articlesets.add(article_set)
def setUp(self):
    """Replace all scrapers by one dated and one DB test scraper, plus a project."""
    Scraper.objects.all().delete()
    scraper_module = 'amcat.tests.test_scraping'

    dated_set = amcattest.create_test_set(name='TestArticleSet')
    self.ds = Scraper.objects.create(
        module=scraper_module,
        class_name='TestDatedScraper',
        run_daily=True,
        articleset=dated_set,
    )

    db_set = amcattest.create_test_set(name='TestArticleSet2')
    self.dbs = Scraper.objects.create(
        module=scraper_module,
        class_name='TestDBScraper',
        run_daily=True,
        username='******',
        password='******',
        articleset=db_set,
    )

    self.project = amcattest.create_test_project(name='scrapetest')
def set_up(self):
    """Create a project and five small articles collected in one set."""
    self.project = amcattest.create_test_project()
    # Listed in creation order: a4 is created before a3.
    article_texts = (
        ("a1", "aap noot mies"),
        ("a2", "aap noot"),
        ("a4", "aap noot"),
        ("a3", "aap"),
        ("a5", "vuur"),
    )
    for attr, text in article_texts:
        setattr(self, attr, amcattest.create_test_article(text=text))

    members = (self.a1, self.a2, self.a3, self.a4, self.a5)
    amcattest.create_test_set(members, project=self.project)
    ES().flush()
def set_up(self):
    """Create a project with two article sets, both flagged as favourite."""
    super(TestQueryView, self).setUp()
    project = amcattest.create_test_project()
    self.project = project
    self.user = project.insert_user
    self.aset1 = amcattest.create_test_set(2, project=project)
    self.aset2 = amcattest.create_test_set(3, project=project)
    for article_set in (self.aset1, self.aset2):
        ProjectArticleSet.objects.update_or_create(
            project=project,
            articleset=article_set,
            defaults={'is_favourite': True},
        )
def test_articlesets(self):
    """Check set membership as stored in, and queried from, the index."""
    a, b, c = [amcattest.create_test_article() for _ in range(3)]
    s1 = amcattest.create_test_set(articles=[a, b, c])
    s2 = amcattest.create_test_set(articles=[b, c])
    s3 = amcattest.create_test_set(articles=[b])
    ES().refresh()

    # c is in s1 and s2, but not in s3.
    indexed_c = ES().get(c.id)
    self.assertEqual(set(indexed_c['sets']), {s1.id, s2.id})

    ids_in_s1 = set(ES().query_ids(filters=dict(sets=s1.id)))
    self.assertEqual(ids_in_s1, {a.id, b.id, c.id})
def test_all_articlesets(self):
    """all_articlesets() covers own and linked sets and returns a QuerySet."""
    from django.db.models.query import QuerySet

    project, other_project = [amcattest.create_test_project() for _ in [1, 2]]
    own_set = amcattest.create_test_set(5, project=project)
    foreign_set = amcattest.create_test_set(5, project=other_project)

    self.assertEqual({own_set}, set(project.all_articlesets()))

    # After linking the other project's set, both sets are returned.
    project.articlesets.add(foreign_set)
    self.assertEqual({own_set, foreign_set}, set(project.all_articlesets()))
    self.assertIsInstance(project.all_articlesets(), QuerySet)
def test_all_articlesets(self):
    """Does all_articlesets() return own and linked sets, as a QuerySet?"""
    from django.db.models.query import QuerySet

    p1 = amcattest.create_test_project()
    p2 = amcattest.create_test_project()
    a1 = amcattest.create_test_set(5, project=p1)
    a2 = amcattest.create_test_set(5, project=p2)

    # Initially only the project's own set is visible.
    visible = set(p1.all_articlesets())
    self.assertEqual({a1}, visible)

    # Linking a2 to p1 makes it visible as well.
    p1.articlesets.add(a2)
    visible = set(p1.all_articlesets())
    self.assertEqual({a1, a2}, visible)

    self.assertIsInstance(p1.all_articlesets(), QuerySet)
def set_up(self):
    """Create a project with one set holding five short test articles."""
    make_article = amcattest.create_test_article
    self.project = amcattest.create_test_project()
    self.a1 = make_article(text="aap noot mies")
    self.a2 = make_article(text="aap noot")
    # a4 is deliberately created before a3, as in the original fixture.
    self.a4 = make_article(text="aap noot")
    self.a3 = make_article(text="aap")
    self.a5 = make_article(text="vuur")

    all_articles = (self.a1, self.a2, self.a3, self.a4, self.a5)
    amcattest.create_test_set(all_articles, project=self.project)
    ES().refresh()
def test_get_mediums(self):
    """A project's media are the media of the articles in all of its sets.

    set1 and set2 share a project; set3 is created in a project of its own,
    so its media must not be reported for set1's project.
    """
    set1 = amcattest.create_test_set(2)
    set2 = amcattest.create_test_set(2, project=set1.project)
    set3 = amcattest.create_test_set(2)
    # Plain loop: refresh_index is called for its side effect only.
    for article_set in (set1, set2, set3):
        article_set.refresh_index()

    media = set(set1.project.get_mediums())
    expected = (
        {a.medium for a in set1.articles.all()}
        | {a.medium for a in set2.articles.all()}
    )
    self.assertEqual(media, expected)

    # get_mediums on a project without any articles yields nothing.
    empty_project = amcattest.create_test_project()
    self.assertEqual(list(empty_project.get_mediums()), [])
def setup(self):
    """Build three media and two sets: s1 with four articles, s2 with one.

    Returns the tuple ``(m1, m2, m3, s1, s2, a, b, c, d, e)``.
    """
    m1 = amcattest.create_test_medium(name="De Nep-Krant")
    m2, m3 = [amcattest.create_test_medium() for _ in range(2)]
    s1 = amcattest.create_test_set()
    s2 = amcattest.create_test_set()

    # (text, medium, date) of the articles that are bulk-created into s1.
    specs = (
        ('aap noot mies', m1, '2001-01-01'),
        ('noot mies wim zus', m2, '2001-02-01'),
        ('mies bla bla bla wim zus jet', m2, '2002-01-01'),
        ('noot mies wim zus', m2, '2001-02-03'),
    )
    pending = [
        amcattest.create_test_article(text=text, medium=medium, date=date, create=False)
        for text, medium, date in specs
    ]
    e = amcattest.create_test_article(text='aap noot mies', medium=m3, articleset=s2)

    Article.create_articles(pending, articleset=s1)
    ES().flush()
    return (m1, m2, m3, s1, s2) + tuple(pending) + (e,)
def test_add_many(self):
    """Can a large number of articles be added from one set to another?"""
    source_set = amcattest.create_test_set()
    target_set = amcattest.create_test_set()
    project = amcattest.create_test_project()
    arts = [
        amcattest.create_test_article(project=project, create=False)
        for _ in range(1213)
    ]

    Article.create_articles(arts, source_set)
    ES().refresh()
    self.assertEqual(len(arts), source_set.get_count())

    target_set.add_articles(arts)
    ES().refresh()
    self.assertEqual(len(arts), target_set.get_count())
    # NOTE(review): looks like leftover debug output; kept to preserve behaviour.
    print(target_set.get_count())
def setup(self):
    """Build three media and two sets: s1 with four articles, s2 with one.

    The four s1 articles are bulk-created without duplicate checking.
    Returns the tuple ``(m1, m2, m3, s1, s2, a, b, c, d, e)``.
    """
    m1 = amcattest.create_test_medium(name="De Nep-Krant")
    m2, m3 = [amcattest.create_test_medium() for _ in range(2)]
    s1 = amcattest.create_test_set()
    s2 = amcattest.create_test_set()

    def unsaved(text, medium, date):
        return amcattest.create_test_article(text=text, medium=medium, date=date,
                                             create=False)

    a = unsaved('aap noot mies', m1, '2001-01-01')
    b = unsaved('noot mies wim zus', m2, '2001-02-01')
    c = unsaved('mies bla bla bla wim zus jet', m2, '2002-01-01')
    d = unsaved('noot mies wim zus', m2, '2001-02-03')
    e = amcattest.create_test_article(text='aap noot mies', medium=m3, articleset=s2)

    Article.create_articles([a, b, c, d], articleset=s1,
                            check_duplicate=False, create_id=True)
    ES().flush()
    return m1, m2, m3, s1, s2, a, b, c, d, e
def test_permissions(self):
    """Check the status codes of the article and articleset views per user."""
    from amcat.models import Role, ProjectRole

    metareader = Role.objects.get(label='metareader', projectlevel=True)
    reader = Role.objects.get(label='reader', projectlevel=True)

    p1 = amcattest.create_test_project(guest_role=None)
    p2 = amcattest.create_test_project(guest_role=metareader)
    s1 = amcattest.create_test_set(project=p1)
    s2 = amcattest.create_test_set(project=p2)
    p1.articlesets.add(s2)

    # Aliases
    aset_url = ArticleSetViewSet.get_url
    article_url = ArticleViewSet.get_url

    body = {
        'text': 'bla',
        'headline': 'headline',
        'date': '2013-01-01T00:00:00',
        'medium': 'test_medium',
    }

    # An anonymous user should be able to read p2's articlesets but not its
    # articles (that requires READER), and nothing at all on p1.
    for project, article_set in ((p1, s1), (p2, s2)):
        self.get(article_url(project=project.id, articleset=article_set.id),
                 check_status=401)
    for project, status in ((p1, 401), (p2, 200)):
        self.get(aset_url(project=project.id), check_status=status)

    # It is illegal to view an articleset through a project it is not a member of.
    self.get(article_url(project=p2.id, articleset=s1.id), check_status=404)

    u = p1.owner
    ProjectRole.objects.create(project=p2, user=u, role=reader)

    # User u should be able to view all views ...
    for project, article_set in ((p1, s1), (p1, s2), (p2, s2)):
        self.get(article_url(project=project.id, articleset=article_set.id),
                 as_user=u, check_status=200)
    # ... except this one, of course, because it doesn't exist.
    self.get(article_url(project=p2.id, articleset=s1.id), as_user=u, check_status=404)
    for project in (p1, p2):
        self.get(aset_url(project=project.id), as_user=u, check_status=200)

    # User u should be able to add articles to set 1 via project 1, but not p2/s2.
    self.post(article_url(project=p1.id, articleset=s1.id), body,
              as_user=u, check_status=201)
    self.post(article_url(project=p2.id, articleset=s2.id), body,
              as_user=u, check_status=403)

    # Neither u (p1.owner) nor p2.owner should be able to modify set 2 via project 1.
    for poster in (u, p2.owner):
        self.post(article_url(project=p1.id, articleset=s2.id), body,
                  as_user=poster, check_status=403)
def test_refresh_index(self):
    """Are added/removed articles added to/removed from the index on refresh?"""
    # TODO add/remove articles adds to index automatically (does remove?)
    # so refresh isn't really used. Rewrite to add to db manually

    def indexed_ids(article_set):
        return set(ES().query_ids(filters=dict(sets=article_set.id)))

    s = amcattest.create_test_set()
    a = amcattest.create_test_article()
    s.add(a)
    self.assertEqual(set(), indexed_ids(s))
    s.refresh_index()
    self.assertEqual({a.id}, indexed_ids(s))

    # Adding an existing article to a new set.
    s2 = amcattest.create_test_set()
    s2.add(a)
    s2.refresh_index()
    self.assertEqual({a.id}, indexed_ids(s2))

    # Removing an article from one set works and leaves other sets alone.
    s2.remove_articles([a])
    s2.refresh_index()
    self.assertEqual(set(), indexed_ids(s2))
    self.assertEqual({a.id}, indexed_ids(s))

    # The index only changes once refresh_index is called.
    s.remove_articles([a])
    self.assertEqual({a.id}, indexed_ids(s))
    s.refresh_index()
    self.assertEqual(set(), indexed_ids(s))

    # Removing from the index also works for larger sets.
    s = amcattest.create_test_set()
    arts = [amcattest.create_test_article(medium=a.medium) for _ in range(20)]
    s.add(*arts)
    s.refresh_index()
    solr_ids = indexed_ids(s)
    self.assertEqual(set(solr_ids), {art.id for art in arts})

    s.remove_articles([arts[0]])
    s.remove_articles([arts[-1]])
    s.refresh_index()
    solr_ids = indexed_ids(s)
    self.assertEqual(set(solr_ids), {art.id for art in arts[1:-1]})

    # test that changing an article's properties can be reindexed
    # NOTE(review): nothing is asserted after this change in the visible code.
    arts[1].medium = amcattest.create_test_medium()
    arts[1].save()
def test_refresh_index(self):
    """Does refresh_index bring the index in line with a set's articles?"""
    # TODO add/remove articles adds to index automatically (does remove?)
    # so refresh isn't really used. Rewrite to add to db manually

    def assert_indexed(article_set, expected_ids):
        found = set(ES().query_ids(filters=dict(sets=article_set.id)))
        self.assertEqual(expected_ids, found)

    first = amcattest.create_test_set()
    article = amcattest.create_test_article()
    first.add(article)
    assert_indexed(first, set())
    first.refresh_index()
    assert_indexed(first, {article.id})

    # An article that already exists is added to a new set.
    second = amcattest.create_test_set()
    second.add(article)
    second.refresh_index()
    assert_indexed(second, {article.id})

    # Removal from one set must not affect the other set.
    second.remove_articles([article])
    second.refresh_index()
    assert_indexed(second, set())
    assert_indexed(first, {article.id})

    # Without a refresh the removed article is still indexed.
    first.remove_articles([article])
    assert_indexed(first, {article.id})
    first.refresh_index()
    assert_indexed(first, set())

    # Same checks on a larger set.
    big = amcattest.create_test_set()
    arts = [
        amcattest.create_test_article(medium=article.medium)
        for _ in range(20)
    ]
    big.add(*arts)
    big.refresh_index()
    solr_ids = set(ES().query_ids(filters=dict(sets=big.id)))
    self.assertEqual(set(solr_ids), {art.id for art in arts})

    big.remove_articles([arts[0]])
    big.remove_articles([arts[-1]])
    big.refresh_index()
    solr_ids = set(ES().query_ids(filters=dict(sets=big.id)))
    self.assertEqual(set(solr_ids), {art.id for art in arts[1:-1]})

    # test that changing an article's properties can be reindexed
    # NOTE(review): nothing is asserted after this change in the visible code.
    arts[1].medium = amcattest.create_test_medium()
    arts[1].save()
def test_clean_article_ids(self):
    """article_ids validation: ids from sets outside the project are rejected."""
    project, _, form = self.get_form()
    linked_set = amcattest.create_test_set(1)
    article = linked_set.articles.all()[0]
    project.articlesets.add(linked_set)
    self.assertTrue(form.is_valid())

    def form_for(article_id):
        _, _, new_form = self.get_form(project=project, article_ids=str(article_id))
        return new_form

    # The id of an article in a linked set is accepted ...
    self.assertTrue(form_for(article.id).is_valid())
    # ... and so is the next id, which is asserted to validate as well.
    self.assertTrue(form_for(article.id + 1).is_valid())

    # An article from a set that is not linked to the project is rejected.
    outsider = amcattest.create_test_set(1).articles.all()[0]
    self.assertFalse(form_for(outsider.id).is_valid())
def test_add_many(self):
    """Can a large number of articles be added from one set to another?"""
    source_set = amcattest.create_test_set()
    target_set = amcattest.create_test_set()
    medium = amcattest.create_test_medium()
    project = amcattest.create_test_project()
    arts = [
        amcattest.create_test_article(project=project, medium=medium, create=False)
        for _ in range(1213)
    ]

    Article.create_articles(arts, source_set, create_id=True)
    ES().flush()
    self.assertEqual(len(arts), source_set.get_count())

    target_set.add_articles(arts, monitor=ProgressMonitor())
    ES().flush()
    self.assertEqual(len(arts), target_set.get_count())
    # NOTE(review): looks like leftover debug output; kept to preserve behaviour.
    print(target_set.get_count())
def setUp(self):
    """Start from an empty scraper table and register two daily test scrapers."""
    Scraper.objects.all().delete()

    shared = dict(module='amcat.tests.test_scraping', run_daily=True)

    self.ds = Scraper.objects.create(
        class_name='TestDatedScraper',
        articleset=amcattest.create_test_set(name='TestArticleSet'),
        **shared
    )
    self.dbs = Scraper.objects.create(
        class_name='TestDBScraper',
        username='******',
        password='******',
        articleset=amcattest.create_test_set(name='TestArticleSet2'),
        **shared
    )
    self.project = amcattest.create_test_project(name='scrapetest')
def test_clean_article_ids(self):
    """article_ids validation: ids from sets outside the project are rejected."""
    project, _, form = self.get_form()
    favourite_set = amcattest.create_test_set(1)
    article = favourite_set.articles.all()[0]
    ProjectArticleSet.objects.create(project=project, articleset=favourite_set,
                                     is_favourite=True)
    self.assertTrue(form.is_valid())

    # The article's own id, and the id right after it, are both asserted valid.
    for candidate in (article.id, article.id + 1):
        _, _, form = self.get_form(project=project, article_ids=str(candidate))
        self.assertTrue(form.is_valid())

    # An article from a set that is not attached to the project is rejected.
    outsider = amcattest.create_test_set(1).articles.all()[0]
    _, _, form = self.get_form(project=project, article_ids=str(outsider.id))
    self.assertFalse(form.is_valid())
def test_create(self):
    """POST an article to a set and check it through the API and the index."""
    s = amcattest.create_test_set()
    url = ArticleViewSet.get_url(project=s.project.id, articleset=s.id)

    # The set starts out empty (and the listing endpoint answers at all).
    listing = self.get(url)
    self.assertEqual(listing['results'], [])

    body = {
        'text': 'bla',
        'headline': 'headline',
        'date': '2013-01-01T00:00:00',
        'medium': 'test_medium',
    }
    created = self.post(url, body, as_user=s.project.owner)
    if isinstance(created, list):
        # A list response must hold exactly one article.
        created, = created
    self.assertEqual(created['headline'], body['headline'])

    listing = self.get(url)
    self.assertEqual(len(listing['results']), 1)
    listed = listing['results'][0]
    self.assertEqual(listed['headline'], body['headline'])
    self.assertEqual(listed['project'], s.project_id)
    self.assertEqual(listed['length'], 2)

    # Is the result added to the elastic index as well?
    from amcat.tools import amcates
    amcates.ES().flush()
    hits = list(amcates.ES().query(filters=dict(sets=s.id),
                                   fields=["text", "headline", 'medium']))
    self.assertEqual(len(hits), 1)
    self.assertEqual(hits[0].medium, "test_medium")
    self.assertEqual(hits[0].headline, "headline")
def setUp(self):
    """Run the BZK scraper on test_files/bzk/test.html and store its result.

    The scraper instance is kept as ``self.bzk`` and the value returned by
    ``run()`` as ``self.result``.
    """
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    html_file = open(os.path.join(self.dir, 'test.html'))
    # The handle was previously never closed. Register the close as a cleanup
    # so the file stays open for the whole test and is released afterwards.
    # NOTE(review): assumes the enclosing class derives from unittest.TestCase.
    self.addCleanup(html_file.close)

    self.bzk = BZK(project=amcattest.create_test_project().id,
                   file=File(html_file),
                   articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()


def test_scrape_unit(self):
    """The scraper run must produce a truthy result."""
    self.assertTrue(self.result)


def test_scrape_file(self):
    """Check which properties are filled in on the scraped articles."""
    # Properties to check for:
    # headline, text, pagenr, section, medium, date
    must_props = ('headline', 'text', 'medium', 'date')
    may_props = ('pagenr', 'section')

    # One list of values per property, collected before anything is asserted.
    must_values = [[getattr(article, prop) for article in self.result]
                   for prop in must_props]
    may_values = [[getattr(article, prop) for article in self.result]
                  for prop in may_props]

    # Mandatory properties must be truthy on every article.
    for values in must_values:
        self.assertTrue(all(values))

    # Optional properties: assumes at least one article has each of them.
    for values in may_values:
        self.assertTrue(any(values))
def test_get_article_ids(self):
    """get_article_ids agrees with the database, with and without elastic."""
    aset = amcattest.create_test_set(10)
    ES().refresh()

    def database_ids():
        return set(aset.articles.all().values_list("id", flat=True))

    self.assertEqual(database_ids(), aset.get_article_ids())
    self.assertEqual(database_ids(), aset.get_article_ids(use_elastic=True))
def test_to_medium_ids(self):
    """to_medium_ids accepts a medium, a list, a queryset or a list of ids."""
    arts = amcattest.create_test_set(2).articles.all()
    m1, m2 = amcattest.create_test_medium(), amcattest.create_test_medium()
    both_ids = {m1.id, m2.id}

    # A single medium object
    self.assertEqual(set(to_medium_ids(m1)), {m1.id})
    # A list of medium objects
    self.assertEqual(set(to_medium_ids([m1, m2])), both_ids)
    # A queryset of media
    medium_queryset = Medium.objects.filter(id__in=[m1.id, m2.id])
    self.assertEqual(set(to_medium_ids(medium_queryset)), both_ids)
    # A flat values_list of medium ids
    id_values = arts.values_list("medium__id", flat=True)
    self.assertEqual(set(to_medium_ids(id_values)), {a.medium_id for a in arts})
def test_create(self):
    """Can we fill a set with some articles and get the same number back?"""
    article_set = amcattest.create_test_set()
    expected_count = 7
    added = 0
    while added < expected_count:
        article_set.add(amcattest.create_test_article())
        added += 1
    self.assertEqual(expected_count, len(article_set.articles.all()))
def test_parents(self):
    """Posting a parent with nested children links the children to that parent."""
    aset = amcattest.create_test_set()
    url = ArticleViewSet.get_url(project=aset.project.id, articleset=aset.id)

    template = {
        'text': 'bla',
        'headline': 'headline',
        'date': '2013-01-01T00:00:00',
        'medium': 'test_medium'
    }
    first_child = dict(template, headline='c1')
    second_child = dict(template, headline='c2')
    parent_article = dict(template, headline='parent')
    # children travel as a JSON-encoded string inside the parent payload
    payload = dict(parent_article, children=json.dumps([first_child, second_child]))

    self.post(url, payload, as_user=aset.project.owner)

    # Expect three articles; c1 and c2 point at the parent, the parent at nothing
    by_headline = {}
    for article in self.get(url)['results']:
        by_headline[article['headline']] = article
    self.assertEqual(len(by_headline), 3)
    parent_id = by_headline['parent']['id']
    self.assertEqual(by_headline['c1']['parent'], parent_id)
    self.assertEqual(by_headline['c2']['parent'], by_headline['parent']['id'])
    self.assertEqual(by_headline['parent']['parent'], None)
def test_full_refresh(self):
    """A changed article is only re-indexed when refresh_index gets full_refresh=True."""
    old_medium, new_medium = [amcattest.create_test_medium() for _ in range(2)]
    article = amcattest.create_test_article(text='aap noot mies', medium=old_medium)
    aset = amcattest.create_test_set()
    aset.add(article)
    aset.refresh_index()

    def indexed_ids(medium):
        # ids of articles in the set that the index files under the given medium
        return set(ES().query_ids(filters=dict(sets=aset.id, mediumid=medium.id)))

    self.assertEqual(indexed_ids(old_medium), {article.id})

    # Change the medium in the database only
    article.medium = new_medium
    article.save()

    # Without a full refresh the index should still show the old medium
    aset.refresh_index(full_refresh=False)
    self.assertEqual(indexed_ids(old_medium), {article.id})
    self.assertEqual(indexed_ids(new_medium), set())

    # A full refresh picks up the changed medium
    aset.refresh_index(full_refresh=True)
    self.assertEqual(indexed_ids(old_medium), set())
    self.assertEqual(indexed_ids(new_medium), {article.id})
def test_parents_multiple(self):
    """A JSON list mixing a leaf article and a parent-with-child can be posted at once."""
    aset = amcattest.create_test_set()
    url = ArticleViewSet.get_url(project=aset.project.id, articleset=aset.id)

    template = {
        'text': 'bla',
        'headline': 'headline',
        'date': '2013-01-01T00:00:00',
        'medium': 'test_medium'
    }
    child_article = dict(template, headline='c')
    parent_article = dict(template, headline='p')
    leaf_article = dict(template, headline='l')

    articles = [leaf_article, dict(parent_article, children=[child_article])]
    payload = json.dumps(articles)
    self.post(url, payload, as_user=aset.project.owner,
              request_options=dict(content_type='application/json'))

    by_headline = {}
    for article in self.get(url)['results']:
        by_headline[article['headline']] = article

    self.assertEqual(len(by_headline), 3)
    # only the child has a parent; the parent and the leaf are roots
    self.assertEqual(by_headline['c']['parent'], by_headline['p']['id'])
    self.assertEqual(by_headline['p']['parent'], None)
    self.assertEqual(by_headline['l']['parent'], None)
def test_create(self):
    """Create an article through the REST endpoint; verify the API and elastic see it."""
    test_set = amcattest.create_test_set()
    endpoint = ArticleViewSet.get_url(project=test_set.project.id, articleset=test_set.id)

    # Initially the set must be empty (i.e. the results can be fetched at all)
    before = self.get(endpoint)
    self.assertEqual(before['results'], [])

    article_data = {
        'text': 'bla',
        'headline': 'headline',
        'date': '2013-01-01T00:00:00',
        'medium': 'test_medium'
    }
    response = self.post(endpoint, article_data, as_user=test_set.project.owner)
    if isinstance(response, list):
        # unpack the single created article; more than one is an error
        response, = response
    self.assertEqual(response['headline'], article_data['headline'])

    after = self.get(endpoint)
    self.assertEqual(len(after['results']), 1)
    stored = after['results'][0]
    self.assertEqual(stored['headline'], article_data['headline'])
    self.assertEqual(stored['project'], test_set.project_id)
    self.assertEqual(stored['length'], 2)

    # Was the article added to the elastic index as well?
    from amcat.tools import amcates
    amcates.ES().flush()
    query = amcates.ES().query(filters=dict(sets=test_set.id),
                               fields=["text", "headline", 'medium'])
    indexed = list(query)
    self.assertEqual(len(indexed), 1)
    self.assertEqual(indexed[0].medium, "test_medium")
    self.assertEqual(indexed[0].headline, "headline")
def test_articleset_triggers(self):
    """Set membership changes, deletion and project moves should queue the article."""
    article = amcattest.create_test_article()
    article_set = amcattest.create_test_set()

    # Start from an empty queue: the article must not be in it
    self._flush_queue()
    self.assertNotIn(article.id, self._all_articles())

    # Adding to a set queues the article
    article_set.add(article)
    self.assertIn(article.id, self._all_articles())

    # Removing from a set queues it again
    self._flush_queue()
    article_set.remove(article)
    self.assertIn(article.id, self._all_articles())

    # Deleting queues it; keep the id because delete() is called on the object
    self._flush_queue()
    deleted_id = article.id
    article.delete()
    self.assertIn(deleted_id, self._all_articles())

    # Moving the set to another project queues its articles
    other_article = amcattest.create_test_article()
    article_set.add(other_article)
    self._flush_queue()
    article_set.project = amcattest.create_test_project()
    article_set.save()
    self.assertIn(other_article.id, self._all_articles())
def test_to_medium_ids(self):
    """Check to_medium_ids for each supported input form."""
    set_articles = amcattest.create_test_set(2).articles.all()
    medium_a, medium_b = amcattest.create_test_medium(), amcattest.create_test_medium()

    # one medium object
    ids_from_single = set(to_medium_ids(medium_a))
    self.assertEqual(ids_from_single, {medium_a.id, })

    # list of medium objects
    ids_from_list = set(to_medium_ids([medium_a, medium_b]))
    self.assertEqual(ids_from_list, {medium_a.id, medium_b.id})

    # Medium queryset
    ids_from_queryset = set(to_medium_ids(
        Medium.objects.filter(id__in=[medium_a.id, medium_b.id])))
    self.assertEqual(ids_from_queryset, {medium_a.id, medium_b.id})

    # flat values_list of medium ids
    ids_from_values = set(to_medium_ids(set_articles.values_list("medium__id", flat=True)))
    self.assertEqual(ids_from_values, {art.medium_id for art in set_articles})
def test_include_uncoded_sentences(self):
    """Exporting with include_uncoded_sentences adds rows for sentences without codings."""
    (article_schema, _acodebook, article_str, article_int,
     _acodef, _, _) = amcattest.create_test_schema_with_fields(isarticleschema=True)
    (sentence_schema, _scodebook, sentence_str, sentence_int,
     _scodef, _, _) = amcattest.create_test_schema_with_fields(isarticleschema=False)

    coded_article = amcattest.create_test_article(text="Zin 1. Zin 2.")
    other_article = amcattest.create_test_article(text="Zin 1. Zin 2.")
    article_set = amcattest.create_test_set([coded_article, other_article])
    job = amcattest.create_test_job(articleset=article_set,
                                    articleschema=article_schema,
                                    unitschema=sentence_schema)

    # Code a single sentence (index 1) of the first article
    coded_sentence = list(get_or_create_sentences(coded_article))[1]
    coding = create_test_coding(codingjob=job, article=coded_article, sentence=coded_sentence)
    coding.update_values({sentence_str: "bla", sentence_int: 10})

    fields = {sentence_str: {}, sentence_int: {}, article_str: {}, article_int: {}}

    # One row when only codings are exported, three when uncoded sentences are included
    for include_uncoded, expected_rows in ((False, 1), (True, 3)):
        rows = self._get_results([job], fields,
                                 include_uncoded_sentences=include_uncoded,
                                 export_level=CODING_LEVEL_BOTH)
        self.assertEqual(expected_rows, len(rows))
def setUp(self):
    """Scrape the ``test.html`` fixture with BZK and keep the scraper and its output.

    Populates ``self.dir``, ``self.bzk`` and ``self.result``.
    """
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    html_path = os.path.join(self.dir, 'test.html')
    html_file = open(html_path)
    # Register the close so the handle is released after each test
    # instead of leaking as it did before.
    self.addCleanup(html_file.close)
    self.bzk = BZK(project=amcattest.create_test_project().id,
                   file=File(html_file),
                   articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()


def test_scrape_unit(self):
    """Running the scraper must produce a truthy result."""
    self.assertTrue(self.result)


def test_scrape_file(self):
    """Verify the properties of the scraped articles.

    Every article needs a truthy headline, text, medium and date; pagenr
    and section only need to be truthy on at least one article.
    """
    must_names = ('headline', 'text', 'medium', 'date')
    may_names = ('pagenr', 'section')

    # Read every attribute first, so an AttributeError is raised before asserting
    must_columns = [[getattr(a, name) for a in self.result] for name in must_names]
    may_columns = [[getattr(a, name) for a in self.result] for name in may_names]

    for name, column in zip(must_names, must_columns):
        self.assertTrue(all(column), "required property %r missing on an article" % name)
    for name, column in zip(may_names, may_columns):
        # assuming at least one of the articles has the property
        self.assertTrue(any(column), "optional property %r not set on any article" % name)
def setUp(self):
    """Run the BZK PDF scraper on ``test.pdf`` unless slow tests are skipped.

    When ``amcattest.skip_slow_tests()`` is true nothing is set up; the
    tests below return early under the same condition.
    """
    if amcattest.skip_slow_tests():
        return
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    # NOTE(review): the PDF is opened in the default (text) mode, as before;
    # confirm whether the scraper expects a binary handle.
    pdf_file = open(os.path.join(self.dir, 'test.pdf'))
    # Close the fixture after the test; the handle was previously never closed.
    self.addCleanup(pdf_file.close)
    self.bzk = BZKPDFScraper(project=amcattest.create_test_project().id,
                             file=File(pdf_file),
                             articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()


def test_scrape_unit(self):
    """The scraper must have built an index and produced a truthy result."""
    if amcattest.skip_slow_tests():
        return
    self.assertTrue(self.bzk.index)
    self.assertTrue(self.result)


def test_getarticle(self):
    """Check the ``props`` of the scraped articles.

    headline, text, medium and date must be truthy on every article;
    pagenr must be truthy on at least one.
    """
    if amcattest.skip_slow_tests():
        return
    required = ('headline', 'text', 'medium', 'date')
    optional = ('pagenr', )

    # Gather all values first so a missing attribute fails before any assertion
    required_values = [(prop, [getattr(a.props, prop) for a in self.result])
                       for prop in required]
    optional_values = [(prop, [getattr(a.props, prop) for a in self.result])
                       for prop in optional]

    for prop, values in required_values:
        self.assertTrue(all(values), "property %r is empty on some article" % prop)
    for prop, values in optional_values:
        # assuming at least one of the articles has the property
        self.assertTrue(any(values), "property %r is empty on every article" % prop)
def test_dates(self):
    """Aggregating article counts by date works for month and quarter intervals."""
    common_options = dict(xAxis='date', yAxis='medium',
                          counterType='numberOfArticles', datetype='all')

    # Three articles of one medium, in January, March and August 2001
    january = amcattest.create_test_article(date='2001-01-01')
    march = amcattest.create_test_article(date='2001-03-02', medium=january.medium)
    august = amcattest.create_test_article(date='2001-08-12', medium=january.medium)
    article_set = amcattest.create_test_set(articles=[january, march, august])
    article_set.refresh_index()

    def aggregate(interval):
        # Run the aggregation for the set and return the table rows as a set
        table = AggregationScript.run_script(
            dict(articlesets=[article_set.id], projects=[article_set.project_id],
                 dateInterval=interval, **common_options))
        return set(table.to_list(row_names=True, tuple_name=None))

    # Per month: a row for every month from 01 through 08, count 1 only in months 1, 3, 8
    expected_months = {('2001-%02i' % i, int(i in (1, 3, 8))) for i in range(1, 9)}
    self.assertEqual(aggregate('month'), expected_months)

    # Per quarter: two articles in Q1, none in Q2, one in Q3
    expected_quarters = {('2001-1', 2), ('2001-2', 0), ('2001-3', 1)}
    self.assertEqual(aggregate('quarter'), expected_quarters)
def setUp(self):
    """Build a coding schema, five coding jobs with article sets, and a few codings."""
    from amcat.models.coding.coding import CodingValue

    # Schema and a codebook holding a single code labelled "CODED"
    self.schema = amcattest.create_test_schema()
    self.codebook = amcattest.create_test_codebook()
    self.code = amcattest.create_test_code(label="CODED")
    self.codebook.add_code(self.code)

    # Field types are looked up by primary key
    # (presumably 1 = text, 2 = number, 5 = code, judging by the names used here)
    text_type = CodingSchemaFieldType.objects.get(pk=1)
    int_type = CodingSchemaFieldType.objects.get(pk=2)
    code_type = CodingSchemaFieldType.objects.get(pk=5)

    make_field = CodingSchemaField.objects.create
    self.textfield = make_field(codingschema=self.schema, fieldnr=1,
                                fieldtype=text_type, label="Text")
    self.intfield = make_field(codingschema=self.schema, fieldnr=2,
                               fieldtype=int_type, label="Number")
    self.codefield = make_field(codingschema=self.schema, fieldnr=3,
                                fieldtype=code_type, label="Code",
                                codebook=self.codebook)

    self.users = [amcattest.create_test_user() for _ in range(2)]

    # Five jobs: the first four for users[0], the last for users[1];
    # job number k (0-based) gets a set of 2 * (k + 1) articles
    self.articles, self.jobs, self.asets = [], [], []
    for job_index, user_index in enumerate([0, 0, 0, 0, 1]):
        article_set = amcattest.create_test_set(articles=2 * (job_index + 1))
        self.articles += list(article_set.articles.all())
        self.asets.append(article_set)
        coding_job = amcattest.create_test_job(articleschema=self.schema,
                                               unitschema=self.schema,
                                               coder=self.users[user_index],
                                               articleset=article_set)
        self.jobs.append(coding_job)

    # Two article-level codings on the first job; the second is complete and commented
    first_job = self.jobs[0]
    self.an1 = Coding.objects.create(codingjob=first_job, article=self.articles[0])
    self.an2 = Coding.objects.create(codingjob=first_job, article=self.articles[1])
    self.an2.set_status(STATUS_COMPLETE)
    self.an2.comments = 'Makkie!'
    self.an2.save()

    # Two sentence-level codings on the first article, both using the same test sentence
    sentence = amcattest.create_test_sentence()
    self.sa1 = Coding.objects.create(codingjob=first_job, article=self.articles[0],
                                     sentence=sentence)
    self.sa2 = Coding.objects.create(codingjob=first_job, article=self.articles[0],
                                     sentence=sentence)

    # Values attached to the sentence codings
    make_value = CodingValue.objects.create
    make_value(coding=self.sa1, field=self.intfield, intval=1)
    make_value(coding=self.sa1, field=self.textfield, strval="bla")
    make_value(coding=self.sa2, field=self.textfield, strval="blx")
    make_value(coding=self.sa1, field=self.codefield, intval=self.code.id)