def test_queries(self):
    """Test the list_queries context manager.

    Creating a test project for an existing user is expected to be
    recorded as exactly two queries.
    """
    owner = amcattest.create_test_user()
    with list_queries() as queries:
        amcattest.create_test_project(owner=owner)
    # Two queries expected: create project, create role for owner
    self.assertEqual(len(queries), 2)
def test_filter(self):
    """Check filtering of ProjectResource on simple, related and multi-valued keys."""
    from amcat.models import ProjectRole, Role
    from api.rest.resources import ProjectResource

    def ids(**filters):
        # Shorthand: ids returned by the project resource for these filters
        return self._get_ids(ProjectResource, **filters)

    admin = Role.objects.get(label='admin')
    plain = amcattest.create_test_project(name="test")
    guest = amcattest.create_test_project(name="not a test", guest_role=admin)
    other = amcattest.create_test_project(name="anothertest")

    # Without a filter, all projects are returned
    self.assertEqual(ids(), {plain.id, guest.id, other.id})

    # Simple fields: id, name and pk
    self.assertEqual(ids(id=guest.id), {guest.id})
    self.assertEqual(ids(name=plain.name), {plain.id})
    self.assertEqual(ids(pk=plain.id), {plain.id})

    # Directly related field
    self.assertEqual(ids(guest_role__id=admin.id), {guest.id})

    # Filter on 1-to-many field (disabled)
    #aset = amcattest.create_test_set(project=p)
    #self.assertEqual(self._get_ids(ProjectResource, articlesets_set__id=aset.id), {p.id})

    # n-on-m field: project roles
    member = amcattest.create_test_user()
    self.assertEqual(ids(projectrole__user__id=member.id), set())
    ProjectRole.objects.create(project=other, user=member, role=admin)
    self.assertEqual(ids(projectrole__user__id=member.id), {other.id})

    # Multiple values for the same key are expected to be OR'ed
    #self.assertEqual(self._get_ids(ProjectResource, id=[p.id, p2.id]), {p2.id, p.id})
    self.assertEqual(ids(pk=[plain.id, guest.id]), {guest.id, plain.id})
def todo_test_zip_file(self):
    """Check _get_units for a plain text upload and for a zip archive.

    NOTE(review): the 'todo_' prefix presumably keeps the test runner
    from collecting this method -- confirm before relying on it.
    """
    from tempfile import NamedTemporaryFile, mkstemp
    from django.core.files import File

    # A plain file should yield a single unit named after the file itself
    with NamedTemporaryFile(prefix=u"upload_test", suffix=".txt") as plain:
        plain.write("Test")
        plain.flush()
        script = UploadScript(project=amcattest.create_test_project().id,
                              file=File(plain))
        unit_names = {unit.name for unit in script._get_units()}
        self.assertEqual(unit_names, {plain.name})

    # A zip file should yield one unit per archive member
    #handle, fn = mkstemp(suffix=".zip")
    with NamedTemporaryFile(suffix=".zip") as archive:
        with zipfile.ZipFile(archive, "w") as zf:
            zf.writestr("test.txt", "TEST")
            zf.writestr("x/test.txt", "TAST")
        script = UploadScript(project=amcattest.create_test_project().id,
                              file=File(archive))
        member_names = {unit.name for unit in script._get_units()}
        self.assertEqual(member_names, {"test.txt", "x/test.txt"})
        member_texts = {unit.read() for unit in script._get_units()}
        self.assertEqual(member_texts, {"TEST", "TAST"})
def test_aggregation(self):
    """Can we select on medium and aggregate article counts per medium?"""
    project = amcattest.create_test_project()
    medium_a = amcattest.create_test_medium()
    medium_b = amcattest.create_test_medium()
    few = {amcattest.create_test_article(project=project, medium=medium_a)
           for _ in range(5)}
    many = {amcattest.create_test_article(project=project, medium=medium_b)
            for _ in range(15)}
    everything = few | many

    aset = amcattest.create_test_set(project=project)
    aset.add_articles(everything)
    aset.refresh_index()

    # Selecting on project, optionally narrowed down by medium
    self.assertEqual(self.list(projects=[project.id]), self.pks(everything))
    self.assertEqual(self.list(projects=[project.id], mediums=[medium_a.id]),
                     self.pks(few))

    # Aggregating on medium gives one row per medium
    table = self.aggr(projects=[project.id], xAxis='medium')
    self.assertEqual(set(table), {(5,), (15,)})

    # Second project whose set contains the articles of the first project.
    # The aggregation result is not asserted on in the visible code.
    second = amcattest.create_test_project()
    shared = amcattest.create_test_set(project=second)
    shared.add(*everything)
    x = self.aggr(projects=[second.id], articlesets=[shared.id], xAxis='medium')
def test_permissions(self):
    """Articles should be visible if any set they are in grants the right role."""
    role_metareader = Role.objects.get(label="metareader")
    role_reader = Role.objects.get(label="reader")

    user = amcattest.create_test_user(username="******", password="******")
    p1 = amcattest.create_test_project(name="p1")
    p2 = amcattest.create_test_project(name="p2", owner=user)
    s1 = amcattest.create_test_set(project=p1)
    a1 = amcattest.create_test_article(project=p1, articleset=s1,
                                       text="Dit is de tekst", title="hoofdlijn")

    client = Client()
    client.login(username="******", password="******")
    url = reverse("navigator:" + ArticleDetailsView.get_view_name(),
                  args=[p1.id, s1.id, a1.id])

    def check(url, can_view=True, can_read_article=True):
        # Fetch the page and verify the status code and the text-visibility flag
        response = client.get(url)
        expected_status = 200 if can_view else 403
        self.assertEqual(response.status_code, expected_status)
        if can_view:
            self.assertEqual(response.context['can_view_text'], can_read_article)
        return response

    def set_guest_role(role):
        p1.guest_role = role
        p1.save()

    # The user can read the text if p1 has reader as guest role
    set_guest_role(role_reader)
    response = check(url)
    self.assertIn(b"Dit is de tekst", response.content)

    # ...but only the meta data if the guest role is metareader
    set_guest_role(role_metareader)
    response = check(url, can_read_article=False)
    self.assertNotIn(b"Dit is de tekst", response.content)
    self.assertIn(b"hoofdlijn", response.content)

    # ...and gets an error if there is no guest role at all
    set_guest_role(None)
    check(url, can_view=False)

    # Unless the article set is added to project 2 (which the user owns)
    p2.articlesets.add(s1)
    check(url)

    # This also holds if project 1 has metareader as guest role
    set_guest_role(role_metareader)
    check(url)
def test_permissions(self):
    """Articles should be visible if any set they are in grants the right role."""
    role_metareader = Role.objects.get(label="metareader", projectlevel=True)
    role_reader = Role.objects.get(label="reader", projectlevel=True)

    user = amcattest.create_test_user(username="******", password="******")
    p1 = amcattest.create_test_project(name="p1")
    p2 = amcattest.create_test_project(name="p2", owner=user)
    s1 = amcattest.create_test_set(project=p1)
    a1 = amcattest.create_test_article(project=p1, articleset=s1,
                                       text="Dit is de tekst", headline="hoofdlijn")

    client = Client()
    client.login(username="******", password="******")
    url = reverse("navigator:" + ArticleDetailsView.get_view_name(),
                  args=[p1.id, s1.id, a1.id])

    def check(url, can_view=True, can_read_article=True):
        # Fetch the page and verify the status code and the text-visibility flag
        response = client.get(url)
        expected_status = 200 if can_view else 403
        self.assertEqual(response.status_code, expected_status)
        if can_view:
            self.assertEqual(response.context['can_view_text'], can_read_article)
        return response

    def set_guest_role(role):
        p1.guest_role = role
        p1.save()

    # The user can read the text if p1 has reader as guest role
    set_guest_role(role_reader)
    response = check(url)
    self.assertIn(b"Dit is de tekst", response.content)

    # ...but only the meta data if the guest role is metareader
    set_guest_role(role_metareader)
    response = check(url, can_read_article=False)
    self.assertNotIn(b"Dit is de tekst", response.content)
    self.assertIn(b"hoofdlijn", response.content)

    # ...and gets an error if there is no guest role at all
    set_guest_role(None)
    check(url, can_view=False)

    # Unless the article set is added to project 2 (which the user owns)
    p2.articlesets.add(s1)
    check(url)

    # This also holds if project 1 has metareader as guest role
    set_guest_role(role_metareader)
    check(url)
def get_articles(self, filename=default_file):
    """Upload *filename* through the Factivia script and return the created articles.

    The upload and the script form now share one project; previously the
    form was given a second, freshly created project that did not own
    the upload.

    :param filename: path of the file to upload (defaults to default_file)
    :return: queryset with all articles of the target article set
    """
    project = amcattest.create_test_project()
    upload = create_test_upload(filename, project=project)
    articleset = amcattest.create_test_set().id

    fields = ['date', 'page_int', 'title', 'author', 'text', 'length_int', 'medium']
    # Map every target field onto the source column named by its prefix,
    # e.g. 'page_int' is filled from column 'page'
    field_map = {k: {"type": "field", "value": k.split("_")[0]} for k in fields}

    form = dict(project=project.id,
                articleset=articleset,
                field_map=json.dumps(field_map),
                encoding='utf-8',
                upload=upload.id)
    Factivia(**form).run()
    return ArticleSet.objects.get(pk=articleset).articles.all()
def test_page_size(self):
    """Check that page_size limits the number of results on a page."""
    from api.rest.resources import ProjectResource

    for label in ("t", "t2"):
        amcattest.create_test_project(name=label, description=label,
                                      insert_date="2011-01-01")

    # Assumes that the default page_size is greater than or equal to 2
    unpaged = self.get(ProjectResource)
    self.assertEqual(len(unpaged['results']), 2)

    paged = self.get(ProjectResource, page_size=1)
    self.assertEqual(len(paged['results']), 1)
    self.assertEqual(paged['total'], 2)
    self.assertEqual(paged['per_page'], 1)
def test_permissions(self):
    """Check read/write permissions on the article set and article API views."""
    from amcat.models import Role, ProjectRole

    metareader = Role.objects.get(label='metareader', projectlevel=True)
    reader = Role.objects.get(label='reader', projectlevel=True)

    p1 = amcattest.create_test_project(guest_role=None)
    p2 = amcattest.create_test_project(guest_role=metareader)
    s1 = amcattest.create_test_set(project=p1)
    s2 = amcattest.create_test_set(project=p2)
    p1.articlesets.add(s2)

    # Short aliases for the url builders
    aset_url = ArticleSetViewSet.get_url
    article_url = ArticleViewSet.get_url

    body = {
        'text': 'bla',
        'headline': 'headline',
        'date': '2013-01-01T00:00:00',
        'medium': 'test_medium',
    }

    # An anonymous user should be able to read p2's article sets but not
    # its articles (requires READER), and nothing at all on p1
    self.get(article_url(project=p1.id, articleset=s1.id), check_status=401)
    self.get(article_url(project=p2.id, articleset=s2.id), check_status=401)
    self.get(aset_url(project=p1.id), check_status=401)
    self.get(aset_url(project=p2.id), check_status=200)

    # It is illegal to view an article set through a project it is not a member of
    self.get(article_url(project=p2.id, articleset=s1.id), check_status=404)

    u = p1.owner
    ProjectRole.objects.create(project=p2, user=u, role=reader)

    # User u should be able to view all article views...
    for project, aset in ((p1, s1), (p1, s2), (p2, s2)):
        self.get(article_url(project=project.id, articleset=aset.id),
                 as_user=u, check_status=200)
    # ...except this one, of course, because it doesn't exist
    self.get(article_url(project=p2.id, articleset=s1.id), as_user=u, check_status=404)

    self.get(aset_url(project=p1.id), as_user=u, check_status=200)
    self.get(aset_url(project=p2.id), as_user=u, check_status=200)

    # User u should be able to add articles to set 1 via project 1, but not p2/s2
    self.post(article_url(project=p1.id, articleset=s1.id), body,
              as_user=u, check_status=201)
    self.post(article_url(project=p2.id, articleset=s2.id), body,
              as_user=u, check_status=403)

    # Neither u (p1.owner) nor p2.owner should be able to modify set 2 via project 1
    linked_url = article_url(project=p1.id, articleset=s2.id)
    self.post(linked_url, body, as_user=u, check_status=403)
    self.post(article_url(project=p1.id, articleset=s2.id), body,
              as_user=p2.owner, check_status=403)
def test_get_schemas(self):
    """Does get_codingschemas give the right results for a schema linked to several projects?"""
    from django import forms

    owner = amcattest.create_test_project()
    importer = amcattest.create_test_project()
    outsider = amcattest.create_test_project()

    schema = amcattest.create_test_schema(project=owner)
    owner.codingschemas.add(schema)
    importer.codingschemas.add(schema)

    # Defining the form checks that the queryset is accepted by a choice field
    class TestForm(forms.Form):
        c = forms.ModelChoiceField(queryset=owner.get_codingschemas())

    expectations = ((owner, 1), (importer, 1), (outsider, 0))
    for project, expected in expectations:
        matches = project.get_codingschemas().filter(pk=schema.id)
        self.assertEqual(len(matches), expected)
def setUp(self):
    """Run the BZK PDF scraper on the bundled test.pdf fixture.

    Does nothing when slow tests are skipped. The fixture is opened in
    binary mode (PDF is not text) and closed again via addCleanup, so
    the handle is not leaked.
    """
    if amcattest.skip_slow_tests():
        return
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    pdf = open(os.path.join(self.dir, 'test.pdf'), 'rb')
    self.addCleanup(pdf.close)
    self.bzk = BZKPDFScraper(project=amcattest.create_test_project().id,
                             file=File(pdf),
                             articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()


def test_scrape_unit(self):
    """The scraper should have built an index and produced a result."""
    if amcattest.skip_slow_tests():
        return
    self.assertTrue(self.bzk.index)
    self.assertTrue(self.result)


def test_getarticle(self):
    """Check the properties of the scraped articles.

    headline, text, medium and date must be set on every article;
    pagenr only has to be set on at least one of them.
    """
    if amcattest.skip_slow_tests():
        return
    must_props = ('headline', 'text', 'medium', 'date')
    may_props = ('pagenr',)

    must_values = [[getattr(a.props, prop) for a in self.result] for prop in must_props]
    may_values = [[getattr(a.props, prop) for a in self.result] for prop in may_props]

    for proplist in must_values:
        self.assertTrue(all(proplist))
    for proplist in may_values:
        # Assumes at least one article has the property; fails otherwise
        self.assertTrue(any(proplist))
def test_post(self):
    """Test whether posting and retrieving an article works correctly"""
    self.set_up()

    project = amcattest.create_test_project(owner=self.user)
    aset = amcattest.create_test_set(project=project)
    url = "/api/v4/projects/{p.id}/articlesets/{s.id}/articles/".format(p=project, s=aset)

    article = dict(
        date=datetime.datetime.now().isoformat(),
        headline='Test child',
        medium='Fantasy',
        text='Hello Universe',
        pagenr=1,
        url='http://example.org',
        uuid='c691fadf-3c45-4ed6-93fe-f035b5f500af',
    )

    self.post(url, article, self.user)
    # Flush the index before reading the article back
    amcates.ES().flush()

    results = self.get(url)["results"]
    self.assertEqual(len(results), 1)
    stored = results[0]
    self.assertEqual(stored["headline"], article['headline'])
    self.assertEqual(toolkit.readDate(stored["date"]), toolkit.readDate(article['date']))
    self.assertEqual(stored["uuid"], article['uuid'])
def test_date_formats(self):
    """All supported date notations should be cleaned to 25 October 2006."""
    dates = (
        "2006-10-25", "2006/10/25", "25-10-2006", "25/10/2006",
        "Oct 25 2006", "Oct 25, 2006", "25 Oct 2006", "25 Oct, 2006",
        "October 25 2006", "October 25, 2006", "25 October 2006",
        "25 October, 2006",
    )
    expected = datetime.date(2006, 10, 25)
    project = amcattest.create_test_project()

    def clean(date, data):
        # Build and validate a selection form; return its cleaned data
        form = SelectionForm(data=data, project=project)
        form.full_clean()
        self.assertFormValid(form, "Date: {}".format(repr(date)))
        return form.cleaned_data

    for date in dates:
        cleaned = clean(date, {"start_date": date})
        self.assertEqual(expected, cleaned["start_date"].date())

    # datetype 'on' should fill both ends of the range
    for date in dates:
        cleaned = clean(date, {"on_date": date, "datetype": "on"})
        self.assertEqual(expected, cleaned["start_date"].date())
        self.assertEqual(expected, cleaned["end_date"].date())

    for date in dates:
        cleaned = clean(date, {"end_date": date})
        self.assertEqual(expected, cleaned["end_date"].date())
def test_get(self):
    """Check the result row and paging meta data for a single project.

    The expected guest role is read from the project itself instead of
    a hard-coded primary key, so the test does not depend on the id the
    role happens to have in the test database.
    """
    p1 = amcattest.create_test_project(name="testnaam", description="testdescription",
                                       insert_date='2012-01-01')

    actual = self.get(ProjectResource, id=p1.id)
    actual_results = actual.pop("results")
    self.assertEqual(len(actual_results), 1)
    actual_results = actual_results[0]

    # Only check that insert_date parses as a date; its value is not compared
    date = actual_results.pop('insert_date')
    readDate(date)

    expected_results = {
        u'insert_user': p1.insert_user.id,
        u'description': 'testdescription',
        u'name': u'testnaam',
        u'guest_role': p1.guest_role_id,
        u'owner': p1.owner.id,
        u'active': True,
        u'id': p1.id,
        u'favourite': False,
    }
    expected_meta = {
        u'page': 1,
        u'next': None,
        u'previous': None,
        u'per_page': 10,
        u'total': 1,
        u'pages': 1,
        u'echo': None,
    }

    # 'results' was popped above, so only meta data is left in `actual`
    self.assertDictsEqual(actual, expected_meta)
    self.assertDictsEqual(actual_results, expected_results)
def test_elastic_hash(self):
    """Can we reproduce a hash from elastic data alone?"""
    article = Article(
        date=datetime.date(2015, 1, 1),
        section="\u6f22\u5b57",
        pagenr=1928390,
        headline="Headline hier.",
        byline="byline..",
        length=1928,
        metastring="Even more strange characters.. \x0C ..",
        url="https://example.com",
        externalid=None,
        author=None,
        addressee="Hmm",
        text="Contains invalid char \x08 woo",
        medium=create_test_medium(name="abc."),
        project=create_test_project(),
    )
    article.save()

    es = ES()
    es.add_articles([article.id])
    expected_hash = get_article_dict(article)["hash"]
    es.flush()

    hits = es.query_all(filters={"ids": [article.id]}, fields=HASH_FIELDS + ["hash"])
    es_article = list(hits)[0]

    self.assertEqual(article.id, es_article.id)
    self.assertEqual(expected_hash, es_article.hash)
    # Recomputing the hash from the indexed fields should give the same value
    self.assertEqual(_get_hash(es_article.to_dict()), expected_hash)
def test_post(self):
    """Test whether posting and retrieving an article works correctly"""
    self.set_up()

    project = amcattest.create_test_project(owner=self.user)
    aset = amcattest.create_test_set(project=project)
    url = "/api/v4/projects/{p.id}/articlesets/{s.id}/articles/".format(p=project, s=aset)

    article = dict(
        date=datetime.datetime.now().isoformat(),
        headline='Test child',
        medium='Fantasy',
        text='Hello Universe',
        pagenr=1,
        url='http://example.org',
        uuid='c691fadf-3c45-4ed6-93fe-f035b5f500af',
    )

    self.post(url, article, self.user)
    # Flush the index before reading the article back
    amcates.ES().flush()

    results = self.get(url)["results"]
    self.assertEqual(len(results), 1)
    stored = results[0]
    self.assertEqual(stored["headline"], article['headline'])
    self.assertEqual(toolkit.readDate(stored["date"]), toolkit.readDate(article['date']))
    self.assertEqual(stored["uuid"], article['uuid'])
def setUp(self):
    """Load the latin-1 encoded Mediargus fixture into self.test_text.

    The fixture is read as bytes inside a with-block, so the handle is
    closed and the explicit decode works on both Python 2 and 3.
    """
    import os.path
    self.test_file = os.path.join(os.path.dirname(__file__), 'test_files', 'mediargus.txt')
    with open(self.test_file, 'rb') as fixture:
        self.test_text = fixture.read().decode('latin-1')
    # NOTE(review): the original followed this with a bare `return` and an
    # unreachable `self.script = Mediargus(...)` assignment. It never ran,
    # so it is left out here; restore it deliberately if tests need
    # self.script.
    return
def _run_test_csv(header, rows, field_map, **options):
    """Write header and rows to a temporary CSV file, upload it and run the CSV script.

    :param header: sequence of column names, written as the first row
    :param rows: iterable of rows (sequences of cell values)
    :param field_map: mapping passed to the upload form as JSON
    :param options: accepted for call compatibility; not used here
    :return: queryset with the articles of the resulting article set
    :raises Exception: with the form errors if the upload form is invalid
    """
    from tempfile import NamedTemporaryFile

    project = amcattest.create_test_project()
    with NamedTemporaryFile(suffix=".txt", mode="w", encoding="utf-8") as f:
        writer = csv.writer(f)
        for row in [header] + list(rows):
            writer.writerow(list(row))
        # Make sure the rows are on disk before the upload reads the file
        f.flush()

        upload = create_test_upload(f.name, project=project)
        form = UploadForm(
            data={
                "project": project.id,
                "field_map": json.dumps(field_map),
                "encoding": "utf-8",
                "upload": upload.id,
            }
        )
        form.full_clean()
        if not form.is_valid():
            raise Exception(form.errors)
        articleset = CSV(form).run()
    return articleset.articles.all()
def test_project_user_add(self):
    """Only superusers and project admins may change the roles on a project."""
    project = amcattest.create_test_project()
    project_admin = User.objects.first()
    plebs = amcattest.create_test_user()

    admin = amcattest.create_test_user()
    admin.is_superuser = True
    admin.save()

    url = "/projects/{}/users/add/".format(project.id)

    def attempt(as_user, role, expected_status, expected_roles):
        # Post a role change for plebs; check the status and the role count
        response = self.post(url, as_user, {"role": role, "user": plebs.id})
        self.assertEqual(expected_status, response.status_code)
        self.assertEqual(expected_roles, ProjectRole.objects.count())

    # Adding a user as plebs should not be possible
    self.assertEqual(1, ProjectRole.objects.count())
    admin_role = Role.objects.get(label="admin")
    attempt(plebs, admin_role.id, 403, 1)

    # Adding as superuser should be possible
    reader_role = Role.objects.get(label="reader")
    attempt(admin, reader_role.id, 302, 2)

    # Plebs still cannot promote themselves after having been added
    admin_role = Role.objects.get(label="admin")
    attempt(plebs, admin_role.id, 403, 2)

    # An empty role removes the user, allowed for an admin on the project
    attempt(project_admin, "", 302, 1)
def test_date_formats(self):
    """All supported date notations should be cleaned to 25 October 2006."""
    dates = (
        "2006-10-25", "2006/10/25", "25-10-2006", "25/10/2006",
        "Oct 25 2006", "Oct 25, 2006", "25 Oct 2006", "25 Oct, 2006",
        "October 25 2006", "October 25, 2006", "25 October 2006",
        "25 October, 2006",
    )
    expected = datetime.date(2006, 10, 25)
    project = amcattest.create_test_project()

    def cleaned(**kwargs):
        # get_form returns a 3-tuple; only the form itself is needed here
        _p, _c, form = self.get_form(project=project, **kwargs)
        form.full_clean()
        return form.cleaned_data

    for date in dates:
        data = cleaned(start_date=date)
        self.assertEqual(expected, data["start_date"].date())

    # datetype 'on' should fill both ends of the range
    for date in dates:
        data = cleaned(on_date=date, datetype="on")
        self.assertEqual(expected, data["start_date"].date())
        self.assertEqual(expected, data["end_date"].date())

    for date in dates:
        data = cleaned(end_date=date)
        self.assertEqual(expected, data["end_date"].date())
def test_children(self):
    """Posting an article with children should link each child to its parent."""
    project = amcattest.create_test_project()
    aset = amcattest.create_test_set(project=project)
    url = "/api/v4/projects/{p.id}/articlesets/{s.id}/articles/".format(p=project, s=aset)

    child = {
        'date': '2001-01-02',
        'headline': 'Test child',
        'medium': 'Fantasy',
        'text': 'Hello Universe',
    }
    parent = {
        'date': '2001-01-01',
        'headline': 'Test parent',
        'medium': 'My Imagination',
        'text': 'Hello World',
        # The children need to be json dumped because the django client
        # does weird stuff with post data
        'children': json.dumps([child]),
    }

    self.post(url, parent, as_user=self.user)
    amcates.ES().flush()

    results = self.get(url)["results"]
    by_headline = {art['headline']: art for art in results}
    self.assertEqual(set(by_headline), {'Test parent', 'Test child'})
    self.assertEqual(by_headline['Test child']['parent'],
                     by_headline['Test parent']['id'])
def test_split(self):
    """The fixture text should split into 100 articles of two parts each."""
    project_id = amcattest.create_test_project().id
    script = mediargus.Mediargus(project=project_id)
    articles = script.split_text(self.test_text)

    self.assertEqual(len(articles), 100)
    for parts in articles:
        self.assertEqual(len(parts), 2)
def setUp(self):
    """Create a test project and pass both BZK html fixtures through _rmcache."""
    from django.core.files import File
    import os.path

    self.project = amcattest.create_test_project()
    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')

    first = os.path.join(self.dir, 'test.html')
    second = os.path.join(self.dir, 'test_scrape2.html')
    self.file_scrape1 = _rmcache(first)
    self.file_scrape2 = _rmcache(second)
def test_body_to_article(self):
    """Check body_to_article on the first parsed article and validate the rest.

    Only the first article is compared field by field; the remaining
    articles are only passed through full_clean().
    """
    import ast

    _header, body = self.split()
    articles = [parse_article(a) for a in split_body(body)]

    # Only testing the first article. If this contains correct
    # data, we assume the implementation is correct.
    art = body_to_article(*articles[0])
    self.assertEqual(art.length, 306)
    self.assertEqual(art.headline, "This is a headline")
    self.assertEqual(art.byline, "with a byline. The article contains unicode characters.")
    self.assertEqual(art.text, articles[0][2])
    self.assertEqual(art.date, datetime.datetime(2011, 8, 31))
    self.assertEqual(art.medium.name, u"B\u00f6rsen-Zeitung")
    self.assertEqual(art.author, "MF Tokio")
    # literal_eval parses the dict literal without executing arbitrary code
    self.assertEqual(ast.literal_eval(art.metastring),
                     {u'update': u'2. September 2011',
                      u'language': u'GERMAN; DEUTSCH',
                      u'publication-type': u'Zeitung'})

    # Setup environment
    dp = amcattest.create_test_project()

    # Test remaining articles
    for parsed in articles[1:]:
        if parsed is None:
            continue
        # The medium (element 4) is created first, before the article is built
        self._create_medium(parsed[4])
        article = body_to_article(*parsed)
        article.project = dp
        article.full_clean()
def test_filter_articlemeta(self):
    """Filter on date ranges and make sure the normal filters still work."""
    from api.rest.resources import ArticleMetaResource

    def ids(**filters):
        # Shorthand: ids returned by the article meta resource for these filters
        return self._get_ids(ArticleMetaResource, **filters)

    project = amcattest.create_test_project(name="test")
    jan = amcattest.create_test_article(project=project, date="2012-01-01")
    feb = amcattest.create_test_article(project=project, date="2012-02-01")
    mar = amcattest.create_test_article(project=project, date="2012-03-01")

    # Filter on article set
    aset = amcattest.create_test_set(articles=[jan, feb])
    self.assertEqual(ids(articleset=aset.id), {jan.id, feb.id})

    # Filter on dates
    self.assertEqual(ids(project=project.id), {jan.id, feb.id, mar.id})
    self.assertEqual(ids(project=project.id, date='2012-01-01'), {jan.id})
    self.assertEqual(ids(project=project.id, date_from='2012-01-15'), {feb.id, mar.id})
    self.assertEqual(ids(project=project.id, date_to='2012-01-15'), {jan.id})

    # Filter on multiple pk values
    #self.assertEqual(self._get_ids(ArticleMetaResource, pk_in=",".join(map(str, [a1.id, a2.id]))), {a1.id, a2.id})
    self.assertEqual(ids(pk=[jan.id, feb.id]), {jan.id, feb.id})
def test_upload(self):
    """Test uploading with file map works and provenance is set correctly"""
    import os.path
    from django.core.files import File

    field_map = {}
    for name in ["date", "title", "length_int", "text", "section", "medium"]:
        field_map[name] = dict(type='field', value=name)

    form = dict(project=amcattest.create_test_project().id,
                encoding="UTF-8",
                field_map=json.dumps(field_map),
                articleset_name="test set lexisnexis")

    # First file: contains a query, which should end up in the provenance
    uploaded = LexisNexis(filename=self.test_file, **form).run()
    articleset = ArticleSet.objects.get(pk=uploaded.id)
    arts = articleset.articles.all()
    self.assertEqual(len(arts), len(self.test_body_sols))
    self.assertIn("LexisNexis query: '(((Japan OR Fukushima)", articleset.provenance)

    # Compare one stored article against its expected values
    expected = self.test_body_sols[1]
    stored = articleset.articles.get(title=expected['title'])
    self.assertEqual(expected['text'], stored.text)
    self.assertEqual(expected['date'], str(stored.date))
    self.assertEqual(expected['length_int'], stored.properties['length_int'])
    self.assertEqual(expected['medium'], stored.properties['medium'])

    # Second file: no query, so provenance is the 'standard' message
    uploaded = LexisNexis(filename=self.test_file2, **form).run()
    articleset = ArticleSet.objects.get(pk=uploaded.id)
    self.assertTrue(articleset.provenance.endswith("test2.txt' using LexisNexis"))
def setUp(self):
    """Run the BZK PDF scraper on the bundled test.pdf fixture.

    Does nothing when slow tests are skipped. The fixture is opened in
    binary mode (PDF is not text) and closed again via addCleanup, so
    the handle is not leaked.
    """
    if amcattest.skip_slow_tests():
        return
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    pdf = open(os.path.join(self.dir, 'test.pdf'), 'rb')
    self.addCleanup(pdf.close)
    self.bzk = BZKPDFScraper(project=amcattest.create_test_project().id,
                             file=File(pdf),
                             articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()


def test_scrape_unit(self):
    """The scraper should have built an index and produced a result."""
    if amcattest.skip_slow_tests():
        return
    self.assertTrue(self.bzk.index)
    self.assertTrue(self.result)


def test_getarticle(self):
    """Check the properties of the scraped articles.

    headline, text, medium and date must be set on every article;
    pagenr only has to be set on at least one of them.
    """
    if amcattest.skip_slow_tests():
        return
    must_props = ('headline', 'text', 'medium', 'date')
    may_props = ('pagenr',)

    must_values = [[getattr(a.props, prop) for a in self.result] for prop in must_props]
    may_values = [[getattr(a.props, prop) for a in self.result] for prop in may_props]

    for proplist in must_values:
        self.assertTrue(all(proplist))
    for proplist in may_values:
        # Assumes at least one article has the property; fails otherwise
        self.assertTrue(any(proplist))
def test_selection(self):
    """Can we select articles outside the project?"""
    # Baseline: articles can be selected through their own project
    home = amcattest.create_test_project()
    articles = set()
    for _ in range(10):
        articles.add(amcattest.create_test_article(project=home))
    self.assertEqual(self.list(projects=[home.id]), articles)

    # A second project with a set containing the first project's articles
    guest = amcattest.create_test_project()
    shared = amcattest.create_test_set(project=guest)
    shared.add(*articles)

    # Selecting on the project alone gives nothing; selecting on the set
    # gives the articles
    self.assertEqual(self.list(projects=[guest.id]), set())
    self.assertEqual(self.list(projects=[guest.id], articlesets=[shared.id]), articles)
def setUp(self):
    """Run the BZK scraper on the bundled test.html fixture.

    The fixture handle is closed via addCleanup instead of being leaked.
    """
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    html = open(os.path.join(self.dir, 'test.html'))
    self.addCleanup(html.close)
    self.bzk = BZK(project=amcattest.create_test_project().id,
                   file=File(html),
                   articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()


def test_scrape_unit(self):
    """The scraper should have produced a non-empty result."""
    self.assertTrue(self.result)


def test_scrape_file(self):
    """Check the properties of the scraped articles.

    headline, text, medium and date must be set on every article;
    pagenr and section only have to be set on at least one of them.
    """
    must_props = ('headline', 'text', 'medium', 'date')
    may_props = ('pagenr', 'section')

    must_values = [[getattr(a, prop) for a in self.result] for prop in must_props]
    may_values = [[getattr(a, prop) for a in self.result] for prop in may_props]

    for proplist in must_values:
        self.assertTrue(all(proplist))
    for proplist in may_values:
        # Assumes at least one article has the property; fails otherwise
        self.assertTrue(any(proplist))
def _run_test_csv(header, rows, field_map, **options):
    """Write header and rows to a temporary CSV file and run the CSV script on it.

    :param header: sequence of column names, written as the first row
    :param rows: iterable of rows (sequences of cell values)
    :param field_map: mapping passed to the upload form as JSON
    :param options: accepted for call compatibility; not used here
    :return: queryset with the articles of the resulting article set
    :raises Exception: with the form errors if the upload form is invalid
    """
    from tempfile import NamedTemporaryFile

    project = amcattest.create_test_project()
    with NamedTemporaryFile(suffix=".txt", mode="w", encoding="utf-8") as f:
        writer = csv.writer(f)
        for row in [header] + list(rows):
            writer.writerow(list(row))
        # Make sure the rows are on disk before the script reads the file
        f.flush()

        form = UploadForm(
            data={
                "project": project.id,
                "field_map": json.dumps(field_map),
                "encoding": "UTF-8",
                "filename": f.name,
            }
        )
        form.full_clean()
        if not form.is_valid():
            raise Exception(form.errors)
        articleset = CSV(form).run()
    return articleset.articles.all()
def _set_up(self):
    """Create a project with an indexed set of five articles and default options.

    Must be called manually. Two articles share the 'one'/'two' content
    and three share 'title'/'text'; they differ only in their unique_id
    property.
    """
    self.project = amcattest.create_test_project()
    now = datetime.datetime.now()

    specs = [
        ("one", "two", {"field": "three", "unique_id": "1"}),
        ("one", "two", {"field": "three", "unique_id": "2"}),
        ("title", "text", {"unique_id": "3"}),
        ("title", "text", {"unique_id": "4"}),
        ("title", "text", {"unique_id": "5"}),
    ]
    articles = [
        {"title": title, "text": text, "date": now, "properties": properties}
        for title, text, properties in specs
    ]

    self.test_set = amcattest.create_test_set(project=self.project)
    self.articles = []
    for fields in articles:
        created = amcattest.create_test_article(articleset=self.test_set, **fields)
        self.articles.append(created)
    self.test_set.add(*self.articles)
    self.test_set.refresh_index(True)

    self.base_options = dict(
        articleset=self.test_set.id,
        ignore_fields=set(),
        save_duplicates_to=None,
        dry_run=False,
    )
    ES().refresh()
def test_articleset_triggers(self):
    """Is an article queued when it is added to or removed from a set?"""
    article = amcattest.create_test_article()
    aset = amcattest.create_test_set()
    self._flush_queue()
    self.assertNotIn(article.id, self._all_articles())

    # Adding to a set queues the article
    aset.add(article)
    self.assertIn(article.id, self._all_articles())

    # So does removing it from the set
    self._flush_queue()
    aset.remove(article)
    self.assertIn(article.id, self._all_articles())

    # And deleting it; remember the id, since it is needed after the delete
    self._flush_queue()
    deleted_id = article.id
    article.delete()
    self.assertIn(deleted_id, self._all_articles())

    # Moving a set to another project queues the articles in the set
    moved = amcattest.create_test_article()
    aset.add(moved)
    self._flush_queue()
    aset.project = amcattest.create_test_project()
    aset.save()
    self.assertIn(moved.id, self._all_articles())
def test_date_formats(self):
    """All supported date notations should be cleaned to 25 October 2006."""
    dates = (
        "2006-10-25", "2006/10/25", "25-10-2006", "25/10/2006",
        "Oct 25 2006", "Oct 25, 2006", "25 Oct 2006", "25 Oct, 2006",
        "October 25 2006", "October 25, 2006", "25 October 2006",
        "25 October, 2006",
    )
    expected = datetime.date(2006, 10, 25)
    project = amcattest.create_test_project()

    def cleaned(**kwargs):
        # get_form returns a 3-tuple; only the form itself is needed here
        _p, _c, form = self.get_form(project=project, **kwargs)
        form.full_clean()
        return form.cleaned_data

    for date in dates:
        data = cleaned(start_date=date)
        self.assertEqual(expected, data["start_date"].date())

    # datetype 'on' should fill both ends of the range
    for date in dates:
        data = cleaned(on_date=date, datetype="on")
        self.assertEqual(expected, data["start_date"].date())
        self.assertEqual(expected, data["end_date"].date())

    for date in dates:
        data = cleaned(end_date=date)
        self.assertEqual(expected, data["end_date"].date())
def test_articles_preprocessing_reactivate(self):
    """Are deleted analyses undeleted when they are reactivated?"""
    project = amcattest.create_test_project()
    article = amcattest.create_test_article(project=project)
    analysis = amcattest.create_test_analysis()
    AnalysisProject.objects.create(project=project, analysis=analysis)

    def check_actions(expected_undeletions):
        # Compute the actions within the query budget and verify that only
        # the undeletions differ from a no-op
        with self.checkMaxQueries(n=4):  # 3 for needed, 1 for existing
            actions = _get_articles_preprocessing_actions([article.id])
            additions, restarts, deletions, undeletions = actions
            self.assertEqual(multidict(additions), {})
            self.assertEqual(list(deletions), [])
            self.assertEqual(set(undeletions), expected_undeletions)
            self.assertEqual(set(restarts), set())

    # Baseline: required == actual should be a no-op
    aa = AnalysisArticle.objects.create(article=article, analysis=analysis)
    check_actions(set())

    # Now flag the analysis article as deleted and see if it is reactivated
    aa.delete = True
    aa.save()
    check_actions({aa.id})
def setUp(self):
    """Create a project with two article sets and store its JSON list url."""
    project = amcattest.create_test_project()
    for set_name in ("foo", "bar"):
        amcattest.create_test_set(name=set_name, project=project)

    template = "/api/v4/projects/{project.id}/articlesets/?format=json"
    self.url = template.format(project=project)
def __init__(self, project=None,articleset=None, n=10):
    """Initialise the test scraper, creating any fixture that is not supplied.

    :param project: project to scrape into; a test project is created if None
    :param articleset: target article set; a test set is created if None
    :param n: stored as self.n
    """
    project = amcattest.create_test_project() if project is None else project
    articleset = amcattest.create_test_set() if articleset is None else articleset
    # The parent constructor is given the ids rather than the objects
    super(_TestScraper, self).__init__(articleset=articleset.id, project=project.id)
    self.n = n
def test_date_formats(self):
    """The numeric date notations should yield a date range on 25 October 2006."""
    dates = ("2006-10-25", "2006/10/25", "25-10-2006", "25/10/2006")
    expected = datetime.date(2006, 10, 25)
    project = amcattest.create_test_project()

    def date_range(date, data):
        # Clean the form, read the range, then check that the form is valid
        form = SelectionForm(data=data, project=project)
        form.full_clean()
        bounds = form.get_date_range()
        self.assertFormValid(form, "Date: {}".format(repr(date)))
        return bounds

    for date in dates:
        start_date, end_date = date_range(date, {"start_date": date, "datetype": "after"})
        self.assertEqual(expected, start_date.date())

    # datetype 'on' should fill both ends of the range
    for date in dates:
        start_date, end_date = date_range(date, {"on_date": date, "datetype": "on"})
        self.assertEqual(expected, start_date.date())
        self.assertEqual(expected, end_date.date())

    for date in dates:
        start_date, end_date = date_range(date, {"end_date": date, "datetype": "before"})
        self.assertEqual(expected, end_date.date())
def test_deduplication(self):
    """Does deduplication work as it is supposed to?

    Covers four situations, all using the same article properties:
    a first article, a duplicate with checking enabled, a duplicate that is
    also assigned to an article set, and a duplicate with checking disabled.
    """
    art = dict(headline="test", byline="test", date='2001-01-01',
               medium=amcattest.create_test_medium(),
               project=amcattest.create_test_project(),
               )

    a1 = amcattest.create_test_article(**art)

    def q(**filters):
        """Flush the index and return the set of article ids matching filters."""
        amcates.ES().flush()
        return set(amcates.ES().query_ids(filters=filters))

    self.assertEqual(q(mediumid=art['medium']), {a1.id})

    # duplicate articles should not be added
    a2 = amcattest.create_test_article(check_duplicate=True, **art)
    self.assertFalse(Article.objects.filter(pk=a2.id).exists())
    self.assertEqual(a2.duplicate_of, a1.id)
    self.assertEqual(q(mediumid=art['medium']), {a1.id})

    # however, if an articleset is given the 'existing' article
    # should be added to that set
    s1 = amcattest.create_test_set()
    a3 = amcattest.create_test_article(check_duplicate=True, articleset=s1, **art)
    # check the article created in this step (a3); the previous version
    # re-checked a2 here, which left a3 unverified
    self.assertFalse(Article.objects.filter(pk=a3.id).exists())
    self.assertEqual(a3.duplicate_of, a1.id)
    self.assertEqual(q(mediumid=art['medium']), {a1.id})
    self.assertEqual(set(s1.get_article_ids()), {a1.id})
    self.assertEqual(q(sets=s1.id), {a1.id})

    # can we suppress duplicate checking?
    a4 = amcattest.create_test_article(check_duplicate=False, **art)
    self.assertTrue(Article.objects.filter(pk=a4.id).exists())
    self.assertFalse(hasattr(a4, 'duplicate_of'))
    self.assertIn(a4.id, q(mediumid=art['medium']))
def setUp(self):
    """Run the BZK script on test_files/bzk/test.html and keep the result.

    The script is built with the id of a fresh test project, the opened
    HTML file and the id of a fresh test set; the value returned by
    ``run()`` is stored on ``self.result``.
    """
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    html = open(os.path.join(self.dir, 'test.html'))
    # The handle used to be left open; register the close so it also runs
    # when a later statement in setUp raises.
    self.addCleanup(html.close)
    self.bzk = BZK(project=amcattest.create_test_project().id,
                   file=File(html),
                   articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()


def test_scrape_unit(self):
    """Did running the script in setUp yield a truthy result?"""
    self.assertTrue(self.result)


def test_scrape_file(self):
    """Do the scraped articles carry the expected properties?

    headline, text, medium and date must be truthy on every article;
    pagenr and section must be truthy on at least one article.
    """
    must_props = ('headline', 'text', 'medium', 'date')
    may_props = ('pagenr', 'section')

    # collect every value first, so a missing attribute fails before any
    # assertion runs
    must_values = [[getattr(a, prop) for a in self.result] for prop in must_props]
    may_values = [[getattr(a, prop) for a in self.result] for prop in may_props]

    for proplist in must_values:
        self.assertTrue(all(proplist))
    for proplist in may_values:
        # assuming at least one of the articles has the property
        self.assertTrue(any(proplist))
def setUp(self):
    """Create a test project and record the paths of the two BZK test files.

    Sets ``self.project``, ``self.dir`` (the ``test_files/bzk`` directory next
    to this module), ``self.file_scrape1`` and ``self.file_scrape2``.
    """
    from django.core.files import File  # not referenced in this method
    import os.path

    self.project = amcattest.create_test_project()
    module_dir = os.path.dirname(__file__)
    self.dir = os.path.join(module_dir, 'test_files', 'bzk')
    self.file_scrape1, self.file_scrape2 = [
        os.path.join(self.dir, filename)
        for filename in ('test.html', 'test_scrape2.html')
    ]
def __init__(self, project=None, articleset=None, n=10):
    """Set up the scraper with a project and article set, defaulting to new ones.

    :param project: project to use, or None to create a test project
    :param articleset: article set to use, or None to create a test set
    :param n: value kept on ``self.n``
    """
    target_project = project if project is not None else amcattest.create_test_project()
    target_set = articleset if articleset is not None else amcattest.create_test_set()

    # only the ids are handed to the parent class
    parent_options = {"articleset": target_set.id, "project": target_project.id}
    super(_TestScraper, self).__init__(**parent_options)
    self.n = n
def test_update_visited(self):
    """Does update_visited store a record for the given profile, project and time?

    The first element of the returned pair must be the only RecentProject
    matching the user profile, project and visit timestamp.
    """
    user = amcattest.create_test_user()
    project = amcattest.create_test_project(owner=user)
    visited_at = datetime.now()

    recent, _ = RecentProject.update_visited(user.userprofile, project,
                                             date_visited=visited_at)

    stored = RecentProject.objects.filter(user=user.userprofile,
                                          project=project,
                                          date_visited=visited_at)
    self.assertQuerysetEqual(stored, [repr(recent)])
def setUp(self):
    """Create a user holding the metareader role on a project without guest role.

    Sets ``self.user`` and ``self.project``; the project is created with
    ``guest_role=None`` and linked to the user through a ProjectRole.
    """
    self.user = amcattest.create_test_user()
    self.project = amcattest.create_test_project(guest_role=None)

    metareader = Role.objects.get(id=ROLE_PROJECT_METAREADER)
    ProjectRole.objects.create(user=self.user, project=self.project,
                               role=metareader)
def test_set(self):
    """Do both the project and the set contain ``n`` articles after scraping?"""
    project = amcattest.create_test_project()
    articleset = amcattest.create_test_set()
    controller = SimpleController(articleset)
    scraper = _TestScraper(project=project, articleset=articleset)

    controller.scrape(scraper)

    # project first, then set; each must hold exactly scraper.n articles
    for container in (project, articleset):
        self.assertEqual(container.articles.count(), scraper.n)
def setUp(self):
    """Prepare ``self.user`` as metareader of ``self.project``.

    The project is created with ``guest_role=None``; a ProjectRole row ties
    the user to it with the role whose id is ROLE_PROJECT_METAREADER.
    """
    self.user = amcattest.create_test_user()
    self.project = amcattest.create_test_project(guest_role=None)

    membership = dict(
        user=self.user,
        project=self.project,
        role=Role.objects.get(id=ROLE_PROJECT_METAREADER),
    )
    ProjectRole.objects.create(**membership)
def set_up(self):
    """Create a project with two favourite article sets for the query view tests.

    Calls the parent ``setUp`` first, then sets ``self.project``,
    ``self.user`` (the project's ``insert_user``), ``self.aset1`` and
    ``self.aset2``.

    NOTE(review): this method is named ``set_up`` rather than ``setUp``;
    presumably it is invoked explicitly elsewhere -- confirm.
    """
    super(TestQueryView, self).setUp()

    project = amcattest.create_test_project()
    self.project = project
    self.user = project.insert_user

    # first positional argument is 2 resp. 3 -- presumably the number of
    # articles to create in each set; verify against create_test_set
    self.aset1 = amcattest.create_test_set(2, project=project)
    self.aset2 = amcattest.create_test_set(3, project=project)

    for favourite in (self.aset1, self.aset2):
        project.favourite_articlesets.add(favourite)
def setUp(self):
    """Replace all Scraper rows with two daily test scrapers and create a project.

    Sets ``self.ds`` (TestDatedScraper, bound to article set
    'TestArticleSet'), ``self.dbs`` (TestDBScraper, with credentials) and
    ``self.project`` (named 'scrapetest').
    """
    # start from a clean table so only the two scrapers below exist
    Scraper.objects.all().delete()

    # a comma was missing between run_daily and articleset, which made the
    # whole definition a syntax error
    self.ds = Scraper.objects.create(module='amcat.tests.test_scraping',
                                     class_name='TestDatedScraper',
                                     run_daily=True,
                                     articleset='TestArticleSet')
    self.dbs = Scraper.objects.create(module='amcat.tests.test_scraping',
                                      class_name='TestDBScraper',
                                      run_daily=True,
                                      username='******',
                                      password='******')
    self.project = amcattest.create_test_project(name='scrapetest')
def setUp(self):
    """Run the BZKEML script on test_files/bzk/test.html and keep the result.

    The script is built with the id of a fresh test project, the opened
    file and the id of a fresh test set; the value returned by ``run()`` is
    stored on ``self.result``.
    """
    from django.core.files import File
    import os.path

    self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
    source = open(os.path.join(self.dir, 'test.html'))
    # The handle used to be left open; register the close so it also runs
    # when a later statement in setUp raises.
    self.addCleanup(source.close)
    self.bzk = BZKEML(project=amcattest.create_test_project().id,
                      file=File(source),
                      articleset=amcattest.create_test_set().id)
    self.result = self.bzk.run()
def test_get_triples_project(self):
    """Do the triples of a project include its identifier and creator?

    The project name contains a non-ASCII character (``\\x92``). The subject
    is the AmCAT-namespace node ``Project/<id>``; the expected objects are a
    Literal of the project id and the node ``User/<insert_user_id>``.
    """
    p = amcattest.create_test_project(name=u'\x92 bla')
    found = set(get_triples(p))

    subject = NS_AMCAT["Project/{p.id}".format(p=p)]
    expected = [
        (subject, NS_DC["identifier"], Literal(p.id)),
        (subject, NS_DC["creator"],
         NS_AMCAT["User/{p.insert_user_id}".format(p=p)]),
    ]
    for triple in expected:
        self.assertIn(triple, found)
def setUp(self):
    """Create a user, a project and codebook owned by that user, and log in.

    Sets ``self.project``, ``self.cb`` and ``self.client``; the client is
    logged in with the new user's credentials and the login must succeed.
    """
    from django.test import Client

    # NOTE(review): as written the literal has no replacement field, so the
    # uuid4() value is not interpolated into the username -- confirm.
    username = "******".format(uuid4())
    credentials = dict(username=username, password='******')

    owner = create_test_user(**credentials)
    self.project = amcattest.create_test_project(owner=owner)
    self.cb = amcattest.create_test_codebook(project=self.project)

    self.client = Client()
    logged_in = self.client.login(**credentials)
    self.assertTrue(logged_in)
def test_analyses_per_article(self):
    """Are analyses resolved per article through active projects and sets?

    Three projects are created, the first two active and the third inactive.
    The mapping article id -> analysis ids is checked four times: with no
    analyses at all, after adding analyses to the active projects, after
    adding one to the inactive project, and after adding articles to a set
    of an active project. Every lookup must stay within three queries.
    """
    active_a, active_b, inactive = [
        amcattest.create_test_project(active=flag)
        for flag in (True, True, False)
    ]
    art1 = amcattest.create_test_article(project=active_a)
    art2 = amcattest.create_test_article(project=active_b)
    art3 = amcattest.create_test_article(project=active_b)
    art4 = amcattest.create_test_article(project=inactive)
    article_ids = {art1.id, art2.id, art3.id, art4.id}

    def lookup():
        """Return the analyses-per-article mapping within the query budget."""
        # 2 for projects/article, 1 for analyses/project
        with self.checkMaxQueries(n=3):
            return multidict(_get_analyses_per_article(article_ids))

    # baseline: no articles have any analysis
    self.assertEqual(lookup(), {})

    # let's add some analyses to the active projects
    an1, an2, an3 = [amcattest.create_test_analysis() for _x in range(3)]
    for proj, analysis in ((active_a, an1), (active_a, an2),
                           (active_b, an2), (active_b, an3)):
        AnalysisProject.objects.create(project=proj, analysis=analysis)
    via_projects = {
        art1.id: {an1.id, an2.id},
        art2.id: {an2.id, an3.id},
        art3.id: {an2.id, an3.id},
    }
    self.assertEqual(lookup(), via_projects)

    # adding an analysis to an inactive project has no effect
    AnalysisProject.objects.create(project=inactive, analysis=an3)
    self.assertEqual(lookup(), via_projects)

    # adding an article to a project via a set does have effect
    set_a = amcattest.create_test_set(project=active_a)
    amcattest.create_test_set(project=active_b)  # created but left empty
    set_a.add(art4)
    set_a.add(art2)
    AnalysisProject.objects.create(project=inactive, analysis=an2)
    via_sets = {
        art1.id: {an1.id, an2.id},
        art2.id: {an1.id, an2.id, an3.id},
        art3.id: {an2.id, an3.id},
        art4.id: {an1.id, an2.id},
    }
    self.assertEqual(lookup(), via_sets)
def test_set(self):
    """Does scraping put ``n`` articles in both the project and the set?

    A SimpleController bound to a fresh set scrapes a _TestScraper that was
    given the same set and a fresh project; afterwards both article counts
    must equal the scraper's ``n``.
    """
    target_project = amcattest.create_test_project()
    target_set = amcattest.create_test_set()
    controller = SimpleController(target_set)
    test_scraper = _TestScraper(project=target_project, articleset=target_set)

    controller.scrape(test_scraper)

    expected_count = test_scraper.n
    self.assertEqual(target_project.articles.count(), expected_count)
    self.assertEqual(target_set.articles.count(), expected_count)