Example #1
0
    def set_up(self):
        """Create three articles spread over two indexed article sets.

        a1 and a2 share the date 2010-01-01 and go into aset1; a3 is dated
        one day later and is the only member of aset2. Each set is re-indexed
        and ES().refresh() is called at the end.
        """
        article_specs = (
            ("a1", "aap noot mies", datetime.datetime(2010, 1, 1),
             {"author": "De Bas", "length_int": 5}),
            ("a2", "aap noot geit", datetime.datetime(2010, 1, 1),
             {"author": "Het Martijn", "length_int": 5}),
            ("a3", "lamp", datetime.datetime(2010, 1, 2),
             {"author": "Het Martijn", "length_int": 15}),
        )
        for attr, text, date, properties in article_specs:
            article = amcattest.create_test_article()
            article.text = text
            article.date = date
            article.properties = properties
            article.save()
            setattr(self, attr, article)

        set_specs = (
            ("aset1", [self.a1, self.a2]),
            ("aset2", [self.a3]),
        )
        for attr, members in set_specs:
            article_set = amcattest.create_test_set()
            article_set.add_articles(members)
            article_set.refresh_index(True)
            setattr(self, attr, article_set)

        ES().refresh()
Example #2
0
    def setup(self):
        """Create two sets and five articles and return them all.

        Articles a-d are built unsaved (create=False) and then stored in the
        first set through Article.create_articles; article e is created
        directly in the second set.

        Returns the tuple (s1, s2, a, b, c, d, e).
        """
        first_set = amcattest.create_test_set()
        second_set = amcattest.create_test_set()

        unsaved_specs = (
            dict(text='aap noot mies', title='m1', date='2001-01-01'),
            dict(text='noot mies wim zus', title='m2', date='2001-02-01'),
            dict(text='mies bla bla bla wim zus jet', title='m2', date='2002-01-01'),
            dict(text='noot mies wim zus', title='m2', date='2001-02-03'),
        )
        a, b, c, d = [
            amcattest.create_test_article(create=False, **spec)
            for spec in unsaved_specs
        ]
        e = amcattest.create_test_article(
            text='aap noot mies', title='m3', articleset=second_set)

        Article.create_articles([a, b, c, d], articleset=first_set)
        ES().refresh()
        return first_set, second_set, a, b, c, d, e
Example #3
0
    def test_used_properties(self):
        """ES().get_used_properties reports the property names used in the given sets."""
        a1 = amcattest.create_test_article(
            properties=dict(p1="test", p2_date="2001-01-01"))
        a2 = amcattest.create_test_article(
            properties=dict(p1="test", p3_num=15))
        a3 = amcattest.create_test_article(
            properties=dict(p1="test", p4="test"))

        s1 = amcattest.create_test_set(articles=[a1])
        s2 = amcattest.create_test_set(articles=[a2])
        s3 = amcattest.create_test_set(articles=[a1, a3])
        ES().refresh()

        expectations = (
            ((s1.id,), {"p1", "p2_date"}),
            ((s1.id, s2.id), {"p1", "p2_date", "p3_num"}),
            ((s3.id,), {"p1", "p2_date", "p4"}),
        )
        # The same three checks are run twice in a row.
        # NOTE(review): presumably the second pass guards against stale or
        # cached results -- confirm the intent.
        for _pass in range(2):
            for set_ids, expected in expectations:
                used = set(ES().get_used_properties(list(set_ids)))
                self.assertEqual(used, expected)
    def test_aggregation(self):
        """Listing and aggregating by medium for a project with two mediums."""
        project = amcattest.create_test_project()
        medium_a, medium_b = (amcattest.create_test_medium() for _ in range(2))
        few = {amcattest.create_test_article(project=project, medium=medium_a)
               for _ in range(5)}
        many = {amcattest.create_test_article(project=project, medium=medium_b)
                for _ in range(15)}
        everything = few | many

        aset = amcattest.create_test_set(project=project)
        aset.add_articles(everything)
        aset.refresh_index()

        # Selecting on project, and on project plus medium id.
        self.assertEqual(self.list(projects=[project.id]),
                         self.pks(everything))
        self.assertEqual(self.list(projects=[project.id], mediums=[medium_a.id]),
                         self.pks(few))

        # Aggregating per medium gives one row per medium with its count.
        table = self.aggr(projects=[project.id], xAxis='medium')
        self.assertEqual(set(table), {(5,), (15,)})

        # Second project whose set holds the articles of the first project.
        other_project = amcattest.create_test_project()
        other_set = amcattest.create_test_set(project=other_project)
        other_set.add(*everything)
        # NOTE(review): the result of this aggregation is not asserted on.
        table = self.aggr(projects=[other_project.id],
                          articlesets=[other_set.id],
                          xAxis='medium')
Example #5
0
    def test_relative_date_hash(self):
        """A form with a relative date hashes differently once 'now' moves.

        The hash taken with datetime mocked one day ahead must differ from the
        unmocked one, while a hash taken with datetime mocked to the current
        time must equal it.
        """
        first_set = amcattest.create_test_set(2)
        second_set = amcattest.create_test_set(2, project=first_set.project)
        project = first_set.project
        nine_days_back = datetime.timedelta(-9).total_seconds()

        def current_hash():
            """Build a fresh form and return its hash."""
            _, _, form = self.get_form(
                project=project,
                articlesets=[first_set.id, second_set.id],
                relative_date=nine_days_back,
            )
            return form.get_hash()

        baseline = current_hash()

        tomorrow = datetime.datetime.now() + datetime.timedelta(1)
        with mock_datetime_now(tomorrow):
            shifted = current_hash()

        self.assertNotEqual(baseline, shifted)

        # Sanity-check the mock itself: active inside the block, gone after it.
        with mock_datetime_now(datetime.datetime.now()):
            unshifted = current_hash()
            self.assertEqual(datetime.datetime.__name__, "MockDateTimeToday")
        self.assertNotEqual(datetime.datetime.__name__, "MockDateTimeToday")

        self.assertEqual(baseline, unshifted)
Example #6
0
    def set_up(self):
        """Create three articles (keeping their mediums) in two indexed sets.

        a1 and a2 are dated 2010-01-01 and form aset1; a3 is dated one day
        later and forms aset2. The medium of each article is stored as
        m1..m3. Each set is re-indexed and ES().flush() is called at the end.
        """
        article_specs = (
            ("a1", "m1", "aap noot mies", datetime.datetime(2010, 1, 1)),
            ("a2", "m2", "aap noot geit", datetime.datetime(2010, 1, 1)),
            ("a3", "m3", "lamp", datetime.datetime(2010, 1, 2)),
        )
        for article_attr, medium_attr, text, date in article_specs:
            article = amcattest.create_test_article()
            article.text = text
            article.date = date
            article.save()
            setattr(self, article_attr, article)
            setattr(self, medium_attr, article.medium)

        set_specs = (
            ("aset1", [self.a1, self.a2]),
            ("aset2", [self.a3]),
        )
        for attr, members in set_specs:
            article_set = amcattest.create_test_set()
            article_set.add_articles(members)
            article_set.refresh_index(True)
            setattr(self, attr, article_set)

        ES().flush()
Example #7
0
    def test_hash(self):
        """Hashes of two forms agree when the query field is ignored.

        The forms list the same article sets and article ids in a different
        order and carry different queries: with "query" ignored the hashes
        must match, the full hashes must not.
        """
        set_a = amcattest.create_test_set(2)
        set_b = amcattest.create_test_set(2, project=set_a.project)
        project = set_a.project

        form_inputs = (
            ([set_a.id, set_b.id], "1\n2\n3", "abc\ndefg"),
            ([set_b.id, set_a.id], "1\n3\n2", "abc\ndifferent\nquery"),
        )
        forms = []
        for set_ids, article_ids, query in form_inputs:
            _, _, form = self.get_form(
                project=project,
                articlesets=set_ids,
                article_ids=article_ids,
                query=query,
            )
            forms.append(form)
        first, second = forms

        first.full_clean()
        second.full_clean()

        ignored = ("query",)
        self.assertEqual(first.get_hash(ignore_fields=ignored),
                         second.get_hash(ignore_fields=ignored))
        self.assertNotEqual(first.get_hash(), second.get_hash())
Example #8
0
    def setUp(self):
        """Create a project with sets "foo" and "bar" and store its article set URL."""
        project = amcattest.create_test_project()
        for set_name in ("foo", "bar"):
            amcattest.create_test_set(name=set_name, project=project)

        url_template = "/api/v4/projects/{project.id}/articlesets/?format=json"
        self.url = url_template.format(project=project)
Example #9
0
    def setUp(self):
        """Prepare a project holding the sets "foo" and "bar" plus the URL listing them."""
        project = amcattest.create_test_project()
        for set_name in ("foo", "bar"):
            amcattest.create_test_set(name=set_name, project=project)

        url_template = "/api/v4/projects/{project.id}/articlesets/?format=json"
        self.url = url_template.format(project=project)
Example #10
0
    def test_filters(self):
        """Check the medium and date filters of ES().query_ids, alone and combined."""
        m1, m2 = (amcattest.create_test_medium() for _ in range(2))
        a = amcattest.create_test_article(
            text='aap noot mies', medium=m1, date="2001-01-01")
        b = amcattest.create_test_article(
            text='noot mies wim zus', medium=m2, date="2002-01-01")
        c = amcattest.create_test_article(
            text='mies bla bla bla wim zus jet', medium=m2, date="2003-01-01")

        s1 = amcattest.create_test_set(articles=[a, b, c])
        s2 = amcattest.create_test_set(articles=[a, b])
        ES().flush()

        def matching_ids(**filters):
            """Return the set of article ids ES yields for these filters."""
            return set(ES().query_ids(filters=filters))

        checks = (
            # medium filter
            (dict(mediumid=m2.id), {b.id, c.id}),
            # date filters
            (dict(sets=s1.id, start_date='2001-06-01'), {b.id, c.id}),
            # start is inclusive
            (dict(sets=s1.id, start_date='2002-01-01', end_date="2002-06-01"),
             {b.id}),
            # end is exclusive
            (dict(sets=s1.id, start_date='2001-01-01', end_date="2003-01-01"),
             {a.id, b.id}),
            # combinations
            (dict(sets=s2.id, start_date='2001-06-01'), {b.id}),
            (dict(end_date='2002-06-01', mediumid=m2.id), {b.id}),
        )
        for filters, expected in checks:
            self.assertEqual(matching_ids(**filters), expected)
Example #11
0
 def test_delete(self):
     """Deleting a set removes it and its exclusive articles, but not shared ones.

     Ten articles are created; the first eight go into the set that gets
     deleted and the last four into a second set, so two are shared.
     """
     doomed = amcattest.create_test_set()
     doomed_id = doomed.id
     survivor = amcattest.create_test_set()
     arts = [amcattest.create_test_article() for _ in range(10)]
     doomed.add_articles(arts[:8])
     survivor.add_articles(arts[6:])
     ES().flush()
     doomed.delete()
     ES().flush()

     exclusive, shared = arts[0], arts[6]

     # The set and the articles that lived only in it are gone from the DB.
     with self.assertRaises(ArticleSet.DoesNotExist):
         ArticleSet.objects.get(pk=doomed_id)
     with self.assertRaises(Article.DoesNotExist):
         Article.objects.get(pk=exclusive.id)

     # Shared articles are still there, and still in the surviving set.
     self.assertEqual(Article.objects.get(pk=shared.id).id, shared.id)
     remaining = set(survivor.articles.values_list("pk", flat=True))
     self.assertEqual(remaining, {art.id for art in arts[6:]})

     # The index reflects the deletion as well.
     self.assertEqual(ES().count(filters={"sets": doomed_id}), 0)
     self.assertEqual(ES().count(filters={"sets": survivor.id}), 4)
     with self.assertRaises(elasticsearch.NotFoundError):
         ES().get(exclusive.id)
     self.assertEqual(ES().get(shared.id)['id'], shared.id)
Example #12
0
    def test_filters(self):
        """Check the title and date filters of ES().query_ids, alone and combined."""
        a = amcattest.create_test_article(
            text='aap noot mies', title='m1', date="2001-01-01")
        b = amcattest.create_test_article(
            text='noot mies wim zus', title='m2', date="2002-01-01")
        c = amcattest.create_test_article(
            text='mies bla bla bla wim zus jet', title='m2', date="2003-01-01")

        s1 = amcattest.create_test_set(articles=[a, b, c])
        s2 = amcattest.create_test_set(articles=[a, b])
        ES().refresh()

        def matching_ids(**filters):
            """Return the set of article ids ES yields for these filters."""
            return set(ES().query_ids(filters=filters))

        checks = (
            # title filter
            (dict(title='m2'), {b.id, c.id}),
            # date filters
            (dict(sets=s1.id, start_date='2001-06-01'), {b.id, c.id}),
            # start is inclusive
            (dict(sets=s1.id, start_date='2002-01-01', end_date="2002-06-01"),
             {b.id}),
            # end is exclusive
            (dict(sets=s1.id, start_date='2001-01-01', end_date="2003-01-01"),
             {a.id, b.id}),
            # combinations
            (dict(sets=s2.id, start_date='2001-06-01'), {b.id}),
            (dict(end_date='2002-06-01', title='m2'), {b.id}),
        )
        for filters, expected in checks:
            self.assertEqual(matching_ids(**filters), expected)
Example #13
0
    def test_aggregate(self):
        """Aggregate a set per medium and per date interval; check statistics and media."""
        from amcat.models import Article
        m1 = amcattest.create_test_medium(name="De Nep-Krant")
        m2, m3 = (amcattest.create_test_medium() for _ in range(2))
        s1 = amcattest.create_test_set()
        s2 = amcattest.create_test_set()
        # This article sits in s2 only, so it must not show up in any s1 result.
        unused = amcattest.create_test_article(
            text='aap noot mies', medium=m3, articleset=s2)

        unsaved_specs = (
            dict(text='aap noot mies', medium=m1, date='2001-01-01'),
            dict(text='noot mies wim zus', medium=m2, date='2001-02-01'),
            dict(text='mies bla bla bla wim zus jet', medium=m2, date='2002-01-01'),
            dict(text='noot mies wim zus', medium=m2, date='2001-02-03'),
        )
        a, b, c, d = [
            amcattest.create_test_article(create=False, **spec)
            for spec in unsaved_specs
        ]
        Article.create_articles(
            [a, b, c, d], articleset=s1, check_duplicate=False, create_id=True)
        ES().flush()

        def grouped(**options):
            """Run an aggregate query restricted to s1 and return it as a dict."""
            return dict(ES().aggregate_query(filters=dict(sets=s1.id), **options))

        self.assertEqual(grouped(group_by="mediumid"),
                         {m1.id: 1, m2.id: 3})
        self.assertEqual(grouped(group_by="date", date_interval="year"),
                         {datetime(2001, 1, 1): 3, datetime(2002, 1, 1): 1})
        self.assertEqual(grouped(group_by="date", date_interval="month"),
                         {datetime(2001, 1, 1): 1,
                          datetime(2002, 1, 1): 1,
                          datetime(2001, 2, 1): 2})

        # Statistics over the set: article count and date range.
        stats = ES().statistics(filters=dict(sets=s1.id))
        self.assertEqual(stats.n, 4)
        self.assertEqual(stats.start_date, datetime(2001, 1, 1))
        self.assertEqual(stats.end_date, datetime(2002, 1, 1))

        # Media occurring in the set.
        listed_media = set(ES().list_media(filters=dict(sets=s1.id)))
        self.assertEqual(listed_media, {m1.id, m2.id})
Example #14
0
    def test_query_args_from_form(self):
        """query_args_from_form turns form data into start/rows/filters arguments."""
        m = amcattest.create_test_medium()
        s1 = amcattest.create_test_set()
        s2 = amcattest.create_test_set()

        form = {
            'sortColumn': '',
            'useSolr': True,
            'start': 100,
            'length': 100,
            'articleids': [],
            'articlesets': [s1, s2],
            'mediums': [m],
            'projects': [],
            'columns': [
                u'article_id', u'date', u'medium_id', u'medium_name',
                u'headline',
            ],
            'highlight': False,
            'columnInterval': 'month',
            'datetype': 'all',
            'sortOrder': '',
        }
        args = query_args_from_form(form)

        expected = {
            'start': 100,
            'rows': 100,
            'filters': [
                u'mediumid:{m.id}'.format(m=m),
                u'sets:{s1.id} OR sets:{s2.id}'.format(s1=s1, s2=s2),
            ],
        }
        self.assertEqual(args, expected)
Example #15
0
    def test_list_media(self):
        """get_mediums on sets and on their project must cope with more than 10 media."""
        from amcat.models import Article
        media = [amcattest.create_test_medium() for _ in range(20)]
        arts = [amcattest.create_test_article(medium=medium, create=False)
                for medium in media]

        def store(articles, article_set):
            """Save the articles into the given set and flush ES."""
            Article.create_articles(articles,
                                    articleset=article_set,
                                    check_duplicate=False,
                                    create_id=True)
            ES().flush()

        # First five media go into the first set.
        s1 = amcattest.create_test_set()
        store(arts[:5], s1)
        self.assertEqual(set(s1.get_mediums()), set(media[:5]))

        # The remaining fifteen go into a second set of the same project.
        s2 = amcattest.create_test_set(project=s1.project)
        store(arts[5:], s2)
        self.assertEqual(set(s2.get_mediums()), set(media[5:]))

        self.assertEqual(set(s1.project.get_mediums()), set(media))
Example #16
0
    def set_up(self):
        """Prepare articles a1-a3 with properties and the sets aset1 and aset2.

        aset1 receives a1 and a2 (both dated 2010-01-01), aset2 receives a3
        (dated 2010-01-02). Both sets are re-indexed before ES().refresh().
        """
        article_specs = (
            ("a1", "aap noot mies", datetime.datetime(2010, 1, 1),
             {"author": "De Bas", "length_int": 5}),
            ("a2", "aap noot geit", datetime.datetime(2010, 1, 1),
             {"author": "Het Martijn", "length_int": 5}),
            ("a3", "lamp", datetime.datetime(2010, 1, 2),
             {"author": "Het Martijn", "length_int": 15}),
        )
        for attr, text, date, properties in article_specs:
            article = amcattest.create_test_article()
            article.text = text
            article.date = date
            article.properties = properties
            article.save()
            setattr(self, attr, article)

        set_specs = (
            ("aset1", [self.a1, self.a2]),
            ("aset2", [self.a3]),
        )
        for attr, members in set_specs:
            article_set = amcattest.create_test_set()
            article_set.add_articles(members)
            article_set.refresh_index(True)
            setattr(self, attr, article_set)

        ES().refresh()
Example #17
0
    def test_filters(self):
        """Medium and date filters of ES().query_ids must select the right ids."""
        m1, m2 = (amcattest.create_test_medium() for _ in range(2))
        a = amcattest.create_test_article(
            text='aap noot mies', medium=m1, date="2001-01-01")
        b = amcattest.create_test_article(
            text='noot mies wim zus', medium=m2, date="2002-01-01")
        c = amcattest.create_test_article(
            text='mies bla bla bla wim zus jet', medium=m2, date="2003-01-01")

        s1 = amcattest.create_test_set(articles=[a, b, c])
        s2 = amcattest.create_test_set(articles=[a, b])
        ES().flush()

        def matching_ids(**filters):
            """Return the set of article ids ES yields for these filters."""
            return set(ES().query_ids(filters=filters))

        checks = (
            # medium filter
            (dict(mediumid=m2.id), {b.id, c.id}),
            # date filters
            (dict(sets=s1.id, start_date='2001-06-01'), {b.id, c.id}),
            # start is inclusive
            (dict(sets=s1.id, start_date='2002-01-01', end_date="2002-06-01"),
             {b.id}),
            # end is exclusive
            (dict(sets=s1.id, start_date='2001-01-01', end_date="2003-01-01"),
             {a.id, b.id}),
            # combinations
            (dict(sets=s2.id, start_date='2001-06-01'), {b.id}),
            (dict(end_date='2002-06-01', mediumid=m2.id), {b.id}),
        )
        for filters, expected in checks:
            self.assertEqual(matching_ids(**filters), expected)
Example #18
0
    def test_project_duplicates(self):
        """get_triples_project yields no duplicates, before or after adding the article to a new set."""
        def assert_unique_triples():
            found = list(get_triples_project(self.project))
            self.assertEqual(len(found), len(set(found)), "Duplicate triples!")

        assert_unique_triples()

        # Put the article into an additional set of the same project.
        extra_set = amcattest.create_test_set(project=self.project)
        extra_set.add(self.article)

        assert_unique_triples()
Example #19
0
    def test_project_duplicates(self):
        """Triples of the project must be unique, also once the article is in a second set."""
        def assert_unique_triples():
            found = list(get_triples_project(self.project))
            self.assertEqual(len(found), len(set(found)), "Duplicate triples!")

        assert_unique_triples()

        # Put the article into an additional set of the same project.
        extra_set = amcattest.create_test_set(project=self.project)
        extra_set.add(self.article)

        assert_unique_triples()
Example #20
0
    def test_analyses_per_article(self):
        """_get_analyses_per_article maps article ids to the analyses of their projects.

        Walks through four stages: no analyses at all, analyses on the two
        active projects, an analysis on the inactive project, and articles
        reaching a project through a set. Every lookup must stay within
        three queries.
        """
        # p1 and p2 are active, p3 is not.
        p1, p2, p3 = (amcattest.create_test_project(active=index < 2)
                      for index in range(3))
        a1 = amcattest.create_test_article(project=p1)
        a2 = amcattest.create_test_article(project=p2)
        a3 = amcattest.create_test_article(project=p2)
        a4 = amcattest.create_test_article(project=p3)
        articles = {a1.id, a2.id, a3.id, a4.id}

        def check(expected):
            # 2 queries for projects/article, 1 for analyses/project
            with self.checkMaxQueries(n=3):
                outcome = multidict(_get_analyses_per_article(articles))
                self.assertEqual(outcome, expected)

        # Baseline: no article has any analysis.
        check({})

        # Attach analyses to the active projects.
        n1, n2, n3 = (amcattest.create_test_analysis() for _ in range(3))
        for project, analysis in ((p1, n1), (p1, n2), (p2, n2), (p2, n3)):
            AnalysisProject.objects.create(project=project, analysis=analysis)
        check({
            a1.id: {n1.id, n2.id},
            a2.id: {n2.id, n3.id},
            a3.id: {n2.id, n3.id},
        })

        # An analysis on the inactive project changes nothing.
        AnalysisProject.objects.create(project=p3, analysis=n3)
        check({
            a1.id: {n1.id, n2.id},
            a2.id: {n2.id, n3.id},
            a3.id: {n2.id, n3.id},
        })

        # Adding an article to a project through a set does have an effect.
        s1 = amcattest.create_test_set(project=p1)
        amcattest.create_test_set(project=p2)  # created but otherwise unused
        for article in (a4, a2):
            s1.add(article)
        AnalysisProject.objects.create(project=p3, analysis=n2)
        check({
            a1.id: {n1.id, n2.id},
            a2.id: {n1.id, n2.id, n3.id},
            a3.id: {n2.id, n3.id},
            a4.id: {n1.id, n2.id},
        })
Example #21
0
    def set_up(self):
        """Create a project with two article sets and mark both as favourites.

        Stores the project, its insert_user, and the sets (built with 2 and 3
        as first argument to create_test_set) on the test instance.
        """
        super(TestQueryView, self).setUp()

        project = amcattest.create_test_project()
        self.project = project
        self.user = project.insert_user
        self.aset1 = amcattest.create_test_set(2, project=project)
        self.aset2 = amcattest.create_test_set(3, project=project)
        for favourite in (self.aset1, self.aset2):
            project.favourite_articlesets.add(favourite)
Example #22
0
    def set_up(self):
        """Set up a project, its insert_user and two favourite article sets."""
        super(TestQueryView, self).setUp()

        project = amcattest.create_test_project()
        self.project = project
        self.user = project.insert_user
        self.aset1 = amcattest.create_test_set(2, project=project)
        self.aset2 = amcattest.create_test_set(3, project=project)
        for favourite in (self.aset1, self.aset2):
            project.favourite_articlesets.add(favourite)
Example #23
0
 def setUp(self):
     """Replace all Scraper rows with two daily test scrapers and create a project.

     self.ds is a TestDatedScraper, self.dbs a TestDBScraper with
     credentials; each gets its own freshly created article set.
     """
     Scraper.objects.all().delete()
     scraper_module = 'amcat.tests.test_scraping'

     dated_set = amcattest.create_test_set(name='TestArticleSet')
     self.ds = Scraper.objects.create(
         module=scraper_module,
         class_name='TestDatedScraper',
         run_daily=True,
         articleset=dated_set,
     )

     db_set = amcattest.create_test_set(name='TestArticleSet2')
     self.dbs = Scraper.objects.create(
         module=scraper_module,
         class_name='TestDBScraper',
         run_daily=True,
         username='******',
         password='******',
         articleset=db_set,
     )

     self.project = amcattest.create_test_project(name='scrapetest')
Example #24
0
    def set_up(self):
        """Create a project and five short articles collected in one set, then flush ES."""
        self.project = amcattest.create_test_project()

        # Creation order is a1, a2, a4, a3, a5.
        article_texts = (
            ("a1", "aap noot mies"),
            ("a2", "aap noot"),
            ("a4", "aap noot"),
            ("a3", "aap"),
            ("a5", "vuur"),
        )
        for attr, text in article_texts:
            setattr(self, attr, amcattest.create_test_article(text=text))

        members = (self.a1, self.a2, self.a3, self.a4, self.a5)
        amcattest.create_test_set(members, project=self.project)
        ES().flush()
Example #25
0
    def set_up(self):
        """Create a project with two article sets flagged as favourite.

        The favourite flag is set through ProjectArticleSet.update_or_create
        with is_favourite=True for both sets.
        """
        super(TestQueryView, self).setUp()

        project = amcattest.create_test_project()
        self.project = project
        self.user = project.insert_user
        self.aset1 = amcattest.create_test_set(2, project=project)
        self.aset2 = amcattest.create_test_set(3, project=project)
        for favourite in (self.aset1, self.aset2):
            ProjectArticleSet.objects.update_or_create(
                project=project,
                articleset=favourite,
                defaults={'is_favourite': True},
            )
Example #26
0
    def setup(self):
        """Build the shared fixture: two sets and articles a-e.

        a-d are constructed with create=False and saved into the first set by
        Article.create_articles; e is created straight into the second set.

        Returns the tuple (s1, s2, a, b, c, d, e).
        """
        first_set = amcattest.create_test_set()
        second_set = amcattest.create_test_set()

        unsaved_specs = (
            dict(text='aap noot mies', title='m1', date='2001-01-01'),
            dict(text='noot mies wim zus', title='m2', date='2001-02-01'),
            dict(text='mies bla bla bla wim zus jet', title='m2', date='2002-01-01'),
            dict(text='noot mies wim zus', title='m2', date='2001-02-03'),
        )
        a, b, c, d = [
            amcattest.create_test_article(create=False, **spec)
            for spec in unsaved_specs
        ]
        e = amcattest.create_test_article(
            text='aap noot mies', title='m3', articleset=second_set)

        Article.create_articles([a, b, c, d], articleset=first_set)
        ES().refresh()
        return first_set, second_set, a, b, c, d, e
Example #27
0
    def test_articlesets(self):
        """ES documents list their sets, and filtering on a set finds its articles."""
        a, b, c = (amcattest.create_test_article() for _ in range(3))
        full_set = amcattest.create_test_set(articles=[a, b, c])
        pair_set = amcattest.create_test_set(articles=[b, c])
        # A third set holding only b, so c must not be reported as its member.
        single_set = amcattest.create_test_set(articles=[b])
        ES().refresh()

        indexed_c = ES().get(c.id)
        self.assertEqual(set(indexed_c['sets']), {full_set.id, pair_set.id})

        found = ES().query_ids(filters={'sets': full_set.id})
        self.assertEqual(set(found), {a.id, b.id, c.id})
Example #28
0
    def test_all_articlesets(self):
        """all_articlesets returns own and added sets, as a QuerySet."""
        from django.db.models.query import QuerySet

        own_project, other_project = (amcattest.create_test_project()
                                      for _ in range(2))
        own_set = amcattest.create_test_set(5, project=own_project)
        foreign_set = amcattest.create_test_set(5, project=other_project)

        # Initially only the project's own set is reported.
        self.assertEqual(set([own_set]), set(own_project.all_articlesets()))

        # After adding the other project's set, both are reported.
        own_project.articlesets.add(foreign_set)
        self.assertEqual(set([own_set, foreign_set]),
                         set(own_project.all_articlesets()))
        self.assertIsInstance(own_project.all_articlesets(), QuerySet)
Example #29
0
    def test_all_articlesets(self):
        """Project.all_articlesets covers own and added sets and yields a QuerySet."""
        from django.db.models.query import QuerySet

        own_project, other_project = (amcattest.create_test_project()
                                      for _ in range(2))
        own_set = amcattest.create_test_set(5, project=own_project)
        foreign_set = amcattest.create_test_set(5, project=other_project)

        # Initially only the project's own set is reported.
        self.assertEqual({own_set}, set(own_project.all_articlesets()))

        # After adding the other project's set, both are reported.
        own_project.articlesets.add(foreign_set)
        self.assertEqual({own_set, foreign_set},
                         set(own_project.all_articlesets()))
        self.assertIsInstance(own_project.all_articlesets(), QuerySet)
Example #30
0
    def test_articlesets(self):
        """Set membership must be visible in ES, per document and as a filter."""
        a, b, c = (amcattest.create_test_article() for _ in range(3))
        full_set = amcattest.create_test_set(articles=[a, b, c])
        pair_set = amcattest.create_test_set(articles=[b, c])
        # A third set holding only b, so c must not be reported as its member.
        single_set = amcattest.create_test_set(articles=[b])
        ES().refresh()

        indexed_c = ES().get(c.id)
        self.assertEqual(set(indexed_c['sets']), {full_set.id, pair_set.id})

        found = ES().query_ids(filters={'sets': full_set.id})
        self.assertEqual(set(found), {a.id, b.id, c.id})
Example #31
0
    def set_up(self):
        """Create a project and five short articles collected in one set, then refresh ES."""
        self.project = amcattest.create_test_project()

        # Creation order is a1, a2, a4, a3, a5.
        article_texts = (
            ("a1", "aap noot mies"),
            ("a2", "aap noot"),
            ("a4", "aap noot"),
            ("a3", "aap"),
            ("a5", "vuur"),
        )
        for attr, text in article_texts:
            setattr(self, attr, amcattest.create_test_article(text=text))

        members = (self.a1, self.a2, self.a3, self.a4, self.a5)
        amcattest.create_test_set(members, project=self.project)
        ES().refresh()
Example #32
0
    def test_get_mediums(self):
        """Project.get_mediums() returns the mediums of the articles in the project's sets.

        set1 and set2 belong to the same project; the expected mediums are
        those of their articles. An empty project must yield no mediums.
        """
        set1 = amcattest.create_test_set(2)
        set2 = amcattest.create_test_set(2, project=set1.project)
        # NOTE(review): set3 is created without a project argument, presumably
        # landing in a different project so its mediums act as noise -- confirm.
        set3 = amcattest.create_test_set(2)
        # Plain loop: refresh_index is called for its side effect only.
        for article_set in (set1, set2, set3):
            article_set.refresh_index()

        media = set(set1.project.get_mediums())
        expected = ({a.medium for a in set1.articles.all()}
                    | {a.medium for a in set2.articles.all()})
        self.assertEqual(media, expected)

        # can we get_mediums on an empty project?
        self.assertEqual(list(amcattest.create_test_project().get_mediums()), [])
Example #33
0
    def setup(self):
        """Create three mediums, two sets and articles a-e; return them all.

        a-d are built unsaved (create=False) and stored in the first set via
        Article.create_articles; e is created directly in the second set.

        Returns the tuple (m1, m2, m3, s1, s2, a, b, c, d, e).
        """
        m1 = amcattest.create_test_medium(name="De Nep-Krant")
        m2, m3 = (amcattest.create_test_medium() for _ in range(2))
        first_set = amcattest.create_test_set()
        second_set = amcattest.create_test_set()

        unsaved_specs = (
            dict(text='aap noot mies', medium=m1, date='2001-01-01'),
            dict(text='noot mies wim zus', medium=m2, date='2001-02-01'),
            dict(text='mies bla bla bla wim zus jet', medium=m2, date='2002-01-01'),
            dict(text='noot mies wim zus', medium=m2, date='2001-02-03'),
        )
        a, b, c, d = [
            amcattest.create_test_article(create=False, **spec)
            for spec in unsaved_specs
        ]
        e = amcattest.create_test_article(
            text='aap noot mies', medium=m3, articleset=second_set)

        Article.create_articles([a, b, c, d], articleset=first_set)
        ES().flush()
        return m1, m2, m3, first_set, second_set, a, b, c, d, e
Example #34
0
    def test_add_many(self):
        """Can we add a large number of articles from one set to another?

        1213 unsaved articles are stored in the first set with
        Article.create_articles and then added to the second set; after each
        step the set's get_count() must equal the number of articles.
        """
        s = amcattest.create_test_set()
        s2 = amcattest.create_test_set()
        p = amcattest.create_test_project()

        arts = [amcattest.create_test_article(project=p, create=False)
                for _ in range(1213)]
        Article.create_articles(arts, s)
        ES().refresh()
        self.assertEqual(len(arts), s.get_count())

        s2.add_articles(arts)
        ES().refresh()
        # The leftover debug print of the count was dropped; the assertion
        # already reports both values on failure.
        self.assertEqual(len(arts), s2.get_count())
Example #35
0
    def test_get_mediums(self):
        """get_mediums() on a project matches the mediums of its sets' articles.

        set1 and set2 share a project, so the expected mediums are those of
        the articles in both. A freshly created project must report none.
        """
        set1 = amcattest.create_test_set(2)
        set2 = amcattest.create_test_set(2, project=set1.project)
        # NOTE(review): set3 is created without a project argument, presumably
        # landing in a different project so its mediums act as noise -- confirm.
        set3 = amcattest.create_test_set(2)
        # Plain loop: refresh_index is called for its side effect only.
        for article_set in (set1, set2, set3):
            article_set.refresh_index()

        media = set(set1.project.get_mediums())
        expected = ({a.medium for a in set1.articles.all()}
                    | {a.medium for a in set2.articles.all()})
        self.assertEqual(media, expected)

        # can we get_mediums on an empty project?
        self.assertEqual(list(amcattest.create_test_project().get_mediums()), [])
Example #36
0
    def setup(self):
        """Build the shared fixture of mediums, sets and articles.

        a-d are built unsaved (create=False) and stored in the first set via
        Article.create_articles with check_duplicate=False and create_id=True;
        e is created directly in the second set.

        Returns the tuple (m1, m2, m3, s1, s2, a, b, c, d, e).
        """
        m1 = amcattest.create_test_medium(name="De Nep-Krant")
        m2, m3 = (amcattest.create_test_medium() for _ in range(2))
        first_set = amcattest.create_test_set()
        second_set = amcattest.create_test_set()

        unsaved_specs = (
            dict(text='aap noot mies', medium=m1, date='2001-01-01'),
            dict(text='noot mies wim zus', medium=m2, date='2001-02-01'),
            dict(text='mies bla bla bla wim zus jet', medium=m2, date='2002-01-01'),
            dict(text='noot mies wim zus', medium=m2, date='2001-02-03'),
        )
        a, b, c, d = [
            amcattest.create_test_article(create=False, **spec)
            for spec in unsaved_specs
        ]
        e = amcattest.create_test_article(
            text='aap noot mies', medium=m3, articleset=second_set)

        Article.create_articles(
            [a, b, c, d],
            articleset=first_set,
            check_duplicate=False,
            create_id=True,
        )
        ES().flush()
        return m1, m2, m3, first_set, second_set, a, b, c, d, e
Example #37
0
    def test_permissions(self):
        """Walk through the expected HTTP status codes of the article and
        articleset API views for an anonymous client, for the owner of p1
        (who is also given the reader role on p2), and for the owner of p2."""
        from amcat.models import Role, ProjectRole
        metareader = Role.objects.get(label='metareader', projectlevel=True)
        reader = Role.objects.get(label='reader', projectlevel=True)

        p1 = amcattest.create_test_project(guest_role=None)
        p2 = amcattest.create_test_project(guest_role=metareader)
        s1 = amcattest.create_test_set(project=p1)
        s2 = amcattest.create_test_set(project=p2)

        # s2 is additionally linked into p1
        p1.articlesets.add(s2)

        # short aliases for the url builders
        articles_at = ArticleViewSet.get_url
        sets_at = ArticleSetViewSet.get_url

        payload = {'text' : 'bla', 'headline' : 'headline', 'date' : '2013-01-01T00:00:00', 'medium' : 'test_medium'}

        # anonymous user should be able to read p2's articlesets but not articles
        # (requires READER), and nothing on p1
        for project, aset in ((p1, s1), (p2, s2)):
            self.get(articles_at(project=project.id, articleset=aset.id), check_status=401)

        self.get(sets_at(project=p1.id), check_status=401)
        self.get(sets_at(project=p2.id), check_status=200)

        # it is illegal to view an articleset through a project it is not a member of
        self.get(articles_at(project=p2.id, articleset=s1.id), check_status=404)

        owner = p1.owner
        ProjectRole.objects.create(project=p2, user=owner, role=reader)

        # The owner should be able to view all views, except p2/s1, of course,
        # because it doesn't exist
        for project, aset, status in ((p1, s1, 200), (p1, s2, 200), (p2, s2, 200), (p2, s1, 404)):
            self.get(articles_at(project=project.id, articleset=aset.id), as_user=owner, check_status=status)

        for project in (p1, p2):
            self.get(sets_at(project=project.id), as_user=owner, check_status=200)

        # The owner should be able to add articles to set 1 via project 1, but not p2/s2
        self.post(articles_at(project=p1.id, articleset=s1.id), payload, as_user=owner, check_status=201)
        self.post(articles_at(project=p2.id, articleset=s2.id), payload, as_user=owner, check_status=403)

        # Neither p1.owner nor p2.owner should be able to modify set 2 via project 1
        for user in (owner, p2.owner):
            self.post(articles_at(project=p1.id, articleset=s2.id), payload, as_user=user, check_status=403)
Example #38
0
    def test_refresh_index(self):
        """Does refresh_index bring the index in line with added and removed articles?"""
        # TODO add/remove articles adds to index automatically (does remove?)
        # so refresh isn't really used. Rewrite to add to db manually
        def indexed_ids(aset):
            # ids the index currently returns when filtering on this set
            return set(ES().query_ids(filters=dict(sets=aset.id)))

        first = amcattest.create_test_set()
        article = amcattest.create_test_article()

        first.add(article)
        self.assertEqual(set(), indexed_ids(first))
        first.refresh_index()
        self.assertEqual({article.id}, indexed_ids(first))

        # check adding of existing articles to a new set:
        second = amcattest.create_test_set()
        second.add(article)
        second.refresh_index()
        self.assertEqual({article.id}, indexed_ids(second))

        # check that removing of articles from a set works and does not affect
        # other sets
        second.remove_articles([article])
        second.refresh_index()
        self.assertEqual(set(), indexed_ids(second))
        self.assertEqual({article.id}, indexed_ids(first))

        # the removal is only expected in the index after the refresh
        first.remove_articles([article])
        self.assertEqual({article.id}, indexed_ids(first))
        first.refresh_index()
        self.assertEqual(set(), indexed_ids(first))

        # test that remove from index works for larger sets
        bulk = amcattest.create_test_set()
        arts = [amcattest.create_test_article(medium=article.medium) for _ in range(20)]
        bulk.add(*arts)

        bulk.refresh_index()
        self.assertEqual(indexed_ids(bulk), {art.id for art in arts})

        bulk.remove_articles([arts[0]])
        bulk.remove_articles([arts[-1]])
        bulk.refresh_index()
        self.assertEqual(indexed_ids(bulk), {art.id for art in arts[1:-1]})

        # test that changing an article's properties can be reindexed
        arts[1].medium = amcattest.create_test_medium()
        arts[1].save()
Example #39
0
    def test_refresh_index(self):
        """Check that the index follows set membership once refresh_index is called."""
        # TODO add/remove articles adds to index automatically (does remove?)
        # so refresh isn't really used. Rewrite to add to db manually
        def ids_in_index(articleset):
            # query the index for the ids filed under the given set
            hits = ES().query_ids(filters=dict(sets=articleset.id))
            return set(hits)

        nothing = set()
        set_a = amcattest.create_test_set()
        art = amcattest.create_test_article()
        set_a.add(art)
        self.assertEqual(nothing, ids_in_index(set_a))
        set_a.refresh_index()
        self.assertEqual({art.id}, ids_in_index(set_a))

        # check adding of existing articles to a new set:
        set_b = amcattest.create_test_set()
        set_b.add(art)
        set_b.refresh_index()
        self.assertEqual({art.id}, ids_in_index(set_b))

        # check that removing of articles from a set works and does not affect
        # other sets
        set_b.remove_articles([art])
        set_b.refresh_index()
        self.assertEqual(nothing, ids_in_index(set_b))
        self.assertEqual({art.id}, ids_in_index(set_a))

        # before the refresh the article is still expected in the index
        set_a.remove_articles([art])
        self.assertEqual({art.id}, ids_in_index(set_a))
        set_a.refresh_index()
        self.assertEqual(nothing, ids_in_index(set_a))

        # test that remove from index works for larger sets
        large = amcattest.create_test_set()
        arts = []
        for _ in range(20):
            arts.append(amcattest.create_test_article(medium=art.medium))
        large.add(*arts)

        large.refresh_index()
        self.assertEqual(ids_in_index(large), {x.id for x in arts})

        # drop the first and the last article, one call each
        for dropped in (arts[0], arts[-1]):
            large.remove_articles([dropped])
        large.refresh_index()
        self.assertEqual(ids_in_index(large), {x.id for x in arts[1:-1]})

        # test that changing an article's properties can be reindexed
        arts[1].medium = amcattest.create_test_medium()
        arts[1].save()
Example #40
0
    def test_clean_article_ids(self):
        """Check form validity for several values of the article_ids argument."""
        project, _, unbound = self.get_form()
        aset = amcattest.create_test_set(1)
        article = aset.articles.all()[0]
        project.articlesets.add(aset)

        def valid_with(article_id):
            # build a fresh form for this project with the given id and validate it
            _, _, form = self.get_form(project=project, article_ids=str(article_id))
            return form.is_valid()

        # the form obtained without article_ids is expected to validate
        self.assertTrue(unbound.is_valid())
        self.assertTrue(valid_with(article.id))
        self.assertTrue(valid_with(article.id + 1))

        # an article taken from a newly created, separate set is expected to be rejected
        article2 = amcattest.create_test_set(1).articles.all()[0]
        self.assertFalse(valid_with(article2.id))
Example #41
0
    def test_add_many(self):
        """Can we add a large number of articles from one set to another?

        1213 articles are built with ``create=False``, created into set ``s``
        in one call, and then added to ``s2``; both sets must report the full
        count after the index is flushed.
        """
        s = amcattest.create_test_set()
        s2 = amcattest.create_test_set()
        m = amcattest.create_test_medium()
        p = amcattest.create_test_project()

        arts = [amcattest.create_test_article(project=p, medium=m, create=False) for _x in range(1213)]
        Article.create_articles(arts, s, create_id=True)
        ES().flush()
        self.assertEqual(len(arts), s.get_count())

        s2.add_articles(arts, monitor=ProgressMonitor())
        ES().flush()
        # The assertion reports the count on failure; no debug print needed.
        self.assertEqual(len(arts), s2.get_count())
Example #42
0
    def test_clean_article_ids(self):
        """Validate forms built with different article_ids values for one project."""
        p, _, initial_form = self.get_form()
        linked_set = amcattest.create_test_set(1)
        linked_article = linked_set.articles.all()[0]
        p.articlesets.add(linked_set)

        self.assertTrue(initial_form.is_valid())

        # both of these ids are expected to be accepted
        for candidate in (linked_article.id, linked_article.id + 1):
            _, _, form = self.get_form(project=p, article_ids=str(candidate))
            self.assertTrue(form.is_valid())

        # an article from another, freshly created set is expected to fail validation
        outside_article = amcattest.create_test_set(1).articles.all()[0]
        _, _, form = self.get_form(project=p, article_ids=str(outside_article.id))
        self.assertFalse(form.is_valid())
Example #43
0
 def setUp(self):
     """Delete all Scraper rows, then create two daily scrapers and a project."""
     Scraper.objects.all().delete()

     scraper_module = 'amcat.tests.test_scraping'

     dated_set = amcattest.create_test_set(name='TestArticleSet')
     self.ds = Scraper.objects.create(module=scraper_module,
                                      class_name='TestDatedScraper',
                                      run_daily=True,
                                      articleset=dated_set)

     # the second scraper additionally carries credentials
     db_set = amcattest.create_test_set(name='TestArticleSet2')
     self.dbs = Scraper.objects.create(module=scraper_module,
                                       class_name='TestDBScraper',
                                       run_daily=True,
                                       username='******',
                                       password='******',
                                       articleset=db_set)

     self.project = amcattest.create_test_project(name='scrapetest')
Example #44
0
    def test_clean_article_ids(self):
        """Check article_ids validation when the set is linked via ProjectArticleSet."""
        p, _, plain_form = self.get_form()
        aset = amcattest.create_test_set(1)
        article = aset.articles.all()[0]
        # link the set to the project as a favourite
        ProjectArticleSet.objects.create(project=p, articleset=aset, is_favourite=True)

        def form_is_valid(ids):
            # construct a form for project p with the given article_ids string
            _, _, candidate = self.get_form(project=p, article_ids=ids)
            return candidate.is_valid()

        self.assertTrue(plain_form.is_valid())
        self.assertTrue(form_is_valid(str(article.id)))
        self.assertTrue(form_is_valid(str(article.id + 1)))

        # article from a different, newly created set
        article2 = amcattest.create_test_set(1).articles.all()[0]
        self.assertFalse(form_is_valid(str(article2.id)))
    def test_create(self):
        """Post one article to an empty set, then check the API listing and the elastic index."""
        s = amcattest.create_test_set()
        url = ArticleViewSet.get_url(project=s.project.id, articleset=s.id)

        # is the set empty? (aka can we get the results)
        listing = self.get(url)
        self.assertEqual(listing['results'], [])

        body = {'text' : 'bla', 'headline' : 'headline', 'date' : '2013-01-01T00:00:00', 'medium' : 'test_medium'}

        posted = self.post(url, body, as_user=s.project.owner)
        if isinstance(posted, list):
            # a list response must hold exactly one item
            (posted,) = posted
        self.assertEqual(posted['headline'], body['headline'])

        listing = self.get(url)
        self.assertEqual(len(listing['results']), 1)
        stored = listing['results'][0]
        self.assertEqual(stored['headline'], body['headline'])
        self.assertEqual(stored['project'], s.project_id)
        self.assertEqual(stored['length'], 2)

        # Is the result added to the elastic index as well?
        from amcat.tools import amcates
        amcates.ES().flush()
        hits = list(amcates.ES().query(filters=dict(sets=s.id), fields=["text", "headline", 'medium']))
        self.assertEqual(len(hits), 1)
        hit = hits[0]
        self.assertEqual(hit.medium, "test_medium")
        self.assertEqual(hit.headline, "headline")
Example #46
0
    def setUp(self):
        """Run the BZK scraper on test_files/bzk/test.html and store the outcome.

        Sets ``self.dir``, ``self.bzk`` and ``self.result`` for the test
        methods below.
        """
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        handle = open(os.path.join(self.dir, 'test.html'))
        # The scraper receives an open file; close it when the test finishes.
        self.addCleanup(handle.close)
        self.bzk = BZK(project=amcattest.create_test_project().id,
                       file=File(handle),
                       articleset=amcattest.create_test_set().id)
        self.result = self.bzk.run()

    # NOTE: the two tests below used to be nested inside setUp, where they were
    # plain local functions and never collected by the test runner.
    def test_scrape_unit(self):
        """The scraper run should produce a truthy result."""
        self.assertTrue(self.result)

    def test_scrape_file(self):
        """Mandatory properties are set on every result; optional ones on at least one."""
        # props to check for:
        # headline, text, pagenr, section, medium, date
        must_props = ('headline', 'text', 'medium', 'date')
        may_props = ('pagenr', 'section')
        must_values = [[getattr(a, prop) for a in self.result]
                       for prop in must_props]
        may_values = [[getattr(a, prop) for a in self.result]
                      for prop in may_props]

        for proplist in must_values:
            self.assertTrue(all(proplist))
        for proplist in may_values:
            # assuming at least one of the articles has the property. if not, break
            self.assertTrue(any(proplist))
Example #47
0
    def test_get_article_ids(self):
        """get_article_ids should equal the ids from the articles relation,
        both by default and with use_elastic=True."""
        aset = amcattest.create_test_set(10)

        ES().refresh()

        for options in ({}, {"use_elastic": True}):
            # re-read the ids through the relation for each comparison
            related_ids = set(aset.articles.all().values_list("id", flat=True))
            self.assertEqual(related_ids, aset.get_article_ids(**options))
Example #48
0
 def test_to_medium_ids(self):
     """to_medium_ids should yield ids for a single medium, a list of mediums,
     a Medium queryset and a flat values_list of medium ids."""
     arts = amcattest.create_test_set(2).articles.all()
     m1, m2 = [amcattest.create_test_medium() for _ in range(2)]
     both_ids = {m1.id, m2.id}

     self.assertEqual(set(to_medium_ids(m1)), {m1.id})
     self.assertEqual(set(to_medium_ids([m1, m2])), both_ids)

     medium_qs = Medium.objects.filter(id__in=[m1.id, m2.id])
     self.assertEqual(set(to_medium_ids(medium_qs)), both_ids)

     flat_ids = arts.values_list("medium__id", flat=True)
     self.assertEqual(set(to_medium_ids(flat_ids)), {art.medium_id for art in arts})
Example #49
0
 def test_create(self):
     """A set that had seven articles added one by one should list seven articles."""
     expected_count = 7
     aset = amcattest.create_test_set()
     for _ in range(expected_count):
         article = amcattest.create_test_article()
         aset.add(article)
     stored = aset.articles.all()
     self.assertEqual(expected_count, len(stored))
Example #50
0
    def test_parents(self):
        """Post a parent with two JSON-encoded children and check the parent links."""
        s = amcattest.create_test_set()
        url = ArticleViewSet.get_url(project=s.project.id, articleset=s.id)

        def article(headline):
            # common article fields with only the headline varied
            return dict({
                'text': 'bla',
                'headline': 'headline',
                'date': '2013-01-01T00:00:00',
                'medium': 'test_medium'
            }, headline=headline)

        child1, child2, parent = article('c1'), article('c2'), article('parent')

        # children travel as a JSON string inside the parent's form data
        body = dict(parent, children=json.dumps([child1, child2]))
        self.post(url, body, as_user=s.project.owner)

        # result should have 3 articles, with c1 and c2 .parent set to parent
        by_headline = {a['headline']: a for a in self.get(url)['results']}
        self.assertEqual(len(by_headline), 3)
        for child in ('c1', 'c2'):
            self.assertEqual(by_headline[child]['parent'], by_headline['parent']['id'])
        self.assertEqual(by_headline['parent']['parent'], None)
Example #51
0
    def test_full_refresh(self):
        """A changed medium should only show in the index after refresh_index(full_refresh=True)."""
        m1, m2 = [amcattest.create_test_medium() for _ in range(2)]
        a = amcattest.create_test_article(text='aap noot mies', medium=m1)
        s = amcattest.create_test_set()
        s.add(a)
        s.refresh_index()

        def ids_under(medium):
            # ids the index returns for set s filtered on the given medium
            return set(ES().query_ids(filters=dict(sets=s.id, mediumid=medium.id)))

        self.assertEqual(ids_under(m1), {a.id})

        a.medium = m2
        a.save()

        # a should NOT be reindexed
        s.refresh_index(full_refresh=False)
        self.assertEqual(ids_under(m1), {a.id})
        self.assertEqual(ids_under(m2), set())

        # after the full refresh the article is expected under its new medium
        s.refresh_index(full_refresh=True)
        self.assertEqual(ids_under(m1), set())
        self.assertEqual(ids_under(m2), {a.id})
Example #52
0
    def test_parents_multiple(self):
        """Post a JSON list holding a leaf and a parent with one child; check the links."""
        s = amcattest.create_test_set()
        url = ArticleViewSet.get_url(project=s.project.id, articleset=s.id)

        template = {
            'text': 'bla',
            'headline': 'headline',
            'date': '2013-01-01T00:00:00',
            'medium': 'test_medium'
        }
        child, parent, leaf = [dict(template, headline=h) for h in ('c', 'p', 'l')]

        # here the whole request body is JSON, with children nested as a list
        payload = json.dumps([leaf, dict(parent, children=[child])])
        json_options = dict(content_type='application/json')
        self.post(url, payload, as_user=s.project.owner, request_options=json_options)

        by_headline = {a['headline']: a for a in self.get(url)['results']}
        self.assertEqual(len(by_headline), 3)
        self.assertEqual(by_headline['c']['parent'], by_headline['p']['id'])
        for top_level in ('p', 'l'):
            self.assertEqual(by_headline[top_level]['parent'], None)
Example #53
0
    def test_create(self):
        """Create an article through the API and verify it in the listing and in elastic."""
        aset = amcattest.create_test_set()
        url = ArticleViewSet.get_url(project=aset.project.id, articleset=aset.id)

        # is the set empty? (aka can we get the results)
        before = self.get(url)
        self.assertEqual(before['results'], [])

        body = {
            'text': 'bla',
            'headline': 'headline',
            'date': '2013-01-01T00:00:00',
            'medium': 'test_medium'
        }

        created = self.post(url, body, as_user=aset.project.owner)
        if isinstance(created, list):
            # unpacking enforces a single-element list
            [created] = created
        self.assertEqual(created['headline'], body['headline'])

        after = self.get(url)
        self.assertEqual(len(after['results']), 1)
        listed = after['results'][0]
        self.assertEqual(listed['headline'], body['headline'])
        self.assertEqual(listed['project'], aset.project_id)
        self.assertEqual(listed['length'], 2)

        # Is the result added to the elastic index as well?
        from amcat.tools import amcates
        amcates.ES().flush()
        wanted_fields = ["text", "headline", 'medium']
        found = list(amcates.ES().query(filters=dict(sets=aset.id), fields=wanted_fields))
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].medium, "test_medium")
        self.assertEqual(found[0].headline, "headline")
Example #54
0
    def test_articleset_triggers(self):
        """Is an article queued when it is added to or removed from a set,
        deleted, or when its set is moved to another project?"""
        article = amcattest.create_test_article()
        aset = amcattest.create_test_set()
        self._flush_queue()
        self.assertNotIn(article.id, self._all_articles())

        # adding to a set
        aset.add(article)
        self.assertIn(article.id, self._all_articles())

        # removing from a set
        self._flush_queue()
        aset.remove(article)
        self.assertIn(article.id, self._all_articles())

        # deleting; the id is read before the delete call
        self._flush_queue()
        deleted_id = article.id
        article.delete()
        self.assertIn(deleted_id, self._all_articles())

        # saving the set with a different project
        other = amcattest.create_test_article()
        aset.add(other)
        self._flush_queue()
        aset.project = amcattest.create_test_project()
        aset.save()
        self.assertIn(other.id, self._all_articles())
Example #55
0
 def test_to_medium_ids(self):
     """Feed to_medium_ids a medium, a list, a queryset and a values_list
     and compare the resulting id sets."""
     articles = amcattest.create_test_set(2).articles.all()
     m1 = amcattest.create_test_medium()
     m2 = amcattest.create_test_medium()

     # each entry is (input given to to_medium_ids, expected set of ids)
     single = (m1, {m1.id})
     as_list = ([m1, m2], {m1.id, m2.id})
     for given, expected in (single, as_list):
         self.assertEqual(set(to_medium_ids(given)), expected)

     queryset = Medium.objects.filter(id__in=[m1.id, m2.id])
     self.assertEqual(set(to_medium_ids(queryset)), {m1.id, m2.id})

     id_values = articles.values_list("medium__id", flat=True)
     self.assertEqual(set(to_medium_ids(id_values)), {x.medium_id for x in articles})
    def test_include_uncoded_sentences(self):
        """Compare the number of result rows with and without include_uncoded_sentences."""
        # only the schema and the string/int fields of each fixture are used here
        aschema, _, astrf, aintf, _, _, _ = amcattest.create_test_schema_with_fields(
            isarticleschema=True)
        sschema, _, sstrf, sintf, _, _, _ = amcattest.create_test_schema_with_fields(
            isarticleschema=False)

        a1 = amcattest.create_test_article(text="Zin 1. Zin 2.")
        a2 = amcattest.create_test_article(text="Zin 1. Zin 2.")
        aset = amcattest.create_test_set([a1, a2])
        cjob = amcattest.create_test_job(articleset=aset, articleschema=aschema, unitschema=sschema)

        # code the sentence at index 1 of article a1
        sentence = list(get_or_create_sentences(a1))[1]
        coding = create_test_coding(codingjob=cjob, article=a1, sentence=sentence)
        coding.update_values({sstrf: "bla", sintf: 10})

        fields = {sstrf: {}, sintf: {}, astrf: {}, aintf: {}}

        # We expect 1 sentence if we only export codings, and 3 rows once
        # uncoded sentences are included
        for include_uncoded, expected_rows in ((False, 1), (True, 3)):
            result = self._get_results([cjob],
                                       fields,
                                       include_uncoded_sentences=include_uncoded,
                                       export_level=CODING_LEVEL_BOTH)
            self.assertEqual(expected_rows, len(result))
    def setUp(self):
        """Scrape test_files/bzk/test.html with BZK and keep the outcome in ``self.result``.

        Also sets ``self.dir`` (the fixture directory) and ``self.bzk`` (the
        scraper instance).
        """
        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        html_file = open(os.path.join(self.dir, 'test.html'))
        # Make sure the handle passed to the scraper is closed after the test.
        self.addCleanup(html_file.close)
        self.bzk = BZK(project=amcattest.create_test_project().id,
                       file=File(html_file),
                       articleset=amcattest.create_test_set().id)
        self.result = self.bzk.run()

    # NOTE: these tests were previously indented into setUp's body, which made
    # them local functions that were defined on every setUp and never run.
    def test_scrape_unit(self):
        """The scraper run should produce a truthy result."""
        self.assertTrue(self.result)

    def test_scrape_file(self):
        """Check mandatory properties on all results and optional ones on at least one."""
        # props to check for:
        # headline, text, pagenr, section, medium, date
        must_props = ('headline', 'text', 'medium', 'date')
        may_props = ('pagenr', 'section')
        must_values = [[getattr(a, prop) for a in self.result] for prop in must_props]
        may_values = [[getattr(a, prop) for a in self.result] for prop in may_props]

        for proplist in must_values:
            self.assertTrue(all(proplist))
        for proplist in may_values:
            # assuming at least one of the articles has the property. if not, break
            self.assertTrue(any(proplist))
Example #58
0
    def setUp(self):
        """Run BZKPDFScraper on test_files/bzk/test.pdf unless slow tests are skipped.

        When ``amcattest.skip_slow_tests()`` is true nothing is set up; the
        test methods below carry the same guard.
        """
        if amcattest.skip_slow_tests():
            return

        from django.core.files import File
        import os.path

        self.dir = os.path.join(os.path.dirname(__file__), 'test_files', 'bzk')
        pdf_file = open(os.path.join(self.dir, 'test.pdf'))
        # Make sure the handle passed to the scraper is closed after the test.
        self.addCleanup(pdf_file.close)
        self.bzk = BZKPDFScraper(project=amcattest.create_test_project().id,
                                 file=File(pdf_file),
                                 articleset=amcattest.create_test_set().id)
        self.result = self.bzk.run()

    # NOTE: these tests were previously indented into setUp's body, which made
    # them local functions that the test runner never collected.
    def test_scrape_unit(self):
        """The scraper should expose a truthy index and a truthy result."""
        if amcattest.skip_slow_tests():
            return

        self.assertTrue(self.bzk.index)
        self.assertTrue(self.result)

    def test_getarticle(self):
        """Check mandatory props on all results and optional ones on at least one."""
        if amcattest.skip_slow_tests():
            return

        # props to check for:
        # headline, text, date, pagenr, medium
        must_props = ('headline', 'text', 'medium', 'date')
        may_props = ('pagenr', )
        must_values = [[getattr(a.props, prop) for a in self.result]
                       for prop in must_props]
        may_values = [[getattr(a.props, prop) for a in self.result]
                      for prop in may_props]

        for proplist in must_values:
            self.assertTrue(all(proplist))
        for proplist in may_values:
            # assuming at least one of the articles has the property. if not, break.
            self.assertTrue(any(proplist))
Example #59
0
    def test_dates(self):
        """Aggregate three articles of one medium per month and per quarter."""
        a1 = amcattest.create_test_article(date='2001-01-01')
        a2 = amcattest.create_test_article(date='2001-03-02', medium=a1.medium)
        a3 = amcattest.create_test_article(date='2001-08-12', medium=a1.medium)
        aset = amcattest.create_test_set(articles=[a1, a2, a3])
        aset.refresh_index()

        def aggregate(interval):
            # run the script for the given date interval and return its rows as a set
            table = AggregationScript.run_script(
                dict(articlesets=[aset.id],
                     projects=[aset.project_id],
                     dateInterval=interval,
                     xAxis='date',
                     yAxis='medium',
                     counterType='numberOfArticles',
                     datetype='all'))
            return set(table.to_list(row_names=True, tuple_name=None))

        # months 1, 3 and 8 hold one article each; the other months up to 8 hold none
        months_with_article = (1, 3, 8)
        expected_monthly = {('2001-%02i' % i, int(i in months_with_article))
                            for i in range(1, 9)}
        self.assertEqual(aggregate('month'), expected_monthly)

        expected_quarterly = {('2001-1', 2), ('2001-2', 0), ('2001-3', 1)}
        self.assertEqual(aggregate('quarter'), expected_quarterly)
Example #60
0
    def setUp(self):
        from amcat.models.coding.coding import CodingValue
        # create a coding job set with a sensible schema and some articles to 'code'
        self.schema = amcattest.create_test_schema()
        self.codebook = amcattest.create_test_codebook()
        self.code = amcattest.create_test_code(label="CODED")
        self.codebook.add_code(self.code)

        texttype = CodingSchemaFieldType.objects.get(pk=1)
        inttype = CodingSchemaFieldType.objects.get(pk=2)
        codetype = CodingSchemaFieldType.objects.get(pk=5)

        create = CodingSchemaField.objects.create
        self.textfield = create(codingschema=self.schema,
                                fieldnr=1,
                                fieldtype=texttype,
                                label="Text")
        self.intfield = create(codingschema=self.schema,
                               fieldnr=2,
                               fieldtype=inttype,
                               label="Number")
        self.codefield = create(codingschema=self.schema,
                                fieldnr=3,
                                fieldtype=codetype,
                                label="Code",
                                codebook=self.codebook)

        self.users = [amcattest.create_test_user() for _x in range(2)]

        self.articles, self.jobs, self.asets = [], [], []
        for i, user in enumerate([0, 0, 0, 0, 1]):
            aset = amcattest.create_test_set(articles=2 * (i + 1))
            self.articles += list(aset.articles.all())
            self.asets.append(aset)
            job = amcattest.create_test_job(articleschema=self.schema,
                                            unitschema=self.schema,
                                            coder=self.users[user],
                                            articleset=aset)
            self.jobs.append(job)

        self.an1 = Coding.objects.create(codingjob=self.jobs[0],
                                         article=self.articles[0])
        self.an2 = Coding.objects.create(codingjob=self.jobs[0],
                                         article=self.articles[1])
        self.an2.set_status(STATUS_COMPLETE)
        self.an2.comments = 'Makkie!'
        self.an2.save()

        sent = amcattest.create_test_sentence()
        self.sa1 = Coding.objects.create(codingjob=self.jobs[0],
                                         article=self.articles[0],
                                         sentence=sent)
        self.sa2 = Coding.objects.create(codingjob=self.jobs[0],
                                         article=self.articles[0],
                                         sentence=sent)
        create = CodingValue.objects.create
        create(coding=self.sa1, field=self.intfield, intval=1)
        create(coding=self.sa1, field=self.textfield, strval="bla")
        create(coding=self.sa2, field=self.textfield, strval="blx")
        create(coding=self.sa1, field=self.codefield, intval=self.code.id)