Beispiel #1
0
    def test_articles_preprocessing_reactivate(self):
        """An existing analysis-article flagged for deletion must be reported as an undeletion."""
        project = amcattest.create_test_project()
        article = amcattest.create_test_article(project=project)
        analysis = amcattest.create_test_analysis()
        AnalysisProject.objects.create(project=project, analysis=analysis)
        existing = AnalysisArticle.objects.create(article=article, analysis=analysis)

        def check_actions(expected_undeletions):
            # Query budget: 3 for the needed analyses, 1 for the existing ones
            with self.checkMaxQueries(n=4):
                actions = _get_articles_preprocessing_actions([article.id])
                additions, restarts, deletions, undeletions = actions
                self.assertEqual(multidict(additions), {})
                self.assertEqual(list(deletions), [])
                self.assertEqual(set(undeletions), expected_undeletions)
                self.assertEqual(set(restarts), set())

        # Baseline: what is required already exists, so no action is expected
        check_actions(set())

        # Flag the existing analysis-article for deletion; it should now be undeleted
        existing.delete = True
        existing.save()
        check_actions({existing.id})
Beispiel #2
0
    def test_analyses_per_article(self):
        """Check the (article, analysis) pairs for direct and set-based project membership."""
        proj_a, proj_b, proj_inactive = [
            amcattest.create_test_project(active=(idx < 2)) for idx in range(3)
        ]
        art1 = amcattest.create_test_article(project=proj_a)
        art2 = amcattest.create_test_article(project=proj_b)
        art3 = amcattest.create_test_article(project=proj_b)
        art4 = amcattest.create_test_article(project=proj_inactive)
        articles = {art1.id, art2.id, art3.id, art4.id}

        def check(expected):
            # Query budget: 2 for projects/article, 1 for analyses/project
            with self.checkMaxQueries(n=3):
                found = multidict(_get_analyses_per_article(articles))
                self.assertEqual(found, expected)

        # Baseline: no project has an analysis yet
        check({})

        # Attach analyses to the two active projects
        an1, an2, an3 = [amcattest.create_test_analysis() for _ in range(3)]
        links = [(proj_a, an1), (proj_a, an2), (proj_b, an2), (proj_b, an3)]
        for project, analysis in links:
            AnalysisProject.objects.create(project=project, analysis=analysis)
        active_only = {
            art1.id: {an1.id, an2.id},
            art2.id: {an2.id, an3.id},
            art3.id: {an2.id, an3.id},
        }
        check(active_only)

        # An analysis on the inactive project must not change the outcome
        AnalysisProject.objects.create(project=proj_inactive, analysis=an3)
        check(active_only)

        # Membership of an active project through an article set does count
        set_a = amcattest.create_test_set(project=proj_a)
        amcattest.create_test_set(project=proj_b)  # a second set that stays empty
        set_a.add(art4)
        set_a.add(art2)
        AnalysisProject.objects.create(project=proj_inactive, analysis=an2)
        check({
            art1.id: {an1.id, an2.id},
            art2.id: {an1.id, an2.id, an3.id},
            art3.id: {an2.id, an3.id},
            art4.id: {an1.id, an2.id},
        })
Beispiel #3
0
    def test_articles_preprocessing_actions(self):
        """Check additions and deletions as analyses get assigned to projects and articles."""
        proj1, proj2 = [amcattest.create_test_project() for _ in range(2)]
        art1, art2, art3 = [
            amcattest.create_test_article(project=proj)
            for proj in (proj1, proj2, proj2)
        ]
        articles = {art1.id, art2.id, art3.id}

        # Baseline: nothing is needed and nothing exists
        with self.checkMaxQueries(n=4):  # 3 for needed, 1 for existing
            actions = _get_articles_preprocessing_actions(articles)
            additions, restarts, deletions, undeletions = actions
            self.assertEqual(set(additions), set())
            for pending in (deletions, undeletions, restarts):
                self.assertEqual(set(pending), set())

        # Assign analyses to the projects
        an1, an2, an3 = [amcattest.create_test_analysis() for _ in range(3)]
        for project, analysis in [(proj1, an1), (proj1, an2), (proj2, an2)]:
            AnalysisProject.objects.create(project=project, analysis=analysis)

        with self.checkMaxQueries(n=4):  # 3 for needed, 1 for existing
            actions = _get_articles_preprocessing_actions(articles)
            additions, restarts, deletions, undeletions = actions
            needed = {
                art1.id: {an1.id, an2.id},
                art2.id: {an2.id},
                art3.id: {an2.id},
            }
            self.assertEqual(multidict(additions), needed)
            for pending in (deletions, undeletions, restarts):
                self.assertEqual(set(pending), set())

        # Register some analyses as already present on the articles
        for article, analysis in [(art1, an1), (art2, an1), (art3, an2)]:
            AnalysisArticle.objects.create(article=article, analysis=analysis)

        with self.checkMaxQueries(n=4):  # 3 for needed, 1 for existing
            actions = _get_articles_preprocessing_actions(articles)
            additions, restarts, deletions, undeletions = actions
            still_needed = {art1.id: {an2.id}, art2.id: {an2.id}}
            self.assertEqual(multidict(additions), still_needed)

        # Resolving the deletion ids needs extra queries, so it happens
        # outside the query-count guard
        to_delete = {
            (row.article_id, row.analysis_id)
            for row in (AnalysisArticle.objects.get(pk=pk) for pk in deletions)
        }
        self.assertEqual(to_delete, {(art2.id, an1.id)})
        self.assertEqual(set(undeletions), set())
Beispiel #4
0
    def test_get_analysis_ids(self):
        """Check the project -> analysis ids mapping, fetched with a single query."""
        proj1, proj2 = [amcattest.create_test_project() for _ in range(2)]
        an1, an2, an3 = [amcattest.create_test_analysis() for _ in range(3)]
        projects = [proj1, proj2]

        def check(expected):
            with self.checkMaxQueries(n=1):
                found = multidict(_get_analysis_ids(projects))
                self.assertEqual(found, expected)

        # No analysis is linked to any project yet
        check({})

        for project, analysis in [(proj1, an1), (proj1, an2), (proj2, an2)]:
            AnalysisProject.objects.create(project=project, analysis=analysis)

        check({proj1.id: {an1.id, an2.id}, proj2.id: {an2.id}})
Beispiel #5
0
def _get_article_dicts(article_ids):
    """Generate, for each article in article_ids, a dict suitable for uploading to Solr."""
    # Fixed-offset timezone, one hour ahead of UTC and without DST
    class GMT1(datetime.tzinfo):
        def tzname(self, dt):
            return "GMT +1"

        def utcoffset(self, dt):
            return datetime.timedelta(hours=1)

        def dst(self, dt):
            return datetime.timedelta(0)

    # article id -> ids of the article sets that contain it
    memberships = ArticleSetArticle.objects.filter(article__in=article_ids)
    sets = multidict((m.article_id, m.articleset_id) for m in memberships)

    for article in Article.objects.filter(pk__in=article_ids):
        document = {
            "id": article.id,
            "headline": _clean(article.headline),
            "body": _clean(article.text),
            "byline": _clean(article.byline),
            "section": _clean(article.section),
            "projectid": article.project_id,
            "mediumid": article.medium_id,
            # attach the fixed GMT+1 tzinfo to the stored date
            "date": article.date.replace(tzinfo=GMT1()),
            "sets": sets.get(article.id),
        }
        yield document
Beispiel #6
0
 def test_multidict(self):
     """toolkit.multidict should group (key, value) pairs into a key -> set of values mapping."""
     pairs = [(1, 1), (1, 2), (1, 3), (2, 3)]
     grouped = {1: {1, 2, 3}, 2: {3}}
     cases = [
         (pairs, grouped),                     # a plain list
         ((pair for pair in pairs), grouped),  # a generator
         ((pair for pair in []), {}),          # an empty generator
     ]
     for given, expected in cases:
         self.assertEqual(dict(toolkit.multidict(given)), expected)
Beispiel #7
0
    def test_get_projects(self):
        """Check the active projects per article, directly and through article sets."""
        proj1 = amcattest.create_test_project()
        art1 = amcattest.create_test_article(project=proj1)
        proj2 = amcattest.create_test_project()
        art2 = amcattest.create_test_article(project=proj2)
        art3 = amcattest.create_test_article(project=proj2)
        proj_inactive = amcattest.create_test_project(active=False)
        art4 = amcattest.create_test_article(project=proj_inactive)
        articleids = {art1.id, art2.id, art3.id, art4.id}

        def check(expected):
            with self.checkMaxQueries(n=2):
                found = multidict(_get_active_project_ids(articleids))
                self.assertEqual(found, expected)

        # Direct membership only; the article in the inactive project is absent
        check({
            art1.id: {proj1.id},
            art2.id: {proj2.id},
            art3.id: {proj2.id},
        })

        # Put the first article into the second project through a set
        article_set = amcattest.create_test_set(project=proj2)
        article_set.add(art1)
        check({
            art1.id: {proj1.id, proj2.id},
            art2.id: {proj2.id},
            art3.id: {proj2.id},
        })

        # Add the article whose own project is inactive to that same set
        article_set.add(art4)
        check({
            art1.id: {proj1.id, proj2.id},
            art2.id: {proj2.id},
            art3.id: {proj2.id},
            art4.id: {proj2.id},
        })
Beispiel #8
0
 def test_multidict(self):
     """toolkit.multidict should collect the values of (key, value) pairs in one set per key."""
     pairs = [(1, 1), (1, 2), (1, 3), (2, 3)]
     expected = {1: {1, 2, 3}, 2: {3}}

     # Build every input up front, then check each in turn
     from_list = pairs
     from_generator = (pair for pair in pairs)
     from_empty = (pair for pair in [])

     self.assertEqual(dict(toolkit.multidict(from_list)), expected)
     self.assertEqual(dict(toolkit.multidict(from_generator)), expected)
     self.assertEqual(dict(toolkit.multidict(from_empty)), {})
Beispiel #9
0
def gen_coding_choices(user, model):
    """
    Yield (project, [(id, name), ...]) groups of `model` objects for `user`.

    A user whose role id is at least get_admin_id() gets every object.
    Any other user gets the objects whose project either has a project role
    for that user, or has a guest role id that is >= the user's own role id.

    Groups are yielded in the order produced by sorting with name_sort.
    """
    role_id = user.userprofile.role.id
    if role_id >= get_admin_id():
        objects = model.objects.all()
    else:
        objects = model.objects.filter(
            # User in project
            Q(project__projectrole__user=user) |
            # User has access to project through guestrole
            Q(project__guest_role__id__gte=role_id)
        ).distinct()

    # QuerySet methods return a new queryset instead of modifying the
    # receiver, so the result must be kept; otherwise every cb.project
    # access below costs a separate query. The related *model* is named
    # here ("project"), since select_related follows relations, not columns.
    objects = objects.select_related("project")
    grouped = toolkit.multidict(((cb.project, cb) for cb in objects), ltype=list)

    for project, objs in sorted(grouped.items(), key=name_sort):
        yield (project, [(x.id, x.name) for x in objs])
Beispiel #10
0
def gen_coding_choices(user, model):
    """
    Yield (project, [(id, name), ...]) groups of `model` objects for `user`.

    A superuser gets every object. Any other user gets the objects whose
    project either has a project role for that user, or has a guest role id
    that is >= the user's own role id.

    Groups are yielded in the order produced by sorting with name_sort.
    """
    if user.is_superuser:
        objects = model.objects.all()
    else:
        objects = model.objects.filter(
            # User in project
            Q(project__projectrole__user=user) |
            # User has access to project through guestrole
            Q(project__guest_role__id__gte=user.userprofile.role.id)
        ).distinct()

    # QuerySet methods return a new queryset instead of modifying the
    # receiver, so the result must be kept; otherwise every cb.project
    # access below costs a separate query. The related *model* is named
    # here ("project"), since select_related follows relations, not columns.
    objects = objects.select_related("project")
    grouped = toolkit.multidict(((cb.project, cb) for cb in objects), ltype=list)

    for project, objs in sorted(grouped.items(), key=name_sort):
        yield (project, [(x.id, x.name) for x in objs])
Beispiel #11
0
def gen_user_choices(project=None):
    """Yield (affiliation, [(user id, label), ...]) groups, a shape usable
    as the choices of a Django Choicefield.

    When a (truthy) project is given, only users that have a project role
    in that project are included.

    See: https://docs.djangoproject.com/en/dev/ref/models/fields/#field-choices"""
    queryset = User.objects.all().select_related('userprofile__affiliation__name').only(
        'username', 'first_name', 'last_name'
    )
    if project:
        queryset = queryset.filter(projectrole__project=project)

    by_affiliation = toolkit.multidict(
        ((member.userprofile.affiliation, member) for member in queryset),
        ltype=list)

    for affiliation, members in sorted(by_affiliation.items(), key=name_sort):
        labelled = [
            (m.id, "%s - %s %s (%s)" % (m.id, m.first_name, m.last_name, m.username))
            for m in members
        ]
        yield (affiliation, labelled)
Beispiel #12
0
def _get_analyses_per_article(articleids):
    """
    For each article, determine by which analyses it should be processed,
    based on the pairs yielded by _get_active_project_ids for the articles
    and the analyses that _get_analysis_ids reports for those projects.

    @return: a sequence of (article id, analysis id) pairs.
    """
    # Materialised because the pairs are walked twice: once to collect the
    # projects and once to emit the result
    article_project_pairs = list(_get_active_project_ids(articleids))

    project_ids = {project for (_article, project) in article_project_pairs}
    analyses_by_project = multidict(_get_analysis_ids(project_ids))

    for article, project in article_project_pairs:
        # A project without analyses contributes nothing for this article
        for analysis in analyses_by_project.get(project, ()):
            yield article, analysis
Beispiel #13
0
 def add_articles(self, article_ids, batch_size=1000):
     """
     Add the given article_ids to the index, batch_size ids at a time.

     article_ids may be any iterable of ids, including a generator; a falsy
     value (None or an empty container) is a no-op. The ids are materialised
     into a list first, because a generator can neither be tested for
     emptiness nor counted for the progress log.
     """
     article_ids = list(article_ids or ())
     if not article_ids:
         return
     from amcat.models import Article, ArticleSetArticle

     # Ceiling division, so a trailing partial batch is counted as well
     n = (len(article_ids) + batch_size - 1) // batch_size
     batches = splitlist(article_ids, itemsperbatch=batch_size)
     for i, batch in enumerate(batches, 1):
         log.info("Adding batch {i}/{n}".format(i=i, n=n))
         # article id -> ids of the article sets containing it (this batch only)
         all_sets = multidict((aa.article_id, aa.articleset_id)
                              for aa in ArticleSetArticle.objects.filter(article__in=batch))
         dicts = (get_article_dict(article, list(all_sets.get(article.id, [])))
                  for article in Article.objects.filter(pk__in=batch))
         self.bulk_insert(dicts)
Beispiel #14
0
    def add_articles(self, article_ids, batch_size=1000):
        """
        Add the given article_ids to the index, batch_size ids at a time.

        article_ids may be any iterable of ids, including a generator; a falsy
        value (None or an empty container) is a no-op. The ids are materialised
        into a list first, because a generator can neither be tested for
        emptiness nor counted for the progress log.
        """
        article_ids = list(article_ids or ())
        if not article_ids:
            return
        from amcat.models import Article, ArticleSetArticle

        # Ceiling division, so a trailing partial batch is counted as well
        n = (len(article_ids) + batch_size - 1) // batch_size
        batches = splitlist(article_ids, itemsperbatch=batch_size)
        for i, batch in enumerate(batches, 1):
            log.info("Adding batch {i}/{n}".format(i=i, n=n))
            # article id -> ids of the article sets containing it (this batch only)
            all_sets = multidict((aa.article_id, aa.articleset_id)
                                 for aa in ArticleSetArticle.objects.filter(article__in=batch))
            dicts = (get_article_dict(article, list(all_sets.get(article.id, [])))
                     for article in Article.objects.filter(pk__in=batch))
            self.bulk_insert(dicts)