def test_articles_preprocessing_reactivate(self):
    """Are deleted analyses undeleted when they are reactivated?"""
    project = amcattest.create_test_project()
    article = amcattest.create_test_article(project=project)
    analysis = amcattest.create_test_analysis()
    AnalysisProject.objects.create(project=project, analysis=analysis)

    def assert_only_undeletions(expected_undeletions):
        # Compute the actions for the single article and check that nothing
        # but the given undeletions is requested.
        # Query budget: 3 for needed, 1 for existing.
        with self.checkMaxQueries(n=4):
            additions, restarts, deletions, undeletions = (
                _get_articles_preprocessing_actions([article.id]))
        self.assertEqual(multidict(additions), {})
        self.assertEqual(list(deletions), [])
        self.assertEqual(set(undeletions), expected_undeletions)
        self.assertEqual(set(restarts), set())

    # baseline: the required analysis already exists, so nothing has to happen
    existing = AnalysisArticle.objects.create(article=article, analysis=analysis)
    assert_only_undeletions(set())

    # flag the existing analysis for deletion; it must now be reactivated
    existing.delete = True
    existing.save()
    assert_only_undeletions({existing.id})
def test_analyses_per_article(self):
    # Two active projects followed by one inactive project.
    p1 = amcattest.create_test_project(active=True)
    p2 = amcattest.create_test_project(active=True)
    p3 = amcattest.create_test_project(active=False)
    a1 = amcattest.create_test_article(project=p1)
    a2 = amcattest.create_test_article(project=p2)
    a3 = amcattest.create_test_article(project=p2)
    a4 = amcattest.create_test_article(project=p3)
    article_ids = {a1.id, a2.id, a3.id, a4.id}

    def analyses_per_article():
        # Query budget: 2 for projects/article, 1 for analyses/project.
        with self.checkMaxQueries(n=3):
            found = multidict(_get_analyses_per_article(article_ids))
        return found

    # baseline: no articles have any analysis
    self.assertEqual(analyses_per_article(), {})

    # let's add some analyses to the active projects
    n1, n2, n3 = (amcattest.create_test_analysis() for _ in range(3))
    for project, analysis in ((p1, n1), (p1, n2), (p2, n2), (p2, n3)):
        AnalysisProject.objects.create(project=project, analysis=analysis)
    via_active_projects = {
        a1.id: {n1.id, n2.id},
        a2.id: {n2.id, n3.id},
        a3.id: {n2.id, n3.id},
    }
    self.assertEqual(analyses_per_article(), via_active_projects)

    # adding an analysis to an inactive project has no effect
    AnalysisProject.objects.create(project=p3, analysis=n3)
    self.assertEqual(analyses_per_article(), via_active_projects)

    # adding an article to a project via a set does have effect
    set_in_p1 = amcattest.create_test_set(project=p1)
    amcattest.create_test_set(project=p2)  # created but never filled
    set_in_p1.add(a4)
    set_in_p1.add(a2)
    AnalysisProject.objects.create(project=p3, analysis=n2)
    self.assertEqual(
        analyses_per_article(),
        {
            a1.id: {n1.id, n2.id},
            a2.id: {n1.id, n2.id, n3.id},
            a3.id: {n2.id, n3.id},
            a4.id: {n1.id, n2.id},
        })
def test_articles_preprocessing_actions(self):
    p1 = amcattest.create_test_project()
    p2 = amcattest.create_test_project()
    a1 = amcattest.create_test_article(project=p1)
    a2 = amcattest.create_test_article(project=p2)
    a3 = amcattest.create_test_article(project=p2)
    article_ids = {a1.id, a2.id, a3.id}

    def compute_actions():
        # Query budget: 3 for needed, 1 for existing.
        with self.checkMaxQueries(n=4):
            additions, restarts, deletions, undeletions = (
                _get_articles_preprocessing_actions(article_ids))
        return additions, restarts, deletions, undeletions

    # baseline: no articles need any analysis, and no deletions are needed
    additions, restarts, deletions, undeletions = compute_actions()
    self.assertEqual(set(additions), set())
    self.assertEqual(set(deletions), set())
    self.assertEqual(set(undeletions), set())
    self.assertEqual(set(restarts), set())

    # add some analyses to the active projects
    n1, n2, n3 = (amcattest.create_test_analysis() for _ in range(3))
    for project, analysis in ((p1, n1), (p1, n2), (p2, n2)):
        AnalysisProject.objects.create(project=project, analysis=analysis)
    additions, restarts, deletions, undeletions = compute_actions()
    self.assertEqual(
        multidict(additions),
        {a1.id: {n1.id, n2.id}, a2.id: {n2.id}, a3.id: {n2.id}})
    self.assertEqual(set(deletions), set())
    self.assertEqual(set(undeletions), set())
    self.assertEqual(set(restarts), set())

    # add some existing analyses
    for article, analysis in ((a1, n1), (a2, n1), (a3, n2)):
        AnalysisArticle.objects.create(article=article, analysis=analysis)
    additions, restarts, deletions, undeletions = compute_actions()
    self.assertEqual(multidict(additions), {a1.id: {n2.id}, a2.id: {n2.id}})

    # Resolve the returned AnalysisArticle ids to (article, analysis) pairs;
    # these lookups query the database, so they stay outside the budget.
    scheduled = (AnalysisArticle.objects.get(pk=pk) for pk in deletions)
    to_delete = {(aa.article_id, aa.analysis_id) for aa in scheduled}
    self.assertEqual(to_delete, {(a2.id, n1.id)})
    self.assertEqual(set(undeletions), set())
def test_get_analysis_ids(self):
    first = amcattest.create_test_project()
    second = amcattest.create_test_project()
    # Three analyses are created; only the first two get linked to a project.
    n1, n2, _unlinked = (amcattest.create_test_analysis() for _ in range(3))

    def analysis_ids():
        # The lookup for both projects must fit in a single query.
        with self.checkMaxQueries(n=1):
            found = multidict(_get_analysis_ids([first, second]))
        return found

    # without any AnalysisProject rows nothing is returned
    self.assertEqual(analysis_ids(), {})

    for project, analysis in ((first, n1), (first, n2), (second, n2)):
        AnalysisProject.objects.create(project=project, analysis=analysis)
    self.assertEqual(
        analysis_ids(),
        {first.id: {n1.id, n2.id}, second.id: {n2.id}})
def _get_article_dicts(article_ids):
    """
    Generate one dict per article in article_ids, suitable for uploading to Solr.

    The text fields (headline, body, byline, section) are passed through
    _clean. The article date gets a fixed +1 hour tzinfo attached via
    replace(), i.e. the clock time itself is left untouched. 'sets' is the
    entry for the article in the article -> article set ids mapping.
    """
    class GMT1(datetime.tzinfo):
        # Fixed offset of one hour, without daylight saving time.
        def utcoffset(self, dt):
            return datetime.timedelta(hours=1)

        def tzname(self, dt):
            return "GMT +1"

        def dst(self, dt):
            return datetime.timedelta(0)

    # Collect the set memberships of all requested articles up front.
    memberships = ArticleSetArticle.objects.filter(article__in=article_ids)
    set_ids = multidict((m.article_id, m.articleset_id) for m in memberships)

    for article in Article.objects.filter(pk__in=article_ids):
        yield {
            "id": article.id,
            "headline": _clean(article.headline),
            "body": _clean(article.text),
            "byline": _clean(article.byline),
            "section": _clean(article.section),
            "projectid": article.project_id,
            "mediumid": article.medium_id,
            "date": article.date.replace(tzinfo=GMT1()),
            "sets": set_ids.get(article.id),
        }
def test_multidict(self):
    # Each case pairs an iterable of (key, value) tuples with the mapping
    # of key -> set of values that multidict should build from it.
    grouped = {1: {1, 2, 3}, 2: {3}}
    cases = [
        # a plain list of pairs
        ([(1, 1), (1, 2), (1, 3), (2, 3)], grouped),
        # the same pairs, supplied lazily by a generator
        ((pair for pair in [(1, 1), (1, 2), (1, 3), (2, 3)]), grouped),
        # an empty generator
        ((pair for pair in []), {}),
    ]
    for pairs, expected in cases:
        actual = dict(toolkit.multidict(pairs))
        self.assertEqual(actual, expected)
def test_get_projects(self):
    p = amcattest.create_test_project()
    a = amcattest.create_test_article(project=p)
    p2 = amcattest.create_test_project()
    a2 = amcattest.create_test_article(project=p2)
    a3 = amcattest.create_test_article(project=p2)
    inactive = amcattest.create_test_project(active=False)
    a4 = amcattest.create_test_article(project=inactive)
    article_ids = {a.id, a2.id, a3.id, a4.id}

    def active_projects():
        # The lookup may use at most two queries.
        with self.checkMaxQueries(n=2):
            found = multidict(_get_active_project_ids(article_ids))
        return found

    # only direct membership: a4 is missing because its project is inactive
    self.assertEqual(
        active_projects(),
        {a.id: {p.id}, a2.id: {p2.id}, a3.id: {p2.id}})

    # now let's add a to p2 via a set
    shared_set = amcattest.create_test_set(project=p2)
    shared_set.add(a)
    self.assertEqual(
        active_projects(),
        {a.id: {p.id, p2.id}, a2.id: {p2.id}, a3.id: {p2.id}})

    # now let's add a4 (whose project is inactive) to that set
    shared_set.add(a4)
    self.assertEqual(
        active_projects(),
        {
            a.id: {p.id, p2.id},
            a2.id: {p2.id},
            a3.id: {p2.id},
            a4.id: {p2.id},
        })
def test_multidict(self):
    # multidict must turn (key, value) pairs into a mapping of key -> set of
    # values, both for a list and for a generator, and cope with no pairs.
    # The loop variables avoid the names of the builtins (input).
    for pairs, expected in (
        ([(1, 1), (1, 2), (1, 3), (2, 3)], {
            1: {1, 2, 3},
            2: {3},
        }),
        ((x for x in [(1, 1), (1, 2), (1, 3), (2, 3)]), {
            1: {1, 2, 3},
            2: {3},
        }),
        ((x for x in []), {}),
    ):
        self.assertEqual(dict(toolkit.multidict(pairs)), expected)
def gen_coding_choices(user, model):
    """
    Yield (project, [(id, name), ...]) choice groups of `model` objects.

    Users whose role id is at least the admin role id get every object.
    Everyone else gets the objects of projects in which they have a project
    role, or whose guest role id is at least the user's own role id.
    Groups are ordered with name_sort.

    @param user: the user to generate choices for
    @param model: model class with a `project` relation and a `name` field
    """
    if user.userprofile.role.id >= get_admin_id():
        objects = model.objects.all()
    else:
        objects = model.objects.filter(
            # User in project
            Q(project__projectrole__user=user) |
            # User has access to project through guestrole
            Q(project__guest_role__id__gte=user.userprofile.role.id)
        ).distinct()

    # QuerySet methods return a new queryset, so the result has to be
    # assigned; the previous bare call was a no-op and every cb.project
    # access below cost an extra query. select_related takes the relation
    # ("project"), not a field path. only("name") is not carried over:
    # deferring the project foreign key presumably conflicts with following
    # it here -- TODO confirm before re-adding a field restriction.
    objects = objects.select_related("project")

    grouped = toolkit.multidict(((cb.project, cb) for cb in objects), ltype=list)
    for project, objs in sorted(grouped.items(), key=name_sort):
        yield project, [(x.id, x.name) for x in objs]
def gen_coding_choices(user, model):
    """
    Yield (project, [(id, name), ...]) choice groups of `model` objects.

    Superusers get every object. Everyone else gets the objects of projects
    in which they have a project role, or whose guest role id is at least
    the user's own role id. Groups are ordered with name_sort.

    @param user: the user to generate choices for
    @param model: model class with a `project` relation and a `name` field
    """
    if user.is_superuser:
        objects = model.objects.all()
    else:
        objects = model.objects.filter(
            # User in project
            Q(project__projectrole__user=user) |
            # User has access to project through guestrole
            Q(project__guest_role__id__gte=user.userprofile.role.id)
        ).distinct()

    # QuerySet methods return a new queryset, so the result has to be
    # assigned; the previous bare call was a no-op and every cb.project
    # access below cost an extra query. select_related takes the relation
    # ("project"), not a field path. only("name") is not carried over:
    # deferring the project foreign key presumably conflicts with following
    # it here -- TODO confirm before re-adding a field restriction.
    objects = objects.select_related("project")

    grouped = toolkit.multidict(((cb.project, cb) for cb in objects), ltype=list)
    for project, objs in sorted(grouped.items(), key=name_sort):
        yield project, [(x.id, x.name) for x in objs]
def gen_user_choices(project=None):
    """
    Yield (affiliation, [(user id, label), ...]) groups, in the grouped
    format accepted by a Django ChoiceField.

    If a project is given, only users with a role in that project are
    included. Groups are ordered with name_sort.

    See: https://docs.djangoproject.com/en/dev/ref/models/fields/#field-choices
    """
    candidates = User.objects.all().select_related('userprofile__affiliation__name').only(
        'username', 'first_name', 'last_name'
    )
    if project:
        candidates = candidates.filter(projectrole__project=project)

    by_affiliation = toolkit.multidict(
        ((member.userprofile.affiliation, member) for member in candidates),
        ltype=list)

    for affiliation, members in sorted(by_affiliation.items(), key=name_sort):
        labelled = [
            (m.id, "%s - %s %s (%s)" % (m.id, m.first_name, m.last_name, m.username))
            for m in members
        ]
        yield affiliation, labelled
def _get_analyses_per_article(articleids):
    """
    For each article, determine which analyses it should be processed by,
    going through the pairs produced by _get_active_project_ids and the
    analyses that _get_analysis_ids reports for those projects.

    @return: a sequence of (article id, analysis id) pairs.
    """
    # Materialise the (article, project) pairs: they are walked twice.
    memberships = list(_get_active_project_ids(articleids))
    project_ids = {project for _article, project in memberships}
    analyses_by_project = multidict(_get_analysis_ids(project_ids))

    for article_id, project_id in memberships:
        # Projects without analyses contribute nothing.
        for analysis_id in analyses_by_project.get(project_id, ()):
            yield article_id, analysis_id
def add_articles(self, article_ids, batch_size = 1000):
    """
    Add the given article_ids to the index.

    The ids are inserted in batches of at most batch_size, so there is no
    limit on the number of ids. article_ids may be any iterable, including
    a generator; it is copied into a list first because the total number
    of batches is reported in the progress log.

    @param article_ids: iterable of article ids; None or empty is a no-op
    @param batch_size: maximum number of articles per bulk insert
    """
    if not article_ids:
        return
    # A generator is always truthy and has no len(), so materialise it
    # before checking for emptiness and counting the batches.
    article_ids = list(article_ids)
    if not article_ids:
        return

    from amcat.models import Article, ArticleSetArticle

    # Ceiling division: a trailing partial batch counts as a batch too.
    n = -(-len(article_ids) // batch_size)
    batches = splitlist(article_ids, itemsperbatch=batch_size)
    for i, batch in enumerate(batches, start=1):
        log.info("Adding batch {i}/{n}".format(i=i, n=n))
        # article id -> ids of the article sets containing it, for this batch
        all_sets = multidict((aa.article_id, aa.articleset_id)
                             for aa in ArticleSetArticle.objects.filter(article__in=batch))
        dicts = (get_article_dict(article, list(all_sets.get(article.id, [])))
                 for article in Article.objects.filter(pk__in=batch))
        self.bulk_insert(dicts)
def add_articles(self, article_ids, batch_size=1000):
    """
    Add the given article_ids to the index.

    This is done in batches of at most batch_size ids, so there is no limit
    on the length of article_ids. Any iterable is accepted, including a
    generator: the ids are copied into a list up front, since the number of
    batches is needed for the progress log.

    @param article_ids: iterable of article ids; None or empty is a no-op
    @param batch_size: maximum number of articles per bulk insert
    """
    if not article_ids:
        return
    # Generators are always truthy and do not support len(); turn the input
    # into a list so the emptiness check and the batch count are reliable.
    article_ids = list(article_ids)
    if not article_ids:
        return

    from amcat.models import Article, ArticleSetArticle

    # Round up, so that a final partial batch is included in the total.
    n = -(-len(article_ids) // batch_size)
    for i, batch in enumerate(splitlist(article_ids, itemsperbatch=batch_size), start=1):
        log.info("Adding batch {i}/{n}".format(i=i, n=n))
        # Map each article in this batch to the ids of the sets it is in.
        memberships = ArticleSetArticle.objects.filter(article__in=batch)
        all_sets = multidict((aa.article_id, aa.articleset_id) for aa in memberships)
        dicts = (get_article_dict(article, list(all_sets.get(article.id, [])))
                 for article in Article.objects.filter(pk__in=batch))
        self.bulk_insert(dicts)