def test_add_and_delete(self):
    """The index should track revision lifecycle.

    Saving a revision must add a document to the index; deleting the
    revision must remove it again.
    """
    rev = revision(save=True)
    self.refresh()
    eq_(RevisionMetricsMappingType.search().count(), 1)

    rev.delete()
    self.refresh()
    eq_(RevisionMetricsMappingType.search().count(), 0)
def test_data_in_index(self):
    """The indexed document should mirror the revision's fields."""
    prod = product(save=True)
    doc = document(locale='es', save=True)
    doc.products.add(prod)
    rev = revision(document=doc, is_approved=True, save=True)

    self.refresh()

    eq_(RevisionMetricsMappingType.search().count(), 1)
    indexed = RevisionMetricsMappingType.search().values_dict()[0]
    eq_(indexed['is_approved'], rev.is_approved)
    eq_(indexed['locale'], doc.locale)
    eq_(indexed['product'], [prod.slug])
    eq_(indexed['creator_id'], rev.creator_id)
def test_data_in_index(self):
    """The indexed document should mirror the revision's fields."""
    prod = ProductFactory()
    parent_doc = DocumentFactory(locale='en-US', products=[prod])
    translated_doc = DocumentFactory(locale='es', parent=parent_doc)
    rev = RevisionFactory(document=translated_doc, is_approved=True)

    self.refresh()

    eq_(RevisionMetricsMappingType.search().count(), 1)
    indexed = RevisionMetricsMappingType.search()[0]
    eq_(indexed['is_approved'], rev.is_approved)
    eq_(indexed['locale'], translated_doc.locale)
    eq_(indexed['product'], [prod.slug])
    eq_(indexed['creator_id'], rev.creator_id)
def top_contributors_l10n(start=None, end=None, locale=None, product=None,
                          count=10, page=1, use_cache=True):
    """Get the top l10n contributors for the KB.

    If ``locale`` is None, only en-US (the default wiki language) is
    excluded; every other locale counts as l10n. Results are cached for
    15 minutes keyed on all arguments when ``use_cache`` is True.
    """
    cache_key = None
    if use_cache:
        cache_key = u'{}_{}_{}_{}_{}_{}'.format(start, end, locale, product,
                                                count, page)
        cache_key = hashlib.sha1(cache_key.encode('utf-8')).hexdigest()
        cache_key = u'top_contributors_l10n_{}'.format(cache_key)
        cached = cache.get(cache_key, None)
        # Compare against None rather than truthiness: a legitimately
        # empty cached result would otherwise be recomputed on every call.
        if cached is not None:
            return cached

    # Get the user ids and contribution count of the top contributors.
    query = RevisionMetricsMappingType.search()
    if locale is None:
        # If there is no locale specified, exclude en-US only. The rest are
        # l10n.
        query = query.filter(~F(locale=settings.WIKI_DEFAULT_LANGUAGE))
    query = _apply_filters(query, start, end, locale, product)

    revisions = [q.id for q in query.all()[:HUGE_NUMBER]]
    users = (User.objects
             .filter(created_revisions__in=revisions)
             .annotate(query_count=Count('created_revisions'))
             .order_by('-query_count'))
    counts = _get_creator_counts(users, count, page)

    if use_cache:
        cache.set(cache_key, counts, 60 * 15)  # 15 minutes
    return counts
def top_contributors_l10n(start=None, end=None, locale=None, product=None,
                          count=10, page=1, use_cache=True):
    """Get the top l10n contributors for the KB.

    If ``locale`` is None, only en-US (the default wiki language) is
    excluded; every other locale counts as l10n. Results are cached for
    15 minutes keyed on all arguments when ``use_cache`` is True.
    """
    cache_key = None
    if use_cache:
        cache_key = u'{}_{}_{}_{}_{}_{}'.format(start, end, locale, product,
                                                count, page)
        cache_key = hashlib.sha1(cache_key.encode('utf-8')).hexdigest()
        cache_key = u'top_contributors_l10n_{}'.format(cache_key)
        cached = cache.get(cache_key, None)
        # Compare against None rather than truthiness: a legitimately
        # empty cached result would otherwise be recomputed on every call.
        if cached is not None:
            return cached

    # Get the user ids and contribution count of the top contributors.
    query = RevisionMetricsMappingType.search()
    if locale is None:
        # If there is no locale specified, exclude en-US only. The rest are
        # l10n.
        query = query.filter(~F(locale=settings.WIKI_DEFAULT_LANGUAGE))
    query = _apply_filters(query, start, end, locale, product)

    revisions = [q.id for q in query.all()[:HUGE_NUMBER]]
    users = (User.objects.filter(created_revisions__in=revisions).annotate(
        query_count=Count('created_revisions')).order_by('-query_count'))
    counts = _get_creator_counts(users, count, page)

    if use_cache:
        cache.set(cache_key, counts, 60 * 15)  # 15 minutes
    return counts
def top_contributors_l10n(start=None, end=None, locale=None, product=None,
                          count=10, page=1):
    """Get the top l10n contributors for the KB."""
    # Facet on creator_id so ES gives us a per-user contribution count.
    query = RevisionMetricsMappingType.search().facet(
        "creator_id", filtered=True, size=BIG_NUMBER)
    if locale is None:
        # No locale given: exclude only en-US; every other locale is l10n.
        query = query.filter(~F(locale=settings.WIKI_DEFAULT_LANGUAGE))
    query = _apply_filters(query, start, end, locale, product)
    return _get_creator_counts(query, count, page)
def top_contributors_l10n(start=None, end=None, locale=None, product=None,
                          count=10):
    """Get the top l10n contributors for the KB."""
    # Facet on creator_id so ES gives us a per-user contribution count.
    query = RevisionMetricsMappingType.search().facet(
        'creator_id', filtered=True, size=count)
    if locale is None:
        # No locale given: exclude only en-US; every other locale is l10n.
        query = query.filter(~F(locale=settings.WIKI_DEFAULT_LANGUAGE))
    query = _apply_filters(query, start, end, locale, product)
    return _get_creator_counts(query, count)
def get_data(self, request):
    """Return paginated top l10n contributors with per-user metric counts.

    Builds two faceted searches (revisions authored, reviews performed),
    merges the counts per user id, sorts by the requested ordering,
    paginates, then attaches full user documents and 1-based ranks.
    """
    super(TopContributorsLocalization, self).get_data(request)

    # This is the base of all the metrics. Each metric branches off from
    # this to get a particular metric type, since we can't do Aggregates.
    base_query = RevisionMetricsMappingType.search()
    base_filters = self.get_filters()

    # This branch is to get the number of revisions made by each user.
    revision_query = (
        base_query
        .filter(base_filters)
        .facet('creator_id', filtered=True, size=BIG_NUMBER))

    # This branch is to get the number of reviews done by each user.
    reviewer_query = (
        base_query
        .filter(base_filters)
        .facet('reviewer_id', filtered=True, size=BIG_NUMBER))

    # Collect two lists of objects that correlate users and the
    # appropriate metric count.
    revision_creator_counts = (
        revision_query.facet_counts()['creator_id']['terms'])
    revision_reviewer_counts = (
        reviewer_query.facet_counts()['reviewer_id']['terms'])

    # Combine all the metric types into one big mapping keyed by user id.
    combined = defaultdict(lambda: {
        'revision_count': 0,
        'review_count': 0,
    })
    for d in revision_creator_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['revision_count'] = d['count']
    for d in revision_reviewer_counts:
        combined[d['term']]['user_id'] = d['term']
        combined[d['term']]['review_count'] = d['count']

    # Sort by the requested ordering ("-field" means descending).
    sort_key = self.query_values['ordering']
    if sort_key[0] == '-':
        sort_reverse = True
        sort_key = sort_key[1:]
    else:
        sort_reverse = False

    # BUG FIX: dict.values() returns a view object on Python 3, which has
    # no .sort() method; materialize it into a list first (matches the
    # newer version of this method elsewhere in the file).
    top_contributors = list(combined.values())
    top_contributors.sort(key=lambda d: d[sort_key], reverse=sort_reverse)
    user_ids = [c['user_id'] for c in top_contributors]
    full_count = len(user_ids)

    # Paginate those user ids.
    page_start = (self.query_values['page'] - 1) * self.query_values['page_size']
    page_end = page_start + self.query_values['page_size']
    user_ids = user_ids[page_start:page_end]

    # Get full user objects for every id on this page.
    users = UserMappingType.reshape(
        UserMappingType
        .search()
        .filter(id__in=user_ids)
        .values_dict('id', 'username', 'display_name', 'avatar',
                     'last_contribution_date')
        [:self.query_values['page_size']])

    # For every user object found, mix in the metric counts for that
    # user, and then reshape the data to make more sense to clients.
    data = []
    for u in users:
        d = combined[u['id']]
        d['user'] = u
        d['last_contribution_date'] = d['user'].get(
            'last_contribution_date', None)
        d.pop('user_id', None)
        d['user'].pop('id', None)
        d['user'].pop('last_contribution_date', None)
        data.append(d)

    # One last sort, since ES didn't return the users in any particular
    # order.
    data.sort(key=lambda d: d[sort_key], reverse=sort_reverse)

    # Add 1-based global ranks to the objects.
    for i, contributor in enumerate(data, 1):
        contributor['rank'] = page_start + i

    return {
        'results': data,
        'count': full_count,
        'filters': self.query_values,
        'allowed_orderings': self.get_allowed_orderings(),
        'warnings': self.warnings,
    }
def get_data(self, request):
    """Return paginated top l10n contributors with per-user metric counts."""
    super(TopContributorsLocalization, self).get_data(request)

    # Every metric branches off the same base search, since Aggregates
    # aren't available here.
    search = RevisionMetricsMappingType.search()
    filters = self.get_filters()

    # One facet query per metric: revisions authored, reviews performed.
    creators = search.filter(filters).facet(
        "creator_id", filtered=True, size=BIG_NUMBER)
    reviewers = search.filter(filters).facet(
        "reviewer_id", filtered=True, size=BIG_NUMBER)

    creator_terms = creators.facet_counts()["creator_id"]["terms"]
    reviewer_terms = reviewers.facet_counts()["reviewer_id"]["terms"]

    # Merge both metric lists into a single mapping keyed by user id.
    merged = defaultdict(lambda: {"revision_count": 0, "review_count": 0})
    for bucket in creator_terms:
        uid = bucket["term"]
        merged[uid]["user_id"] = uid
        merged[uid]["revision_count"] = bucket["count"]
    for bucket in reviewer_terms:
        uid = bucket["term"]
        merged[uid]["user_id"] = uid
        merged[uid]["review_count"] = bucket["count"]

    # Decode the requested ordering ("-field" means descending).
    order_field = self.query_values["ordering"]
    if order_field[0] == "-":
        descending = True
        order_field = order_field[1:]
    else:
        descending = False

    ranked = sorted(merged.values(),
                    key=lambda item: item[order_field],
                    reverse=descending)
    all_ids = [item["user_id"] for item in ranked]
    total = len(all_ids)

    # Slice out the ids belonging to the requested page.
    offset = (self.query_values["page"] - 1) * self.query_values["page_size"]
    limit = offset + self.query_values["page_size"]
    page_ids = all_ids[offset:limit]

    # Fetch full user documents for this page's ids.
    users = UserMappingType.reshape(
        UserMappingType.search().filter(id__in=page_ids).values_dict(
            "id", "username", "display_name", "avatar",
            "last_contribution_date")[:self.query_values["page_size"]])

    # Attach each user's metric counts and reshape for API clients.
    data = []
    for user in users:
        entry = merged[user["id"]]
        entry["user"] = user
        entry["last_contribution_date"] = entry["user"].get(
            "last_contribution_date", None)
        entry.pop("user_id", None)
        entry["user"].pop("id", None)
        entry["user"].pop("last_contribution_date", None)
        data.append(entry)

    # ES returns users in no particular order, so sort once more.
    data.sort(key=lambda item: item[order_field], reverse=descending)

    # Attach 1-based global ranks.
    for position, contributor in enumerate(data, 1):
        contributor["rank"] = offset + position

    return {
        "results": data,
        "count": total,
        "filters": self.query_values,
        "allowed_orderings": self.get_allowed_orderings(),
        "warnings": self.warnings,
    }