def metrics_for_range(table_id, dt_range_list, use_cached=False, use_only_cached=False):
    """Collect article metrics for every (from_date, to_date) pair in `dt_range_list`.

    Each pair is handed to `core.article_metrics` together with `table_id`
    and the two cache flags, which are passed through untouched.

    Returns an OrderedDict keyed by `(ymd(from_date), ymd(to_date))`, in the
    same order as `dt_range_list`, whose values are whatever
    `core.article_metrics` returned for that pair.
    """
    metrics_by_range = OrderedDict()
    for range_start, range_end in dt_range_list:
        # core does the data wrangling for us (using cached data when asked to)
        range_metrics = core.article_metrics(
            table_id, range_start, range_end, use_cached, use_only_cached)
        range_key = (ymd(range_start), ymd(range_end))
        metrics_by_range[range_key] = range_metrics
    return metrics_by_range
def generate_queries(table_id, query_func_name, datetime_list, use_cached=False, use_only_cached=False):
    """Return a list of queries to be executed by google.

    For each (start_date, end_date) pair in `datetime_list` the query
    function named `query_func_name` is looked up on the module chosen by
    `core.module_picker` and called with `table_id` and the two dates.

    A pair produces no query when:
    * `use_cached` is true and the file at `core.output_path(...)` exists, or
    * `use_only_cached` is true (in which case the result is always empty).

    Raises AssertionError if `query_func_name` is not a string.
    """
    assert isinstance(query_func_name, str), "query func name must be a string"

    # depends only on the function name, so work it out once up front
    query_type = 'downloads' if query_func_name != 'path_counts_query' else 'views'

    queries = []
    for start_date, end_date in datetime_list:
        query_func = getattr(core.module_picker(start_date, end_date), query_func_name)
        output_path = core.output_path(query_type, start_date, end_date)
        LOG.debug("looking for metrics here: %s", output_path)

        if not use_cached:
            LOG.debug("couldn't find file %r", output_path)
        elif os.path.exists(output_path):
            # results for this range are already on disk, nothing to ask for
            LOG.debug("we have %r results for %r to %r already", query_type, ymd(start_date), ymd(end_date))
            continue
        else:
            LOG.info("no cache file for %r results for %r to %r", query_type, ymd(start_date), ymd(end_date))

        if use_only_cached:
            LOG.info("skipping google query, using only cache files")
            continue

        queries.append(query_func(table_id, start_date, end_date))

    # sanity check on the loop above: a code problem if this ever fires
    assert not (use_only_cached and queries), "use_only_cached==True but we're accumulating queries somehow"
    return queries
def test_monthly_data(self):
    "a month of imported GA data is served back as a single monthly entry for an article"
    self.assertEqual(0, models.Article.objects.count())
    self.assertEqual(0, models.Metric.objects.count())

    # `day=1` rather than `day=01`: a leading-zero integer literal is a
    # SyntaxError on Python 3 (and an octal literal on Python 2).
    month_to_import = datetime(year=2015, month=8, day=1)
    logic.import_ga_metrics('monthly', from_date=month_to_import, to_date=month_to_import)

    # the import is expected to create one Metric row per Article
    expected = 1649
    self.assertEqual(expected, models.Article.objects.count())
    self.assertEqual(expected, models.Metric.objects.count())

    doi = '10.7554/eLife.08007'

    # hack. re-date the imported row to the month that contained yesterday.
    # NOTE(review): presumably so the row falls inside the window the API
    # serves by default -- confirm against the view.
    metrics = models.Metric.objects.get(article__doi=doi)
    this_month = ymd(datetime.now() - timedelta(days=1))[:-3]  # drop the trailing '-DD'
    metrics.date = this_month
    metrics.save()

    expected_data = {
        doi: {
            'daily': OrderedDict({}),
            'monthly': OrderedDict({
                this_month: {
                    'full': 604,  # was 525 before the introduction of POA as full text views
                    'abstract': 9,
                    'digest': 46,
                    'pdf': 129,
                },
            }),
        },
    }
    url = reverse('api-article-metrics', kwargs={'doi': doi})
    resp = self.c.get(url)
    self.assertEqual(200, resp.status_code)
    self.assertEqual(expected_data, resp.data)
def monthly(doi, from_date, to_date, source=models.GA):
    """Return the monthly Metric rows for article `doi` from `source`.

    Covers the month starting in `from_date` through the month ending in
    `to_date`. The result is the (unevaluated) queryset built from the
    filters below.
    """
    # we're not storing dates but rather a representation of a date, so the
    # range is expanded into the month keys we expect to find.
    # month_ranges looks like: [(2013-01-01, 2013-01-31), (2013-02-01, 2013-02-28), ...]
    month_ranges = utils.dt_month_range(from_date, to_date)
    # month_keys looks like: [2013-01, 2013-02, 2013-03]
    month_keys = [ymd(month_range[0])[:7] for month_range in month_ranges]

    queryset = models.Metric.objects.filter(article__doi__iexact=doi)
    queryset = queryset.filter(source=source)
    queryset = queryset.filter(period=models.MONTH)
    return queryset.filter(date__in=month_keys)
def test_multiple_daily_data(self):
    "two consecutive days of imported GA data are served back as two daily entries"
    from_date = datetime(year=2015, month=9, day=11)
    to_date = from_date + timedelta(days=1)
    logic.import_ga_metrics('daily', from_date, to_date)
    doi = u'10.7554/eLife.09560'

    # hack. re-date the two imported rows to yesterday and the day before.
    # NOTE(review): presumably so they fall inside the window the API serves
    # by default -- confirm against the view.
    yesterday = unicode(ymd(datetime.now() - timedelta(days=1)))
    day_before = unicode(ymd(datetime.now() - timedelta(days=2)))

    # order explicitly by the imported date: unpacking an unordered queryset
    # leaves it to the database which row becomes m1, and the expected
    # figures below depend on m1 being the earlier day.
    m1, m2 = models.Metric.objects.filter(article__doi=doi).order_by('date')
    m1.date = day_before
    m2.date = yesterday
    m1.save()
    m2.save()

    expected_data = {
        doi: {
            'daily': OrderedDict([
                (day_before, {
                    'full': 21922,
                    'abstract': 325,
                    'digest': 114,
                    'pdf': 1533,
                }),
                (yesterday, {
                    'full': 9528,
                    'abstract': 110,
                    'digest': 42,
                    'pdf': 489,
                }),
            ]),
            'monthly': OrderedDict({}),
        },
    }
    url = reverse('api-article-metrics', kwargs={'doi': doi})
    resp = self.c.get(url)
    self.assertEqual(200, resp.status_code)
    self.assertEqual(expected_data, resp.data)
def test_daily_data(self):
    "a single imported day comes back as one daily entry and an empty monthly section"
    import_day = datetime(year=2015, month=9, day=11)
    logic.import_ga_metrics('daily', from_date=import_day, to_date=import_day)
    doi = '10.7554/eLife.09560'

    # hack. re-date the imported row to yesterday.
    row = models.Metric.objects.get(article__doi=doi)
    yesterday = ymd(datetime.now() - timedelta(days=1))
    row.date = yesterday
    row.save()

    yesterday_counts = {
        'full': 21922,
        'abstract': 325,
        'digest': 114,
        'pdf': 1533,
    }
    expected_data = {
        doi: {
            'daily': OrderedDict([(yesterday, yesterday_counts)]),
            'monthly': OrderedDict(),
        },
    }

    resp = self.c.get(reverse('api-article-metrics', kwargs={'doi': doi}))
    self.assertEqual(200, resp.status_code)
    self.assertEqual(expected_data, resp.data)
def test_ymd(self):
    "core.ymd renders a datetime as a 'YYYY-MM-DD' string without the time of day"
    moment = datetime(1997, 8, 29, 6, 14)  # UTC ;)
    expected = "1997-08-29"
    self.assertEqual(core.ymd(moment), expected)
def test_mixed_source_data(self):
    "data from multiple sources is served up correctly"
    from_date = datetime(year=2015, month=9, day=11)
    to_date = from_date + timedelta(days=1)
    logic.import_ga_metrics('daily', from_date, to_date)
    logic.import_hw_metrics('daily', from_date, to_date)
    doi = '10.7554/eLife.09560'

    # hack. re-date the imported rows of each source to yesterday and the
    # day before.
    # NOTE(review): presumably so they fall inside the window the API serves
    # by default -- confirm against the view.
    yesterday = ymd(datetime.now() - timedelta(days=1))
    day_before = ymd(datetime.now() - timedelta(days=2))

    # order explicitly by the imported date: unpacking an unordered queryset
    # leaves it to the database which row becomes m1, and the expected
    # figures below depend on m1 being the earlier day.
    m1, m2 = models.Metric.objects.filter(article__doi=doi, source=models.GA).order_by('date')
    m1.date = day_before
    m2.date = yesterday
    m1.save()
    m2.save()

    # same again for the second source
    m1, m2 = models.Metric.objects.filter(article__doi=doi, source=models.HW).order_by('date')
    m1.date = day_before
    m2.date = yesterday
    m1.save()
    m2.save()

    expected_data = {
        models.GA: {
            doi: {
                'daily': OrderedDict([
                    (day_before, {
                        'full': 21922,
                        'abstract': 325,
                        'digest': 114,
                        'pdf': 1533,
                    }),
                    (yesterday, {
                        'full': 9528,
                        'abstract': 110,
                        'digest': 42,
                        'pdf': 489,
                    }),
                ]),
                'monthly': OrderedDict({}),
            },
        },
        models.HW: {
            doi: {
                'daily': OrderedDict([
                    (day_before, {
                        'full': 39912,
                        'abstract': 540,
                        'digest': 0,
                        'pdf': 4226,
                    }),
                    (yesterday, {
                        'full': 15800,
                        'abstract': 144,
                        'digest': 0,
                        'pdf': 1132,
                    }),
                ]),
                'monthly': OrderedDict({}),
            },
        },
    }
    url = reverse('api-article-metrics-mixed-source', kwargs={'doi': doi})
    resp = self.c.get(url)
    self.assertEqual(200, resp.status_code)
    self.assertEqual(expected_data, resp.data)
def daily(doi, from_date, to_date, source=models.GA):
    """Return the daily Metric rows for article `doi` from `source`.

    Rows are limited to those whose `date` lies between `ymd(from_date)` and
    `ymd(to_date)`, both ends included. The result is the (unevaluated)
    queryset built from the filters below.
    """
    queryset = models.Metric.objects.filter(article__doi__iexact=doi)
    queryset = queryset.filter(source=source)
    queryset = queryset.filter(period=models.DAY)
    # NOTE(review): open question carried over from the original -- does this
    # even work with charfields? If `date` is a char field the bounds compare
    # as strings, which only agrees with date order for zero-padded
    # YYYY-MM-DD values -- confirm against the model.
    earliest = ymd(from_date)
    latest = ymd(to_date)
    return queryset.filter(date__gte=earliest, date__lte=latest)