def metrics_for_range(table_id, dt_range_list, use_cached=False, use_only_cached=False):
    """Collects article metrics for each (from_date, to_date) pair in `dt_range_list`.

    Every pair is handed to `core.article_metrics` along with `table_id` and
    the two cache flags, which are passed through untouched.

    Returns an OrderedDict keyed by `(ymd(from_date), ymd(to_date))`, in the
    same order the pairs were given. An empty `dt_range_list` yields an empty
    OrderedDict."""
    by_range = OrderedDict()
    for period_start, period_end in dt_range_list:
        # core does the data wrangling for us (optionally using cached data)
        period_metrics = core.article_metrics(
            table_id, period_start, period_end, use_cached, use_only_cached)
        range_key = (ymd(period_start), ymd(period_end))
        by_range[range_key] = period_metrics
    return by_range
def generate_queries(table_id, query_func_name, datetime_list, use_cached=False, use_only_cached=False):
    """returns a list of queries to be executed by google

    For each (start_date, end_date) pair in `datetime_list` the query function
    called `query_func_name` is looked up on the module chosen by
    `core.module_picker` and called with `(table_id, start_date, end_date)`.

    A pair produces no query when:
    * `use_cached` is true and the file at `core.output_path(...)` exists, or
    * `use_only_cached` is true.

    `query_func_name` must be a string (checked with an assert)."""
    assert isinstance(query_func_name, str), "query func name must be a string"

    # the function name alone decides which kind of results we're dealing with
    if query_func_name == 'path_counts_query':
        query_type = 'views'
    else:
        query_type = 'downloads'

    pending = []
    for start_date, end_date in datetime_list:
        query_func = getattr(core.module_picker(start_date, end_date), query_func_name)

        output_path = core.output_path(query_type, start_date, end_date)
        LOG.debug("looking for metrics here: %s", output_path)

        if not use_cached:
            # NOTE(review): the path is never checked on this branch, the message is kept as-is
            LOG.debug("couldn't find file %r", output_path)
        elif os.path.exists(output_path):
            LOG.debug("we have %r results for %r to %r already", query_type, ymd(start_date), ymd(end_date))
            continue
        else:
            LOG.info("no cache file for %r results for %r to %r", query_type, ymd(start_date), ymd(end_date))

        if use_only_cached:
            LOG.info("skipping google query, using only cache files")
            continue

        pending.append(query_func(table_id, start_date, end_date))

    if use_only_cached:
        # code problem: every iteration above should have been skipped
        assert pending == [], "use_only_cached==True but we're accumulating queries somehow"

    return pending
Exemple #3
0
def metrics_for_range(table_id,
                      dt_range_list,
                      use_cached=False,
                      use_only_cached=False):
    """Asks core for the article metrics of every date range given.

    `dt_range_list` is iterated as (from_date, to_date) pairs; each pair,
    together with `table_id` and both cache flags, is forwarded to
    `core.article_metrics`.

    Returns an OrderedDict mapping `(ymd(from_date), ymd(to_date))` to
    whatever core returned for that range, in input order."""
    collected = OrderedDict()
    for range_start, range_end in dt_range_list:
        # core does its data wrangling for us (using cached data if asked to)
        range_result = core.article_metrics(table_id, range_start, range_end,
                                            use_cached, use_only_cached)
        label = (ymd(range_start), ymd(range_end))
        collected[label] = range_result
    return collected
Exemple #4
0
    def test_monthly_data(self):
        "a month of imported GA metrics is returned by the article metrics api as monthly data"
        # nothing exists before the import
        self.assertEqual(0, models.Article.objects.count())
        self.assertEqual(0, models.Metric.objects.count())
        # `day=1` rather than `day=01`: a leading-zero integer literal is a
        # syntax error in Python 3 (and an octal literal in Python 2)
        month_to_import = datetime(year=2015, month=8, day=1)
        logic.import_ga_metrics('monthly', from_date=month_to_import, to_date=month_to_import)
        expected = 1649
        self.assertEqual(expected, models.Article.objects.count())
        self.assertEqual(expected, models.Metric.objects.count())

        doi = '10.7554/eLife.08007'

        # hack. re-date the imported metric to the month that yesterday falls in.
        # presumably the api only reports a recent window of dates -- TODO confirm
        metrics = models.Metric.objects.get(article__doi=doi)
        this_month = ymd(datetime.now() - timedelta(days=1))[:-3]  # drops the trailing '-DD'
        metrics.date = this_month
        metrics.save()

        expected_data = {
            doi: {
                'daily': OrderedDict({}),
                'monthly': OrderedDict({
                    this_month: {
                        #'full': 525,
                        'full': 604, # introduction of POA as full text views
                        'abstract': 9,
                        'digest': 46,
                        'pdf': 129,
                    },
                }),
            },
        }
        url = reverse('api-article-metrics', kwargs={'doi': doi})
        resp = self.c.get(url)
        self.assertEqual(200, resp.status_code)
        self.assertEqual(expected_data, resp.data)
Exemple #5
0
def monthly(doi, from_date, to_date, source=models.GA):
    """returns monthly metrics for the given article for the month
    starting in `from_date` to the month ending in `to_date`.

    `doi` is matched case-insensitively and results are restricted to the
    given `source` and to metrics whose period is `models.MONTH`."""
    # we're not storing dates but rather a representation of a date, so the
    # range is turned into a list of 'YYYY-MM' strings to match against.
    # dt_month_range looks like: [(2013-01-01, 2013-01-31), (2013-02-01, 2013-02-28), ...]
    month_bounds = utils.dt_month_range(from_date, to_date)
    # looks like: [2013-01, 2013-02, 2013-03]
    month_labels = [ymd(bounds[0])[:7] for bounds in month_bounds]

    matches = models.Metric.objects.filter(article__doi__iexact=doi)
    matches = matches.filter(source=source)
    matches = matches.filter(period=models.MONTH)
    return matches.filter(date__in=month_labels)
Exemple #6
0
    def test_multiple_daily_data(self):
        "two days of imported GA metrics are both returned by the article metrics api as daily data"
        first_day = datetime(year=2015, month=9, day=11)
        second_day = first_day + timedelta(days=1)
        logic.import_ga_metrics('daily', first_day, second_day)
        doi = u'10.7554/eLife.09560'

        # hack. re-date the two imported metrics to the two most recent days.
        # presumably the api only reports a recent window of dates -- TODO confirm
        yesterday = unicode(ymd(datetime.now() - timedelta(days=1)))
        day_before = unicode(ymd(datetime.now() - timedelta(days=2)))
        older, newer = models.Metric.objects.filter(article__doi=doi)
        older.date = day_before
        newer.date = yesterday
        older.save()
        newer.save()

        daily_expected = OrderedDict()
        daily_expected[day_before] = {
            'full': 21922,
            'abstract': 325,
            'digest': 114,
            'pdf': 1533,
        }
        daily_expected[yesterday] = {
            'full': 9528,
            'abstract': 110,
            'digest': 42,
            'pdf': 489,
        }
        expected_data = {
            doi: {
                'daily': daily_expected,
                'monthly': OrderedDict(),
            },
        }

        resp = self.c.get(reverse('api-article-metrics', kwargs={'doi': doi}))
        self.assertEqual(200, resp.status_code)
        self.assertEqual(expected_data, resp.data)
Exemple #7
0
def generate_queries(table_id,
                     query_func_name,
                     datetime_list,
                     use_cached=False,
                     use_only_cached=False):
    """returns a list of queries to be executed by google

    `datetime_list` is iterated as (start_date, end_date) pairs. For each
    pair, the function named `query_func_name` is fetched from the module
    returned by `core.module_picker` and, unless the pair is skipped, called
    with `(table_id, start_date, end_date)`; its result is added to the list.

    A pair is skipped when `use_cached` is true and the path given by
    `core.output_path` exists, or whenever `use_only_cached` is true.

    `query_func_name` must be a string (checked with an assert)."""
    assert isinstance(query_func_name, str), "query func name must be a string"

    # the kind of results depends only on which query function was asked for
    query_type = 'downloads'
    if query_func_name == 'path_counts_query':
        query_type = 'views'

    queries = []
    for start_date, end_date in datetime_list:
        picked_module = core.module_picker(start_date, end_date)
        build_query = getattr(picked_module, query_func_name)

        output_path = core.output_path(query_type, start_date, end_date)
        LOG.debug("looking for metrics here: %s", output_path)

        if not use_cached:
            # NOTE(review): nothing was looked up on this branch, the message is kept as-is
            LOG.debug("couldn't find file %r", output_path)
        elif os.path.exists(output_path):
            LOG.debug("we have %r results for %r to %r already",
                      query_type, ymd(start_date), ymd(end_date))
            continue
        else:
            LOG.info("no cache file for %r results for %r to %r",
                     query_type, ymd(start_date), ymd(end_date))

        if use_only_cached:
            LOG.info("skipping google query, using only cache files")
            continue

        queries.append(build_query(table_id, start_date, end_date))

    if use_only_cached:
        # code problem: nothing should have made it past the skip above
        assert queries == [], "use_only_cached==True but we're accumulating queries somehow"

    return queries
Exemple #8
0
    def test_daily_data(self):
        "a very simple set of data returns the expected daily and monthly data in the expected structure"
        import_day = datetime(year=2015, month=9, day=11)
        logic.import_ga_metrics('daily', from_date=import_day, to_date=import_day)

        doi = '10.7554/eLife.09560'

        # hack. re-date the single imported metric to yesterday.
        # presumably the api only reports a recent window of dates -- TODO confirm
        metric = models.Metric.objects.get(article__doi=doi)
        yesterday = ymd(datetime.now() - timedelta(days=1))
        metric.date = yesterday
        metric.save()

        # only one day was imported, so only one daily entry is expected.
        # a 'total' section was once sketched here; it is not part of the expected data.
        yesterdays_counts = {
            'full': 21922,
            'abstract': 325,
            'digest': 114,
            'pdf': 1533,
        }
        expected_data = {
            doi: {
                'daily': OrderedDict([(yesterday, yesterdays_counts)]),
                'monthly': OrderedDict(),
            },
        }

        resp = self.c.get(reverse('api-article-metrics', kwargs={'doi': doi}))
        self.assertEqual(200, resp.status_code)
        self.assertEqual(expected_data, resp.data)
 def test_ymd(self):
     "core.ymd renders a datetime as a 'YYYY-MM-DD' string, without the time of day"
     moment = datetime(1997, 8, 29, 6, 14) # UTC ;)
     expected = "1997-08-29"
     self.assertEqual(core.ymd(moment), expected)
Exemple #10
0
    def test_mixed_source_data(self):
        "data from multiple sources is served up correctly"
        first_day = datetime(year=2015, month=9, day=11)
        second_day = first_day + timedelta(days=1)
        logic.import_ga_metrics('daily', first_day, second_day)
        logic.import_hw_metrics('daily', first_day, second_day)
        doi = '10.7554/eLife.09560'

        # hack. re-date each source's two imported metrics to the two most recent days.
        # presumably the api only reports a recent window of dates -- TODO confirm
        yesterday = ymd(datetime.now() - timedelta(days=1))
        day_before = ymd(datetime.now() - timedelta(days=2))
        for source in (models.GA, models.HW):
            older, newer = models.Metric.objects.filter(article__doi=doi, source=source)
            older.date = day_before
            newer.date = yesterday
            older.save()
            newer.save()

        def counts(full, abstract, digest, pdf):
            # one day's worth of counts in the shape the test expects back
            return {
                'full': full,
                'abstract': abstract,
                'digest': digest,
                'pdf': pdf,
            }

        ga_daily = OrderedDict()
        ga_daily[day_before] = counts(21922, 325, 114, 1533)
        ga_daily[yesterday] = counts(9528, 110, 42, 489)

        hw_daily = OrderedDict()
        hw_daily[day_before] = counts(39912, 540, 0, 4226)
        hw_daily[yesterday] = counts(15800, 144, 0, 1132)

        expected_data = {
            models.GA: {
                doi: {
                    'daily': ga_daily,
                    'monthly': OrderedDict(),
                },
            },
            models.HW: {
                doi: {
                    'daily': hw_daily,
                    'monthly': OrderedDict(),
                },
            },
        }

        resp = self.c.get(reverse('api-article-metrics-mixed-source', kwargs={'doi': doi}))
        self.assertEqual(200, resp.status_code)
        self.assertEqual(expected_data, resp.data)
Exemple #11
0
def daily(doi, from_date, to_date, source=models.GA):
    """returns daily metrics for the given article between `from_date` and
    `to_date`, both ends included.

    `doi` is matched case-insensitively and results are restricted to the
    given `source` and to metrics whose period is `models.DAY`."""
    matches = models.Metric.objects.filter(article__doi__iexact=doi)
    matches = matches.filter(source=source)
    matches = matches.filter(period=models.DAY)
    # the bounds are compared as `ymd` strings.
    # NOTE(review): the original author asked "does this even work with charfields??" -- confirm
    return matches.filter(date__gte=ymd(from_date), date__lte=ymd(to_date))
Exemple #12
0
 def test_ymd(self):
     "core.ymd gives back only the date part of a datetime, formatted as 'YYYY-MM-DD'"
     timestamp = datetime(1997, 8, 29, 6, 14)  # UTC ;)
     formatted = core.ymd(timestamp)
     self.assertEqual(formatted, "1997-08-29")