Esempio n. 1
0
 def test_document_translate_fallback(self):
     # Only an en-US document is created here; a URL reversed for the 'tr'
     # locale is still expected to resolve to that en-US document.
     english_doc = document(
         locale='en-US',
         title=u'How to delete Google Chrome?',
         save=True)
     tr_url = reverse('wiki.document', locale='tr', args=[english_doc.slug])
     resolved = Document.from_url(tr_url)
     self.assertEqual(english_doc, resolved)
Esempio n. 2
0
 def test_document_translate_fallback(self):
     # The factory call below creates only an en-US document, so looking it
     # up through a 'tr' URL must hand back that same en-US document.
     source_doc = DocumentFactory(
         locale='en-US', title=u'How to delete Google Chrome?')
     url_kwargs = {'locale': 'tr', 'args': [source_doc.slug]}
     untranslated_url = reverse('wiki.document', **url_kwargs)
     found = Document.from_url(untranslated_url)
     self.assertEqual(source_doc, found)
Esempio n. 3
0
 def test_document_translate_fallback(self):
     # Build an en-US document, then request it under the "tr" locale, for
     # which this test creates no translation.
     en_doc = DocumentFactory(
         locale="en-US",
         title="How to delete Google Chrome?",
     )
     slug_args = [en_doc.slug]
     turkish_url = reverse("wiki.document", locale="tr", args=slug_args)

     # The lookup is expected to return the en-US document.
     looked_up = Document.from_url(turkish_url)
     self.assertEqual(en_doc, looked_up)
Esempio n. 4
0
def pageviews_by_document(start_date, end_date):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did with
      webtrends.

    The report is fetched page by page (``max_results`` rows per request)
    until the reported ``totalResults`` has been covered. A response that
    lacks ``rows`` or ``totalResults`` is treated as an empty, final page
    instead of raising ``KeyError``.

    Args:
        start_date: Start of the range; sent to the API as ``str(start_date)``.
        end_date: End of the range; sent to the API as ``str(end_date)``.

    Returns a dict with pageviews for each document:
        {<document_id>: <pageviews>,
         1: 42,
         7: 1337,...}
    """
    counts = {}
    request = _build_request()
    start_index = 1
    max_results = 10000

    while True:  # To deal with pagination

        @retry_503
        def _make_request():
            return request.get(
                ids='ga:' + profile_id,
                start_date=str(start_date),
                end_date=str(end_date),
                metrics='ga:pageviews',
                dimensions='ga:pagePath',
                filters='ga:pagePathLevel2==/kb/;ga:pagePathLevel1==/en-US/',
                max_results=max_results,
                start_index=start_index).execute()

        results = _make_request()

        # 'rows' can be absent when the query matched nothing, so default to
        # an empty page rather than failing on the key lookup.
        for result in results.get('rows', []):
            path = result[0]
            pageviews = int(result[1])
            doc = Document.from_url(path, id_only=True, check_host=False)
            if not doc:
                # Path does not map to a known document; skip it.
                continue

            # The same document can appear multiple times due to url params.
            counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

        # Move to next page of results; a missing total means we are done.
        start_index += max_results
        if start_index > results.get('totalResults', 0):
            break

    return counts
Esempio n. 5
0
 def test_document_translate_fallback(self):
     # A saved en-US document is the only one created in this test; the
     # "tr" URL for its slug should still resolve to it.
     saved_en_doc = document(
         locale="en-US",
         title=u"How to delete Google Chrome?",
         save=True,
     )
     tr_locale_url = reverse(
         "wiki.document", locale="tr", args=[saved_en_doc.slug])
     match = Document.from_url(tr_locale_url)
     self.assertEqual(saved_en_doc, match)
Esempio n. 6
0
def pageviews_by_document(start_date, end_date, verbose=False):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did with
      webtrends.

    The range is walked backwards from ``end_date`` in windows that start
    at most 90 days before the window end, and each window is fetched page
    by page (``max_results`` rows per request). A response that lacks
    ``rows`` or ``totalResults`` is treated as an empty, final page, with
    or without ``verbose``.

    Args:
        start_date: Start of the overall range.
        end_date: End of the overall range.
        verbose: When true, print progress for every window and page.

    Returns a dict with pageviews for each document:
        {<document_id>: <pageviews>,
         1: 42,
         7: 1337,...}
    """
    counts = {}
    request = _build_request()
    max_results = 10000

    end_date_step = end_date

    while True:  # To reduce the size of result set request 3 months at a time
        # Clamp the window start so it never goes before start_date.
        start_date_step = end_date_step - timedelta(90)

        if start_date_step < start_date:
            start_date_step = start_date

        if verbose:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and also parses on Python 3.
            print('Fetching data for {0!s} to {1!s}:'.format(start_date_step,
                                                             end_date_step))

        start_index = 1

        while True:  # To deal with pagination

            @retry_503
            def _make_request():
                return request.get(
                    ids='ga:' + profile_id,
                    start_date=str(start_date_step),
                    end_date=str(end_date_step),
                    metrics='ga:pageviews',
                    dimensions='ga:pagePath',
                    filters=('ga:pagePathLevel2==/kb/;'
                             'ga:pagePathLevel1==/en-US/'),
                    max_results=max_results,
                    start_index=start_index).execute()

            results = _make_request()

            # Read the total once with a default so the verbose output and
            # the pagination check agree when the key is absent.
            total_results = results.get('totalResults', 0)

            if verbose:
                # Index of the last row covered so far, capped at the total.
                fetched = min(start_index + max_results - 1, total_results)
                print('- Got {0!s} of {1!s} results.'.format(fetched,
                                                             total_results))

            for result in results.get('rows', []):
                path = result[0]
                pageviews = int(result[1])
                doc = Document.from_url(path, id_only=True, check_host=False)
                if not doc:
                    # Path does not map to a known document; skip it.
                    continue

                # The same document can appear multiple times due to url params
                counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

            # Move to next page of results.
            start_index += max_results
            if start_index > total_results:
                break

        # The next (earlier) window ends the day before this one started.
        end_date_step = start_date_step - timedelta(1)

        if start_date_step == start_date or end_date_step < start_date:
            break

    return counts
Esempio n. 7
0
def pageviews_by_document(start_date, end_date, verbose=False):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did with
      webtrends.

    The range is walked backwards from ``end_date`` in windows that start
    at most 90 days before the window end, and each window is fetched page
    by page (``max_results`` rows per request). A response that lacks
    ``rows`` or ``totalResults`` is treated as an empty, final page instead
    of raising ``KeyError``.

    Args:
        start_date: Start of the overall range.
        end_date: End of the overall range.
        verbose: When true, print progress for every window and page.

    Returns a dict with pageviews for each document:
        {<document_id>: <pageviews>,
         1: 42,
         7: 1337,...}
    """
    counts = {}
    request = _build_request()
    max_results = 10000

    end_date_step = end_date

    while True:  # To reduce the size of result set request 3 months at a time
        # Clamp the window start so it never goes before start_date.
        start_date_step = end_date_step - timedelta(90)

        if start_date_step < start_date:
            start_date_step = start_date

        if verbose:
            # Single-argument print(...) behaves the same as the print
            # statement on Python 2 and also parses on Python 3.
            print('Fetching data for %s to %s:' % (start_date_step,
                                                   end_date_step))

        start_index = 1

        while True:  # To deal with pagination

            @retry_503
            def _make_request():
                return request.get(ids='ga:' + profile_id,
                                   start_date=str(start_date_step),
                                   end_date=str(end_date_step),
                                   metrics='ga:pageviews',
                                   dimensions='ga:pagePath',
                                   filters=('ga:pagePathLevel2==/kb/;'
                                            'ga:pagePathLevel1==/en-US/'),
                                   max_results=max_results,
                                   start_index=start_index).execute()

            results = _make_request()

            # Either key can be absent when the query matched nothing, so
            # fall back to "no rows, zero total" rather than failing.
            total_results = results.get('totalResults', 0)
            rows = results.get('rows', [])

            if verbose:
                # Index of the last row covered so far, capped at the total.
                fetched = min(start_index + max_results - 1, total_results)
                print('- Got %s of %s results.' % (fetched, total_results))

            for result in rows:
                path = result[0]
                pageviews = int(result[1])
                doc = Document.from_url(path, id_only=True, check_host=False)
                if not doc:
                    # Path does not map to a known document; skip it.
                    continue

                # The same document can appear multiple times due to url params
                counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

            # Move to next page of results.
            start_index += max_results
            if start_index > total_results:
                break

        # The next (earlier) window ends the day before this one started.
        end_date_step = start_date_step - timedelta(1)

        if start_date_step == start_date or end_date_step < start_date:
            break

    return counts