Example #1
    def reload_period_from_analytics(cls, period, verbose=False):
        """Replace the stats for the given period from Google Analytics."""
        counts = googleanalytics.pageviews_by_document(*period_dates(period), verbose=verbose)
        if counts:
            # Close any existing connections because our load balancer times
            # them out at 5 minutes and the GA calls take forever.
            close_old_connections()

            # Delete and remake the rows:
            # Horribly inefficient until
            # http://code.djangoproject.com/ticket/9519 is fixed.
            # cls.objects.filter(period=period).delete()

            # Instead, we use raw SQL!
            cursor = connection.cursor()
            cursor.execute(
                "DELETE FROM `dashboards_wikidocumentvisits` WHERE `period` = %s",
                [period],
            )

            # Now we create them again with fresh data.
            for doc_id, visits in counts.items():
                cls.objects.create(document=Document(pk=doc_id), visits=visits, period=period)
        else:
            # Don't erase interesting data if there's nothing to replace it:
            log.warning("Google Analytics returned no interesting data," " so I kept what I had.")
Example #2
 def test_document_translate_fallback(self):
     d_en = DocumentFactory(locale='en-US',
                            title=u'How to delete Google Chrome?')
     invalid_translate = reverse('wiki.document',
                                 locale='tr',
                                 args=[d_en.slug])
     self.assertEqual(d_en, Document.from_url(invalid_translate))
Example #3
 def test_document_translate_fallback(self):
     d_en = document(locale='en-US',
                     title=u'How to delete Google Chrome?',
                     save=True)
     invalid_translate = reverse('wiki.document', locale='tr',
                                 args=[d_en.slug])
     self.assertEqual(d_en, Document.from_url(invalid_translate))
Example #4
 def test_document_translate_fallback(self):
     d_en = DocumentFactory(locale="en-US",
                            title="How to delete Google Chrome?")
     invalid_translate = reverse("wiki.document",
                                 locale="tr",
                                 args=[d_en.slug])
     self.assertEqual(d_en, Document.from_url(invalid_translate))
Example #5
def document(**kwargs):
    """Return an empty document with enough stuff filled out that it can be
    saved."""
    defaults = {
        'category': CATEGORIES[0][0],
        'title': u'đ' + str(datetime.now())
    }
    defaults.update(kwargs)
    if 'slug' not in kwargs:
        defaults['slug'] = slugify(defaults['title'])
    return Document(**defaults)
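
The tests in Examples #3 and #11 call this helper with save=True, which the snapshot above does not accept. A minimal sketch of that variant, assuming the same defaults (hypothetical; the project's version may differ):

    def document(save=False, **kwargs):
        """Return an empty document, saved to the database if save=True."""
        defaults = {
            'category': CATEGORIES[0][0],
            'title': u'đ' + str(datetime.now())
        }
        defaults.update(kwargs)
        if 'slug' not in kwargs:
            defaults['slug'] = slugify(defaults['title'])
        doc = Document(**defaults)
        if save:
            doc.save()
        return doc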
Example #6
def pageviews_by_document(start_date, end_date):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did with
    webtrends.

    Returns a dict with pageviews for each document:
        {<document_id>: <pageviews>,
         1: 42,
         7: 1337,...}
    """
    counts = {}
    request = _build_request()
    start_index = 1
    max_results = 10000

    while True:  # To deal with pagination

        @retry_503
        def _make_request():
            return request.get(
                ids='ga:' + profile_id,
                start_date=str(start_date),
                end_date=str(end_date),
                metrics='ga:pageviews',
                dimensions='ga:pagePath',
                filters='ga:pagePathLevel2==/kb/;ga:pagePathLevel1==/en-US/',
                max_results=max_results,
                start_index=start_index).execute()

        results = _make_request()

        for result in results['rows']:
            path = result[0]
            pageviews = int(result[1])
            doc = Document.from_url(path, id_only=True, check_host=False)
            if not doc:
                continue

            # The same document can appear multiple times due to url params.
            counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

        # Move to next page of results.
        start_index += max_results
        if start_index > results['totalResults']:
            break

    return counts
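
A minimal usage sketch, assuming Google Analytics credentials are already configured for _build_request() (the dates here are placeholders):

    from datetime import date

    counts = pageviews_by_document(date(2013, 1, 1), date(2013, 1, 31))
    # Show the ten most-viewed documents.
    for doc_id, views in sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:10]:
        print('{0}: {1}'.format(doc_id, views))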
Example #7
 def reload_period_from_analytics(cls, period):
     """Replace the stats for the given period from Google Analytics."""
     counts = googleanalytics.pageviews_by_document(*period_dates(period))
     if counts:
         # Delete and remake the rows:
         # Horribly inefficient until
         # http://code.djangoproject.com/ticket/9519 is fixed.
         cls.objects.filter(period=period).delete()
         for doc_id, visits in counts.iteritems():
             cls.objects.create(document=Document(pk=doc_id),
                                visits=visits,
                                period=period)
     else:
         # Don't erase interesting data if there's nothing to replace it:
         log.warning('Google Analytics returned no interesting data,'
                     ' so I kept what I had.')
Example #8
    def related_documents(self):
        """Return documents that are 'morelikethis' one"""
        if not self.product:
            return []

        # First try to get the results from the cache
        key = 'questions_question:related_docs:%s' % self.id
        documents = cache.get(key)
        if documents is not None:
            statsd.incr('questions.related_documents.cache.hit')
            log.debug(
                'Getting MLT documents for {question} from cache.'.format(
                    question=repr(self)))
            return documents

        try:
            statsd.incr('questions.related_documents.cache.miss')
            s = Document.get_mapping_type().search()
            documents = (s.values_dict('id', 'document_title', 'url').filter(
                document_locale=self.locale,
                document_is_archived=False,
                document_category__in=settings.IA_DEFAULT_CATEGORIES,
                product__in=[self.product.slug]
            ).query(
                __mlt={
                    'fields':
                    ['document_title', 'document_summary', 'document_content'],
                    'like_text':
                    self.title,
                    'min_term_freq':
                    1,
                    'min_doc_freq':
                    1
                })[:3])
            documents = list(documents)
            cache.add(key, documents)
        except ES_EXCEPTIONS:
            statsd.incr('questions.related_documents.esexception')
            log.exception('ES MLT related_documents')
            documents = []

        return documents
Example #9
    def related_documents(self):
        """Return documents that are 'morelikethis' one"""
        if not self.product:
            return []

        # First try to get the results from the cache
        key = 'questions_question:related_docs:%s' % self.id
        documents = cache.get(key)
        if documents is not None:
            statsd.incr('questions.related_documents.cache.hit')
            log.debug('Getting MLT documents for {question} from cache.'
                      .format(question=repr(self)))
            return documents

        try:
            statsd.incr('questions.related_documents.cache.miss')
            s = Document.get_mapping_type().search()
            documents = (
                s.values_dict('id', 'document_title', 'url')
                .filter(document_locale=self.locale,
                        document_is_archived=False,
                        document_category__in=settings.IA_DEFAULT_CATEGORIES,
                        product__in=[self.product.slug])
                .query(__mlt={
                    'fields': ['document_title', 'document_summary',
                               'document_content'],
                    'like_text': self.title,
                    'min_term_freq': 1,
                    'min_doc_freq': 1})
                [:3])
            documents = list(documents)
            cache.add(key, documents)
        except ES_EXCEPTIONS:
            statsd.incr('questions.related_documents.esexception')
            log.exception('ES MLT related_documents')
            documents = []

        return documents
Example #10
class UntranslatedReadout(Readout):
    title = _lazy(u'Untranslated')
    description = _lazy(
        u'This indicates there are no approved translations of these articles. '
        'Some of the articles may have proposed translations waiting to be '
        'reviewed and will appear in the Unreviewed Changes section as well.')
    short_title = _lazy(u'Untranslated')
    details_link_text = _lazy(u'All untranslated articles...')
    slug = 'untranslated'
    column4_label = _lazy(u'Updated')

    def _query_and_params(self, max):
        # Filter by product if specified.
        if self.product:
            extra_joins = PRODUCT_FILTER
            params = (self.locale, LAST_30_DAYS, self.product.id,
                      settings.WIKI_DEFAULT_LANGUAGE)
        else:
            extra_joins = ''
            params = (self.locale, LAST_30_DAYS,
                      settings.WIKI_DEFAULT_LANGUAGE)

        # Incidentally, we tried this both as a left join and as a search
        # against an inner query returning translated docs, and the left join
        # yielded a faster-looking plan (on a production corpus).
        #
        # Find non-archived, localizable documents in categories 10,
        # 20 and 60 having at least one ready-for-localization
        # revision. Of those, show the ones that have no translation.
        query = (
            'SELECT engdoc.slug, engdoc.title, '
            'wiki_revision.reviewed, dashboards_wikidocumentvisits.visits '
            'FROM wiki_document engdoc '
            'INNER JOIN wiki_revision ON '
            'engdoc.latest_localizable_revision_id=wiki_revision.id '
            'LEFT JOIN wiki_document translated ON '
            'engdoc.id=translated.parent_id AND translated.locale=%s '
            'LEFT JOIN dashboards_wikidocumentvisits ON '
            'engdoc.id=dashboards_wikidocumentvisits.document_id AND '
            'dashboards_wikidocumentvisits.period=%s ' + extra_joins + 'WHERE '
            '(translated.id IS NULL OR translated.current_revision_id IS NULL) '
            'AND engdoc.is_localizable AND '
            'engdoc.category in (10, 20, 60) AND '
            'engdoc.locale=%s AND NOT engdoc.is_archived '
            'AND wiki_revision.content NOT LIKE "REDIRECT%%" ' +
            self._order_clause() + self._limit_clause(max))

        return query, params

    def _order_clause(self):
        return ('ORDER BY wiki_revision.reviewed DESC, engdoc.title ASC'
                if self.mode == MOST_RECENT else
                'ORDER BY dashboards_wikidocumentvisits.visits DESC, '
                'engdoc.title ASC')

    def _format_row(self, (slug, title, reviewed, visits)):
        # Run the data through the model to (potentially) format it and
        # take advantage of SPOTs (like for get_absolute_url()):
        d = Document(slug=slug,
                     title=title,
                     locale=settings.WIKI_DEFAULT_LANGUAGE)
        return dict(title=d.title,
                    url=d.get_absolute_url(),
                    visits=visits,
                    updated=reviewed)
Example #11
 def test_document_translate_fallback(self):
     d_en = document(locale="en-US", title=u"How to delete Google Chrome?", save=True)
     invalid_translate = reverse("wiki.document", locale="tr", args=[d_en.slug])
     self.assertEqual(d_en, Document.from_url(invalid_translate))
Example #12
def pageviews_by_document(start_date, end_date, verbose=False):
    """Return the number of pageviews by document in a given date range.

    * Only returns en-US documents for now since that's what we did with
    webtrends.

    Returns a dict with pageviews for each document:
        {<document_id>: <pageviews>,
         1: 42,
         7: 1337,...}
    """
    counts = {}
    request = _build_request()
    max_results = 10000

    end_date_step = end_date

    while True:  # To reduce the size of result set request 3 months at a time
        start_date_step = end_date_step - timedelta(90)

        if start_date_step < start_date:
            start_date_step = start_date

        if verbose:
            print 'Fetching data for {0!s} to {1!s}:'.format(
                start_date_step, end_date_step)

        start_index = 1

        while True:  # To deal with pagination

            @retry_503
            def _make_request():
                return request.get(
                    ids='ga:' + profile_id,
                    start_date=str(start_date_step),
                    end_date=str(end_date_step),
                    metrics='ga:pageviews',
                    dimensions='ga:pagePath',
                    filters=('ga:pagePathLevel2==/kb/;'
                             'ga:pagePathLevel1==/en-US/'),
                    max_results=max_results,
                    start_index=start_index).execute()

            results = _make_request()

            if verbose:
                d = (max_results - 1
                     if start_index + max_results - 1 < results['totalResults']
                     else results['totalResults'] - start_index)
                print '- Got {0!s} of {1!s} results.'.format(
                    start_index + d, results['totalResults'])

            for result in results.get('rows', []):
                path = result[0]
                pageviews = int(result[1])
                doc = Document.from_url(path, id_only=True, check_host=False)
                if not doc:
                    continue

                # The same document can appear multiple times due to url params
                counts[doc.pk] = counts.get(doc.pk, 0) + pageviews

            # Move to next page of results.
            start_index += max_results
            if start_index > results.get('totalResults', 0):
                break

        end_date_step = start_date_step - timedelta(1)

        if start_date_step == start_date or end_date_step < start_date:
            break

    return counts
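
The retry_503 decorator used above is not shown in these examples. A minimal sketch of such a wrapper, assuming it retries the call once when the Google API responds with an HTTP 503 (the project's real helper may differ):

    from googleapiclient.errors import HttpError  # assumed import

    def retry_503(f):
        """Call f, retrying once if Google Analytics returns a 503."""
        def wrapper(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            except HttpError as e:
                if e.resp.status == 503:
                    return f(*args, **kwargs)
                raise
        return wrapper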