Ejemplo n.º 1
0
def test_spam_with_many_response(create_revision, akismet_wiki_user,
                                 akismet_wiki_user_2, user_client,
                                 enable_akismet_submissions,
                                 akismet_mock_requests):
    """
    Posting a spam submission for a revision that already has a "ham"
    submission returns both submissions in the JSON response.
    """
    # Seed the revision with a "ham" submission sent by the second user.
    existing = RevisionAkismetSubmission(
        revision=create_revision,
        sender=akismet_wiki_user_2,
        type="ham",
    )
    existing.save()

    # Only the seeded submission should exist before the endpoint is called.
    stored = RevisionAkismetSubmission.objects.filter(
        revision=create_revision)
    assert stored.count() == 1

    # Add a second submission for the same revision through the endpoint.
    response = user_client.post(
        reverse('wiki.submit_akismet_spam', locale='en-US'),
        data={'revision': create_revision.id})
    assert response.status_code == 201

    # The response must carry every no-caching directive.
    for directive in ('max-age=0', 'no-cache', 'no-store',
                      'must-revalidate'):
        assert directive in response['Cache-Control']

    # The payload lists the seeded ham entry first, then the new spam entry.
    payload = json.loads(response.content)
    assert len(payload) == 2
    ham_entry, spam_entry = payload[0], payload[1]
    assert ham_entry['type'] == 'ham'
    assert ham_entry['sender'] == akismet_wiki_user_2.username
    assert spam_entry['type'] == 'spam'
    assert spam_entry['sender'] == akismet_wiki_user.username

    # The mocked Akismet endpoints must have been hit exactly twice.
    assert akismet_mock_requests.called
    assert akismet_mock_requests.call_count == 2
Ejemplo n.º 2
0
def test_spam_with_many_response(create_revision, akismet_wiki_user,
                                 akismet_wiki_user_2, user_client,
                                 enable_akismet_submissions,
                                 akismet_mock_requests):
    """
    The spam endpoint responds with every Akismet submission of the
    revision, including one that existed before the request.
    """
    # Start from a revision that was already submitted as "ham".
    ham_submission = RevisionAkismetSubmission(
        type="ham", sender=akismet_wiki_user_2, revision=create_revision)
    ham_submission.save()

    # Sanity check: that is the only submission so far.
    for_revision = RevisionAkismetSubmission.objects.filter(
        revision=create_revision)
    assert for_revision.count() == 1

    # Submit the same revision as spam via the endpoint.
    endpoint = reverse('wiki.submit_akismet_spam', locale='en-US')
    post_data = {'revision': create_revision.id}
    response = user_client.post(endpoint, data=post_data)
    assert response.status_code == 201

    # Caching must be disabled on the response.
    cache_control = response['Cache-Control']
    assert 'max-age=0' in cache_control
    assert 'no-cache' in cache_control
    assert 'no-store' in cache_control
    assert 'must-revalidate' in cache_control

    # Two entries are expected: the earlier ham one, then the new spam one.
    entries = json.loads(response.content)
    assert len(entries) == 2
    expected = [
        ('ham', akismet_wiki_user_2.username),
        ('spam', akismet_wiki_user.username),
    ]
    for entry, (kind, username) in zip(entries, expected):
        assert entry['type'] == kind
        assert entry['sender'] == username

    # Check that the Akismet endpoints were called.
    assert akismet_mock_requests.called
    assert akismet_mock_requests.call_count == 2
Ejemplo n.º 3
0
    def test_recent_spam_revisions_show(self, mock_analytics_upageviews):
        """The correct spam revisions should show up."""
        # Work with three documents: self.document plus two new ones.
        extra_doc_a = create_document(save=True)
        extra_doc_b = create_document(save=True)
        documents = (self.document, extra_doc_a, extra_doc_b)

        # self.testuser makes one revision on each of those documents.
        revs_per_doc = [
            self.create_revisions(num=1, creator=self.testuser, document=doc)
            for doc in documents
        ]
        created_revisions = (
            revs_per_doc[0] + revs_per_doc[1] + revs_per_doc[2])

        # Backdate every revision so it counts as created yesterday.
        one_day = datetime.timedelta(days=1)
        for rev in created_revisions:
            rev.created = datetime.datetime.today() - one_day
            rev.save()

        # self.admin marks each of self.testuser's revisions as spam.
        for rev in created_revisions:
            spam_report = RevisionAkismetSubmission(sender=self.admin,
                                                    type="spam")
            rev.akismet_submissions.add(spam_report)

        # Revisions by self.admin, which are not marked as spam here.
        self.create_revisions(num=3, creator=self.admin)

        # The analytics mock reports zero page views for each spam revision.
        mock_analytics_upageviews.return_value = {
            revs[0].id: 0 for revs in revs_per_doc
        }

        self.client.login(username='******', password='******')
        dashboard_url = reverse('dashboards.spam', locale='en-US')

        # The first response will say that the report is being processed
        first_response = self.client.get(dashboard_url)
        eq_(200, first_response.status_code)

        # The second request is the one whose table is inspected.
        second_response = self.client.get(dashboard_url)
        table_rows = pq(second_response.content).find(
            '.spam-events-table tbody tr')
        table_text = ''.join(row.text_content() for row in table_rows)

        # One row per spam revision, each mentioning its document's URL.
        eq_(len(table_rows), len(created_revisions))
        for rev in created_revisions:
            document_url = reverse(
                'wiki.document',
                kwargs={'document_path': rev.document.slug})
            ok_(document_url in table_text)
Ejemplo n.º 4
0
def submit_akismet_spam(request):
    """
    Report a published revision to Akismet as spam, then redirect.

    The redirect goes to the POSTed ``next`` URL when it is present and
    passes ``is_safe_url`` for the request's host; otherwise it goes to the
    revision dashboard. A form that fails validation is not saved and no
    error is reported: the redirect happens either way.
    """
    next_url = request.POST.get('next')
    has_safe_next = (next_url is not None and
                     is_safe_url(next_url, request.get_host()))
    if not has_safe_next:
        # Missing or unsafe target: fall back to the revision dashboard.
        next_url = reverse('dashboards.revisions')

    spam_form = RevisionAkismetSubmissionSpamForm(
        data=request.POST,
        instance=RevisionAkismetSubmission(sender=request.user, type='spam'),
        request=request)
    if spam_form.is_valid():
        spam_form.save()

    return redirect(next_url)
Ejemplo n.º 5
0
def submit_akismet_spam(request):
    """
    Create a SPAM Akismet record for a revision and answer with JSON.

    When the POSTed form is valid, the record is saved and the response is a
    201 whose body is a JSON list of all Akismet records for that revision,
    ordered by id, each with ``sender``, ``sent`` and ``type``. When the form
    is invalid the response is ``HttpResponseBadRequest``.

    TODO: Create Submitting as HAM record for revision
    """
    form = RevisionAkismetSubmissionSpamForm(
        data=request.POST,
        instance=RevisionAkismetSubmission(sender=request.user, type="spam"),
        request=request)

    if not form.is_valid():
        return HttpResponseBadRequest()

    form.save()

    # Every submission recorded for this revision, in creation (id) order.
    history = (RevisionAkismetSubmission.objects
               .filter(revision=form.cleaned_data['revision'])
               .order_by('id')
               .values('sender__username', 'sent', 'type'))

    payload = []
    for row in history:
        # format_date_time returns a sequence; only its first item is sent.
        sent_display = format_date_time(value=row["sent"],
                                        format='datetime',
                                        request=request)[0]
        payload.append({
            "sender": row["sender__username"],
            "sent": sent_display,
            "type": row["type"],
        })

    return HttpResponse(json.dumps(payload, sort_keys=True),
                        content_type='application/json; charset=utf-8',
                        status=201)
Ejemplo n.º 6
0
def ban_user_and_cleanup_summary(request, username):
    """
    Ban a user for spam, clean up their recent revisions, and show a summary.

    The user named ``username`` is banned with reason "Spam"; if an active
    ban already exists, only its ``by`` and ``reason`` are updated. From the
    revisions the user created in the last three days, those whose ids were
    POSTed as ``revision-id`` are submitted to Akismet as spam. Where such a
    revision is still its document's current revision, the document is
    reverted to the revision preceding the spam, or deleted when there is no
    preceding revision.

    The rendered summary groups revisions into "actions taken", "needs follow
    up" and "no actions taken". The same context is also handed to
    ``ban_and_revert_notification``.

    Responds with a 404 if no user with ``username`` exists.
    """
    user = get_object_or_404(User, username=username)

    # Is this user already banned?
    user_ban = UserBan.objects.filter(user=user, is_active=True)

    # If the user is not banned, ban user; else, update 'by' and 'reason'
    if not user_ban.exists():
        ban = UserBan(user=user, by=request.user, reason="Spam", is_active=True)
        ban.save()
    else:
        user_ban.update(by=request.user, reason="Spam")

    # The cutoff is a date (no time of day), three days before today.
    # NOTE(review): it is built from a naive datetime.now() - confirm that
    # this matches how revision `created` values are stored.
    date_three_days_ago = datetime.now().date() - timedelta(days=3)
    # All of the user's revisions since the cutoff, newest first, with the
    # "content" and "summary" fields deferred.
    revisions_from_last_three_days = user.created_revisions.prefetch_related("document")
    revisions_from_last_three_days = revisions_from_last_three_days.defer(
        "content", "summary"
    ).order_by("-id")
    revisions_from_last_three_days = revisions_from_last_three_days.filter(
        created__gte=date_three_days_ago
    )

    """ The "Actions Taken" section """
    # The revisions to be submitted to Akismet and reverted,
    # these must be sorted descending so that they are reverted accordingly
    revisions_to_mark_as_spam_and_revert = revisions_from_last_three_days.filter(
        id__in=request.POST.getlist("revision-id")
    ).order_by("-id")

    # 1. Submit revisions to Akismet as spam
    # 2. If this is the most recent revision for a document:
    #    Revert revision if it has a previous version OR
    #    Delete revision if it is a new document
    submitted_to_akismet = []
    not_submitted_to_akismet = []
    revisions_reverted_list = []
    revisions_not_reverted_list = []
    revisions_deleted_list = []
    revisions_not_deleted_list = []
    # Selected revisions (one per document) that are not their document's
    # current revision. This is computed before the loop below, so it
    # reflects the documents as they were before any revert or delete.
    latest_is_not_spam = [
        rev
        for rev in revision_by_distinct_doc(revisions_to_mark_as_spam_and_revert)
        if rev.document.current_revision != rev
    ]
    # Maps document id -> the revision treated as the last good one for that
    # document; used further down to work out which revisions were skipped.
    previous_good_rev = {}

    for revision in revisions_to_mark_as_spam_and_revert:
        submission = RevisionAkismetSubmission(sender=request.user, type="spam")
        akismet_submission_data = {"revision": revision.id}

        data = RevisionAkismetSubmissionSpamForm(
            data=akismet_submission_data, instance=submission, request=request
        )
        # Submit to Akismet or note that validation & sending to Akismet failed
        if data.is_valid():
            data.save()
            # Every successfully submitted revision is recorded here; the
            # list is reduced to one revision per document after the loop.
            submitted_to_akismet.append(revision)
        else:
            not_submitted_to_akismet.append(revision)

        # If the document's current revision is not one of the revisions
        # selected as spam, do not revert anything on that document.
        # The current revision is re-read from the database because an
        # earlier iteration may have reverted or deleted the document.
        try:
            revision.document.refresh_from_db(fields=["current_revision"])
        except Document.DoesNotExist:
            continue  # This document was previously deleted in this loop, continue
        if (
            revision.document.current_revision
            not in revisions_to_mark_as_spam_and_revert
        ):
            # Remember the current revision as the good one, but only the
            # first time this document is seen in this state.
            if revision.document_id not in previous_good_rev:
                previous_good_rev[
                    revision.document_id
                ] = revision.document.current_revision

            continue  # This document has a more current revision, no need to revert

        # Loop through all previous revisions to find the oldest spam
        # revision on a specific document from this request.
        # Note that this rebinds the loop variable: from here on `revision`
        # is that oldest selected revision, and it is what gets recorded in
        # the reverted/deleted lists below.
        while revision.previous in revisions_to_mark_as_spam_and_revert:
            revision = revision.previous
        # If this is a new revision on an existing document, revert it
        if revision.previous:
            previous_good_rev[revision.document_id] = revision.previous

            # Revert the document to the revision just before the spam.
            reverted = revert_document(
                request=request, revision_id=revision.previous.id
            )
            if reverted:
                revisions_reverted_list.append(revision)
            else:
                # If the revert was unsuccessful, include this in the follow-up list
                revisions_not_reverted_list.append(revision)

        # If this is a new document/translation, delete it
        else:
            deleted = delete_document(request=request, document=revision.document)
            if deleted:
                revisions_deleted_list.append(revision)
            else:
                # If the delete was unsuccessful, include this in the follow-up list
                revisions_not_deleted_list.append(revision)

    # Find just the latest revision for each document
    submitted_to_akismet_by_distinct_doc = revision_by_distinct_doc(
        submitted_to_akismet
    )
    not_submitted_to_akismet_by_distinct_doc = revision_by_distinct_doc(
        not_submitted_to_akismet
    )
    revisions_reverted_by_distinct_doc = revision_by_distinct_doc(
        revisions_reverted_list
    )
    revisions_not_reverted_by_distinct_doc = revision_by_distinct_doc(
        revisions_not_reverted_list
    )
    revisions_deleted_by_distinct_doc = revision_by_distinct_doc(revisions_deleted_list)
    revisions_not_deleted_by_distinct_doc = revision_by_distinct_doc(
        revisions_not_deleted_list
    )

    actions_taken = {
        "revisions_reported_as_spam": submitted_to_akismet_by_distinct_doc,
        "revisions_reverted_list": revisions_reverted_by_distinct_doc,
        "revisions_deleted_list": revisions_deleted_by_distinct_doc,
    }

    """ The "Needs followup" section """
    # TODO: Phase V: If user made actions while reviewer was banning them
    new_action_by_user = []
    # Selected revisions that are older (lower id) than the revision recorded
    # as the last good one for their document.
    skipped_revisions = [
        rev
        for rev in revisions_to_mark_as_spam_and_revert
        if rev.document_id in previous_good_rev
        and rev.id < previous_good_rev[rev.document_id].id
    ]
    skipped_revisions = revision_by_distinct_doc(skipped_revisions)

    needs_follow_up = {
        "manual_revert": new_action_by_user,
        "skipped_revisions": skipped_revisions,
        "not_submitted_to_akismet": not_submitted_to_akismet_by_distinct_doc,
        "not_reverted_list": revisions_not_reverted_by_distinct_doc,
        "not_deleted_list": revisions_not_deleted_by_distinct_doc,
    }

    """ The "No Actions Taken" section """
    # Recent revisions whose ids were POSTed as "revision-already-spam".
    revisions_already_spam = revisions_from_last_three_days.filter(
        id__in=request.POST.getlist("revision-already-spam")
    )
    revisions_already_spam = list(revisions_already_spam)
    revisions_already_spam_by_distinct_doc = revision_by_distinct_doc(
        revisions_already_spam
    )

    # Everything else from the last three days: neither already marked as
    # spam nor selected for spam submission in this request.
    identified_as_not_spam = [
        rev
        for rev in revisions_from_last_three_days
        if rev not in revisions_already_spam
        and rev not in revisions_to_mark_as_spam_and_revert
    ]
    identified_as_not_spam_by_distinct_doc = revision_by_distinct_doc(
        identified_as_not_spam
    )

    no_actions_taken = {
        "latest_revision_is_not_spam": latest_is_not_spam,
        "revisions_already_identified_as_spam": revisions_already_spam_by_distinct_doc,
        "revisions_identified_as_not_spam": identified_as_not_spam_by_distinct_doc,
    }

    context = {
        "detail_user": user,
        "form": UserBanForm(),
        "actions_taken": actions_taken,
        "needs_follow_up": needs_follow_up,
        "no_actions_taken": no_actions_taken,
    }

    # Send an email to the spam watch mailing list.
    ban_and_revert_notification(user, request.user, context)

    return render(request, "users/ban_user_and_cleanup_summary.html", context)
Ejemplo n.º 7
0
    def handle(self, *args, **options):
        """
        Submit the current revisions of spam-deleted documents to Akismet.

        Looks at the document deletion logs from the last ``options['days']``
        days whose reason mentions "spam" or "junk". For each log that still
        maps to a deleted document with a current revision, that revision is
        submitted as spam and a ``RevisionAkismetSubmission`` is stored with
        the user named ``options['username']`` as sender. When
        ``options['dry_run']`` is set, nothing is submitted or stored.

        Raises ``CommandError`` when the Akismet client is not ready.
        """
        dry_run = options['dry_run']

        # Only deletion logs at or after this point in time are considered.
        cutoff = timezone.now() - timedelta(days=options['days'])
        # They use "spam" or "junk"
        # deleting spam revisions;
        # the spam makes me cry.  -- willkg
        spam_or_junk = (models.Q(reason__icontains='spam') |
                        models.Q(reason__icontains='junk'))
        logged_deletions = DocumentDeletionLog.objects.filter(
            timestamp__gte=cutoff,
        ).filter(spam_or_junk)
        total = logged_deletions.count()
        self.stdout.write('Checking %s deleted document logs' % total)

        sender = get_user_model().objects.get(username=options['username'])
        self.stdout.write(u'Submitting spam to Akismet as user %s' % sender)

        akismet = Akismet()
        if not akismet.ready:
            raise CommandError('Akismet client is not ready')

        for position, deletion in enumerate(logged_deletions.iterator(), 1):
            # Progress prefix; the outcome message follows on the same line.
            self.stdout.write('%d/%d: ' % (position, total), ending='')

            # Look up the deleted document the log entry refers to.
            document = Document.admin_objects.filter(
                locale=deletion.locale,
                slug=deletion.slug,
            ).first()

            if document is None:
                # Nothing matches that locale and slug any more,
                # probably purged at some point.
                self.stderr.write(u'Ignoring locale %s and slug %s' %
                                  (deletion.locale, deletion.slug))
                continue

            if not document.deleted:
                # The document seems to have been undeleted since.
                self.stderr.write(u'Ignoring undeleted document %s' % document)
                continue

            revision = document.current_revision
            if not revision:
                # Without a current revision there is nothing to submit.
                self.stderr.write(u'Ignoring document %s without current '
                                  u'revision' % document.pk)
                continue

            params = revision_akismet_parameters(revision)
            if dry_run:
                # Dry run: report what would have been sent and move on.
                self.stdout.write(u'Not submitting current revision %s of '
                                  u'document %s because of dry-mode' %
                                  (revision.pk, document.pk))
                continue

            try:
                akismet.submit_spam(**params)
            except AkismetError as exc:
                self.stderr.write(u'Akismet error while submitting current '
                                  u'revision %s of document %s: %s' %
                                  (revision.pk, document.pk,
                                   exc.debug_help))
                continue

            # Submission went through: report it and keep a record.
            self.stdout.write(u'Successfully submitted current '
                              u'revision %s of document %s' %
                              (revision.pk, document.pk))
            record = RevisionAkismetSubmission(
                revision=revision,
                sender=sender,
                type=RevisionAkismetSubmission.SPAM_TYPE,
            )
            record.save()
Ejemplo n.º 8
0
    def test_spam_trends_stats(self, mock_analytics_upageviews):
        """Test that the correct stats show up on the spam trends dashboard."""
        # Length of each reporting period, in days
        days_in_week = 7
        days_in_month = 28
        days_in_quarter = 91

        # Every date in this test is an offset from one shared "today"
        today = datetime.datetime.today()

        def days_before_today(count):
            return today - datetime.timedelta(days=count)

        yesterday = days_before_today(1)
        three_days_ago = days_before_today(3)
        weekly_start_date = days_before_today(days_in_week)
        ten_days_ago = days_before_today(10)
        monthly_start_date = days_before_today(days_in_month)
        thirtyfive_days_ago = days_before_today(35)
        quarterly_start_date = days_before_today(days_in_quarter)
        hundred_days_ago = days_before_today(100)

        # Revisions made by self.testuser, in batches of 3: made today,
        # 3 days ago, 10 days ago, 35 days ago and 100 days ago
        revs = self.create_revisions(num=15, creator=self.testuser,
                                     document=self.document)
        batch_dates = (today, three_days_ago, ten_days_ago,
                       thirtyfive_days_ago, hundred_days_ago)
        for batch, created in enumerate(batch_dates):
            for index in range(3 * batch, 3 * batch + 3):
                revs[index].created = created
        for rev in revs:
            rev.save()

        # Published spam by self.testuser: the last revision of each batch
        spam_revs = [revs[2], revs[5], revs[8], revs[11], revs[14]]
        (spam_rev_today, spam_rev_3_days_ago, spam_rev_10_days_ago,
         spam_rev_35_days_ago, spam_rev_100_days_ago) = spam_revs
        # Spam expected to be counted in each period
        spam_weekly = [spam_rev_3_days_ago]
        spam_monthly = [spam_rev_3_days_ago, spam_rev_10_days_ago]
        spam_quarterly = [spam_rev_3_days_ago, spam_rev_10_days_ago,
                          spam_rev_35_days_ago]
        # All of the spam_revs were published and then marked as spam
        for rev in spam_revs:
            rev.save()
            spam_report = RevisionAkismetSubmission(sender=self.admin,
                                                    type="spam")
            rev.akismet_submissions.add(spam_report)

        # self.testuser's ham expected to be counted in each period
        ham_weekly = revs[3:5]
        ham_monthly = ham_weekly + revs[6:8]
        ham_quarterly = ham_monthly + revs[9:11]

        def block_attempt_three_days_ago(title, slug, review):
            attempt = DocumentSpamAttempt(
                user=self.testuser,
                title=title,
                slug=slug,
                document=self.document,
                review=review
            )
            attempt.save()
            # `created` is overwritten after the first save and saved again;
            # presumably the first save sets it automatically - TODO confirm
            attempt.created = three_days_ago
            attempt.save()

        # There were 2 correctly blocked spam attempts 3 days ago (within
        # the past week)
        true_blocked_spam_num = 2
        for _ in range(0, true_blocked_spam_num):
            block_attempt_three_days_ago('A spam revision',
                                         'spam-revision-slug',
                                         DocumentSpamAttempt.SPAM)

        # There was 1 incorrectly blocked spam attempt 3 days ago
        false_blocked_spam_num = 1
        for _ in range(0, false_blocked_spam_num):
            block_attempt_three_days_ago('Not a spam revision',
                                         'ham-revision-slug',
                                         DocumentSpamAttempt.HAM)

        # The spam from 3 days ago was seen 3 times, from 10 days ago seen
        # 10 times, and from 35 days ago seen 35 times
        page_views = {
            spam_rev_3_days_ago.id: 3,
            spam_rev_10_days_ago.id: 10,
            spam_rev_35_days_ago.id: 35,
        }
        # The mock Google Analytics return values for page views
        mock_analytics_upageviews.return_value = page_views

        self.client.login(username='******', password='******')
        dashboard_url = reverse('dashboards.spam', locale='en-US')
        # The first response will say that the report is being processed
        response = self.client.get(dashboard_url)
        eq_(200, response.status_code)

        response2 = self.client.get(dashboard_url)
        trend_rows = pq(response2.content).find('.spam-trends-table tbody tr')

        def cells(row):
            # One cell per line once spaces and outer newlines are removed
            text = row.text_content().replace(' ', '').strip('\n')
            return text.split('\n')

        row_daily = cells(trend_rows[0])
        row_weekly = cells(trend_rows[1])
        row_monthly = cells(trend_rows[2])
        row_quarterly = cells(trend_rows[3])
        rows = (row_daily, row_weekly, row_monthly, row_quarterly)

        # These are the columns in the spam dashboard spam trends table
        (period, start_date, spam_viewers_change_percent, spam_viewers,
         daily_average_viewers, published_spam, blocked_spam, blocked_ham,
         true_positive_rate, true_negative_rate) = range(10)

        def check_text(column, expected_values):
            for row, expected in zip(rows, expected_values):
                eq_(row[column], expected)

        def check_ints(column, expected_values):
            for row, expected in zip(rows, expected_values):
                eq_(int(row[column]), expected)

        def as_percent(numerator, denominator):
            return '{:.1%}'.format(float(numerator) / denominator)

        # The periods are identified as 'Daily', 'Weekly', 'Monthly',
        # 'Quarterly'
        check_text(period, ('Daily', 'Weekly', 'Monthly', 'Quarterly'))
        # The start dates for each period are correct
        check_text(start_date, [
            moment.strftime('%Y-%m-%d')
            for moment in (yesterday, weekly_start_date, monthly_start_date,
                           quarterly_start_date)
        ])

        # The page views during the week, month, quarter
        spam_views_week = page_views[spam_rev_3_days_ago.id]
        spam_views_month_exclude_week = page_views[spam_rev_10_days_ago.id]
        spam_views_month = spam_views_week + spam_views_month_exclude_week
        spam_views_quarter_exclude_month = page_views[spam_rev_35_days_ago.id]
        spam_views_quarter = (spam_views_month +
                              spam_views_quarter_exclude_month)

        # The percentage change in spam viewers
        weekly_spam_change_percent = as_percent(
            spam_views_week - spam_views_month_exclude_week,
            spam_views_month_exclude_week)
        monthly_spam_change_percent = as_percent(
            spam_views_month - spam_views_quarter_exclude_month,
            spam_views_quarter_exclude_month)
        check_text(spam_viewers_change_percent,
                   ('0.0%', weekly_spam_change_percent,
                    monthly_spam_change_percent, '0.0%'))

        # The spam viewers
        check_ints(spam_viewers,
                   (0, spam_views_week, spam_views_month, spam_views_quarter))

        # The daily average of spam viewers
        eq_(float(row_daily[daily_average_viewers]), 0.0)
        averaged_periods = (
            (row_weekly, spam_views_week, days_in_week),
            (row_monthly, spam_views_month, days_in_month),
            (row_quarterly, spam_views_quarter, days_in_quarter),
        )
        for row, views, days in averaged_periods:
            eq_(row[daily_average_viewers],
                '{:.1f}'.format(float(views) / days))

        # The published spam: 1 this week, 2 this month, 3 this quarter
        check_ints(published_spam,
                   (len([]), len(spam_weekly), len(spam_monthly),
                    len(spam_quarterly)))
        # The blocked spam: there were 2 correctly blocked spam attempts
        # 3 days ago
        check_ints(blocked_spam,
                   (0, true_blocked_spam_num, true_blocked_spam_num,
                    true_blocked_spam_num))
        # The blocked ham: there was 1 incorrectly blocked spam attempt
        # 3 days ago
        check_ints(blocked_ham,
                   (0, false_blocked_spam_num, false_blocked_spam_num,
                    false_blocked_spam_num))

        # The true positive rate == blocked_spam / total spam
        tpr_weekly, tpr_monthly, tpr_quarterly = [
            as_percent(true_blocked_spam_num,
                       true_blocked_spam_num + len(published))
            for published in (spam_weekly, spam_monthly, spam_quarterly)
        ]
        check_text(true_positive_rate,
                   ('100.0%', tpr_weekly, tpr_monthly, tpr_quarterly))

        # The true negative rate == published ham / total ham
        tnr_weekly, tnr_monthly, tnr_quarterly = [
            as_percent(len(published), false_blocked_spam_num + len(published))
            for published in (ham_weekly, ham_monthly, ham_quarterly)
        ]
        check_text(true_negative_rate,
                   ('100.0%', tnr_weekly, tnr_monthly, tnr_quarterly))
Ejemplo n.º 9
0
    def handle(self, *args, **options):
        """
        Submit the current revisions of spam-deleted documents to Akismet.

        Looks at the document deletion logs from the last ``options['days']``
        days whose reason mentions "spam". For each log that still maps to a
        deleted document with a current revision, that revision is submitted
        as spam and a ``RevisionAkismetSubmission`` is stored with the user
        named ``options['username']`` as sender. When ``options['dry_run']``
        is set, nothing is submitted or stored.

        Raises ``CommandError`` when the Akismet client is not ready.
        """
        dry_run = options['dry_run']

        # Only deletion logs at or after this point in time are considered.
        cutoff = timezone.now() - timedelta(days=options['days'])
        # They use "spam"
        # deleting spam revisions;
        # the spam makes me cry.  -- willkg
        logged_deletions = DocumentDeletionLog.objects.filter(
            timestamp__gte=cutoff, reason__icontains='spam')
        total = logged_deletions.count()
        self.stdout.write('Checking %s deleted document logs' % total)

        sender = get_user_model().objects.get(username=options['username'])
        self.stdout.write('Submitting spam to Akismet as user %s' % sender)

        akismet = Akismet()
        if not akismet.ready:
            raise CommandError('Akismet client is not ready')

        for position, deletion in enumerate(logged_deletions.iterator(), 1):
            # Progress prefix; the outcome message follows on the same line.
            self.stdout.write('%d/%d: ' % (position, total), ending='')

            # Look up the deleted document the log entry refers to.
            document = Document.admin_objects.filter(
                locale=deletion.locale,
                slug=deletion.slug,
            ).first()

            if document is None:
                # Nothing matches that locale and slug any more,
                # probably purged at some point.
                self.stderr.write('Ignoring locale %s and slug %s' %
                                  (deletion.locale, deletion.slug))
                continue

            if not document.deleted:
                # The document seems to have been undeleted since.
                self.stderr.write('Ignoring undeleted document %s' % document)
                continue

            revision = document.current_revision
            if not revision:
                # Without a current revision there is nothing to submit.
                self.stderr.write('Ignoring document %s without current '
                                  'revision' % document.pk)
                continue

            params = AkismetHistoricalData(revision).parameters
            if dry_run:
                # Dry run: report what would have been sent and move on.
                self.stdout.write('Not submitting current revision %s of '
                                  'document %s because of dry-mode' %
                                  (revision.pk, document.pk))
                continue

            try:
                akismet.submit_spam(**params)
            except AkismetError as exc:
                self.stderr.write('Akismet error while submitting current '
                                  'revision %s of document %s: %s' %
                                  (revision.pk, document.pk,
                                   exc.debug_help))
                continue

            # Submission went through: report it and keep a record.
            self.stdout.write('Successfully submitted current '
                              'revision %s of document %s' %
                              (revision.pk, document.pk))
            record = RevisionAkismetSubmission(
                revision=revision,
                sender=sender,
                type=RevisionAkismetSubmission.SPAM_TYPE,
            )
            record.save()