Esempio n. 1
0
    def test_spam_trends_stats(self, mock_analytics_upageviews):
        """Test that the correct stats show up on the spam trends dashboard.

        Creates revisions, Akismet spam submissions and blocked
        DocumentSpamAttempt records at known ages, mocks the analytics page
        view lookup, requests the dashboard twice and checks every column of
        the Daily, Weekly, Monthly and Quarterly rows of the trends table.

        :param mock_analytics_upageviews: mock of the analytics page view
            lookup; its return value is set to a dict mapping spam revision
            ids to view counts.
        """
        # Period length
        days_in_week = 7
        days_in_month = 28
        days_in_quarter = 91
        # Dates
        today = datetime.datetime.today()
        yesterday = today - datetime.timedelta(days=1)
        three_days_ago = today - datetime.timedelta(days=3)
        weekly_start_date = today - datetime.timedelta(days=days_in_week)
        ten_days_ago = today - datetime.timedelta(days=10)
        monthly_start_date = today - datetime.timedelta(days=days_in_month)
        thirtyfive_days_ago = today - datetime.timedelta(days=35)
        quarterly_start_date = today - datetime.timedelta(days=days_in_quarter)
        hundred_days_ago = today - datetime.timedelta(days=100)

        # Revisions made by self.testuser: 3 made today, 3 made 3 days ago,
        # 3 made 10 days ago, 3 made 35 days ago, 3 made 100 days ago
        revs = self.create_revisions(num=15, creator=self.testuser, document=self.document)
        for i in range(0, 3):
            revs[i].created = today
        for i in range(3, 6):
            revs[i].created = three_days_ago
        for i in range(6, 9):
            revs[i].created = ten_days_ago
        for i in range(9, 12):
            revs[i].created = thirtyfive_days_ago
        for i in range(12, 15):
            revs[i].created = hundred_days_ago
        for rev in revs:
            rev.save()

        # Published spam by self.testuser: the last revision of each age group
        spam_rev_today = revs[2]
        spam_rev_3_days_ago = revs[5]
        spam_rev_10_days_ago = revs[8]
        spam_rev_35_days_ago = revs[11]
        spam_rev_100_days_ago = revs[14]
        spam_revs = [spam_rev_today, spam_rev_3_days_ago, spam_rev_10_days_ago,
                     spam_rev_35_days_ago, spam_rev_100_days_ago]
        # Summary of spam submissions expected in each period
        spam_weekly = [spam_rev_3_days_ago]
        spam_monthly = [spam_rev_3_days_ago, spam_rev_10_days_ago]
        spam_quarterly = [spam_rev_3_days_ago, spam_rev_10_days_ago,
                          spam_rev_35_days_ago]
        # All of the spam_revs were published and then marked as spam.
        # Use the related manager's create() rather than add() with an unsaved
        # instance: add() on a reverse foreign key rejects unsaved objects on
        # newer Django versions unless bulk=False is passed.
        for rev in spam_revs:
            rev.save()
            rev.akismet_submissions.create(sender=self.admin, type="spam")

        # Summary of self.testuser's ham submissions (the non-spam revisions
        # of each age group that falls inside the period)
        ham_weekly = revs[3:5]
        ham_monthly = revs[3:5] + revs[6:8]
        ham_quarterly = revs[3:5] + revs[6:8] + revs[9:11]

        # There were 2 correctly blocked spam attempts 3 days ago (within past week)
        true_blocked_spam_num = 2
        for _ in range(0, true_blocked_spam_num):
            document_spam_rev_3_days_ago = DocumentSpamAttempt(
                user=self.testuser,
                title='A spam revision',
                slug='spam-revision-slug',
                document=self.document,
                review=DocumentSpamAttempt.SPAM
            )
            # Saved twice: once to insert, once to store the back-dated
            # `created` (presumably the field is auto-populated on insert --
            # confirm against the model).
            document_spam_rev_3_days_ago.save()
            document_spam_rev_3_days_ago.created = three_days_ago
            document_spam_rev_3_days_ago.save()

        # There was 1 incorrectly blocked spam attempt 3 days ago
        false_blocked_spam_num = 1
        for _ in range(0, false_blocked_spam_num):
            document_ham_rev_3_days_ago = DocumentSpamAttempt(
                user=self.testuser,
                title='Not a spam revision',
                slug='ham-revision-slug',
                document=self.document,
                review=DocumentSpamAttempt.HAM
            )
            document_ham_rev_3_days_ago.save()
            document_ham_rev_3_days_ago.created = three_days_ago
            document_ham_rev_3_days_ago.save()

        page_views = {}
        # The spam from 3 days ago was seen 3 times, from 10 days ago seen 10
        # times, and from 35 days ago seen 35 times
        page_views[spam_rev_3_days_ago.id] = 3
        page_views[spam_rev_10_days_ago.id] = 10
        page_views[spam_rev_35_days_ago.id] = 35
        # The mock Google Analytics return values for page views
        mock_analytics_upageviews.return_value = page_views

        self.client.login(username='******', password='******')
        # The first response will say that the report is being processed
        response = self.client.get(reverse('dashboards.spam', locale='en-US'))
        eq_(200, response.status_code)

        response2 = self.client.get(reverse('dashboards.spam', locale='en-US'))
        page = pq(response2.content)

        def row_cells(table_row):
            """Return the cell texts of one table row, with spaces removed."""
            return table_row.text_content().replace(' ', '').strip('\n').split('\n')

        # Query the table once; rows are indexed Daily, Weekly, Monthly,
        # Quarterly (verified by the period assertions below).
        trend_rows = page.find('.spam-trends-table tbody tr')
        row_daily = row_cells(trend_rows[0])
        row_weekly = row_cells(trend_rows[1])
        row_monthly = row_cells(trend_rows[2])
        row_quarterly = row_cells(trend_rows[3])

        # These are the columns in the spam dashboard spam trends table
        period = 0
        start_date = 1
        spam_viewers_change_percent = 2
        spam_viewers = 3
        daily_average_viewers = 4
        published_spam = 5
        blocked_spam = 6
        blocked_ham = 7
        true_positive_rate = 8
        true_negative_rate = 9

        # The periods are identified as 'Daily', 'Weekly', 'Monthly', 'Quarterly'
        eq_(row_daily[period], 'Daily')
        eq_(row_weekly[period], 'Weekly')
        eq_(row_monthly[period], 'Monthly')
        eq_(row_quarterly[period], 'Quarterly')
        # The start dates for each period are correct
        eq_(row_daily[start_date], yesterday.strftime('%Y-%m-%d'))
        eq_(row_weekly[start_date], weekly_start_date.strftime('%Y-%m-%d'))
        eq_(row_monthly[start_date], monthly_start_date.strftime('%Y-%m-%d'))
        eq_(row_quarterly[start_date], quarterly_start_date.strftime('%Y-%m-%d'))
        # The page views during the week, month, quarter
        spam_views_week = page_views[spam_rev_3_days_ago.id]
        spam_views_month = spam_views_week + page_views[spam_rev_10_days_ago.id]
        spam_views_month_exclude_week = page_views[spam_rev_10_days_ago.id]
        spam_views_quarter = spam_views_month + page_views[spam_rev_35_days_ago.id]
        spam_views_quarter_exclude_month = page_views[spam_rev_35_days_ago.id]
        # The percentage change in spam viewers
        weekly_spam_change_percent = '{:.1%}'.format(
            float(spam_views_week - spam_views_month_exclude_week) / spam_views_month_exclude_week
        )
        monthly_spam_change_percent = '{:.1%}'.format(
            float(spam_views_month - spam_views_quarter_exclude_month) / spam_views_quarter_exclude_month
        )
        eq_(row_daily[spam_viewers_change_percent], '0.0%')
        eq_(row_weekly[spam_viewers_change_percent], weekly_spam_change_percent)
        eq_(row_monthly[spam_viewers_change_percent], monthly_spam_change_percent)
        eq_(row_quarterly[spam_viewers_change_percent], '0.0%')
        # The spam viewers
        eq_(int(row_daily[spam_viewers]), 0)
        eq_(int(row_weekly[spam_viewers]), spam_views_week)
        eq_(int(row_monthly[spam_viewers]), spam_views_month)
        eq_(int(row_quarterly[spam_viewers]), spam_views_quarter)
        # The daily average of spam viewers
        eq_(float(row_daily[daily_average_viewers]), 0.0)
        eq_(row_weekly[daily_average_viewers],
            '{:.1f}'.format(float(spam_views_week) / days_in_week))
        eq_(row_monthly[daily_average_viewers],
            '{:.1f}'.format(float(spam_views_month) / days_in_month))
        eq_(row_quarterly[daily_average_viewers],
            '{:.1f}'.format(float(spam_views_quarter) / days_in_quarter))
        # The published spam: 1 this week, 2 this month, 3 this quarter
        eq_(int(row_daily[published_spam]), 0)
        eq_(int(row_weekly[published_spam]), len(spam_weekly))
        eq_(int(row_monthly[published_spam]), len(spam_monthly))
        eq_(int(row_quarterly[published_spam]), len(spam_quarterly))
        # The blocked spam: there were 2 correctly blocked spam attempts 3 days ago
        eq_(int(row_daily[blocked_spam]), 0)
        eq_(int(row_weekly[blocked_spam]), true_blocked_spam_num)
        eq_(int(row_monthly[blocked_spam]), true_blocked_spam_num)
        eq_(int(row_quarterly[blocked_spam]), true_blocked_spam_num)
        # The blocked ham: there was 1 incorrectly blocked spam attempt 3 days ago
        eq_(int(row_daily[blocked_ham]), 0)
        eq_(int(row_weekly[blocked_ham]), false_blocked_spam_num)
        eq_(int(row_monthly[blocked_ham]), false_blocked_spam_num)
        eq_(int(row_quarterly[blocked_ham]), false_blocked_spam_num)
        # The true positive rate == blocked_spam / total spam
        tpr_weekly = '{:.1%}'.format(
            true_blocked_spam_num / float(true_blocked_spam_num + len(spam_weekly))
        )
        tpr_monthly = '{:.1%}'.format(
            true_blocked_spam_num / float(true_blocked_spam_num + len(spam_monthly))
        )
        tpr_quarterly = '{:.1%}'.format(
            true_blocked_spam_num / float(true_blocked_spam_num + len(spam_quarterly))
        )
        eq_(row_daily[true_positive_rate], '100.0%')
        eq_(row_weekly[true_positive_rate], tpr_weekly)
        eq_(row_monthly[true_positive_rate], tpr_monthly)
        eq_(row_quarterly[true_positive_rate], tpr_quarterly)
        # The true negative rate == published ham / total ham
        tnr_weekly = '{:.1%}'.format(
            len(ham_weekly) / float(false_blocked_spam_num + len(ham_weekly))
        )
        tnr_monthly = '{:.1%}'.format(
            len(ham_monthly) / float(false_blocked_spam_num + len(ham_monthly))
        )
        tnr_quarterly = '{:.1%}'.format(
            len(ham_quarterly) / float(false_blocked_spam_num + len(ham_quarterly))
        )
        eq_(row_daily[true_negative_rate], '100.0%')
        eq_(row_weekly[true_negative_rate], tnr_weekly)
        eq_(row_monthly[true_negative_rate], tnr_monthly)
        eq_(row_quarterly[true_negative_rate], tnr_quarterly)
Esempio n. 2
0
    def test_spam_trends_stats(self, mock_analytics_upageviews):
        """Check each column of the spam trends table on the spam dashboard.

        Fixture data is created at fixed ages (0, 3, 10, 35 and 100 days),
        the analytics page view lookup is mocked, and the rendered Daily,
        Weekly, Monthly and Quarterly rows are compared with values derived
        from that fixture data.
        """
        # Column positions within one row of the spam trends table.
        (col_period, col_start_date, col_viewers_change, col_viewers,
         col_daily_average, col_published_spam, col_blocked_spam,
         col_blocked_ham, col_true_positive, col_true_negative) = range(10)

        # Length of each reporting period, in days.
        week_len, month_len, quarter_len = 7, 28, 91

        now = datetime.datetime.today()

        def days_before_now(count):
            """Return the datetime `count` days before `now`."""
            return now - datetime.timedelta(days=count)

        day_before = days_before_now(1)
        age_3 = days_before_now(3)
        week_start = days_before_now(week_len)
        age_10 = days_before_now(10)
        month_start = days_before_now(month_len)
        age_35 = days_before_now(35)
        quarter_start = days_before_now(quarter_len)
        age_100 = days_before_now(100)

        # Fifteen revisions by self.testuser, back-dated in groups of three:
        # today, 3 days, 10 days, 35 days and 100 days ago.
        revisions = self.create_revisions(
            num=15, creator=self.testuser, document=self.document)
        age_groups = (
            (slice(0, 3), now),
            (slice(3, 6), age_3),
            (slice(6, 9), age_10),
            (slice(9, 12), age_35),
            (slice(12, 15), age_100),
        )
        for group, created in age_groups:
            for revision in revisions[group]:
                revision.created = created
        for revision in revisions:
            revision.save()

        # The last revision of every age group is published spam.
        spam_now = revisions[2]
        spam_3 = revisions[5]
        spam_10 = revisions[8]
        spam_35 = revisions[11]
        spam_100 = revisions[14]
        # Spam expected to be counted in each period.
        weekly_spam = [spam_3]
        monthly_spam = [spam_3, spam_10]
        quarterly_spam = [spam_3, spam_10, spam_35]
        # Every spam revision was published and afterwards reported as spam.
        for spam in (spam_now, spam_3, spam_10, spam_35, spam_100):
            spam.save()
            spam.akismet_submissions.create(sender=self.admin, type="spam")

        # Ham by self.testuser expected to be counted in each period.
        weekly_ham = revisions[3:5]
        monthly_ham = weekly_ham + revisions[6:8]
        quarterly_ham = monthly_ham + revisions[9:11]

        def block_attempt(title, slug, review):
            """Store a blocked edit attempt dated three days ago."""
            attempt = DocumentSpamAttempt(
                user=self.testuser,
                title=title,
                slug=slug,
                document=self.document,
                review=review,
            )
            # Two saves: insert first, then persist the back-dated `created`
            # (presumably auto-populated on insert -- confirm against model).
            attempt.save()
            attempt.created = age_3
            attempt.save()

        # Two attempts were correctly blocked as spam within the past week.
        blocked_spam_count = 2
        for _ in range(blocked_spam_count):
            block_attempt("A spam revision", "spam-revision-slug",
                          DocumentSpamAttempt.SPAM)

        # One attempt was blocked although it was really ham.
        blocked_ham_count = 1
        for _ in range(blocked_ham_count):
            block_attempt("Not a spam revision", "ham-revision-slug",
                          DocumentSpamAttempt.HAM)

        # Mocked analytics result: views per spam revision, equal to the
        # revision's age in days for the 3, 10 and 35 day old spam.
        views_by_revision = {
            spam_3.id: 3,
            spam_10.id: 10,
            spam_35.id: 35,
        }
        mock_analytics_upageviews.return_value = views_by_revision

        self.client.login(username="******", password="******")
        dashboard_url = reverse("dashboards.spam")
        # The first response will say that the report is being processed
        first_response = self.client.get(dashboard_url,
                                         HTTP_HOST=settings.WIKI_HOST)
        assert first_response.status_code == 200

        second_response = self.client.get(dashboard_url,
                                          HTTP_HOST=settings.WIKI_HOST)
        document = pq(second_response.content)

        def cells_of(table_row):
            """Split a table row's text into its cell values."""
            text = table_row.text_content().replace(" ", "")
            return text.strip("\n").split("\n")

        table_rows = document.find(".spam-trends-table tbody tr")
        daily = cells_of(table_rows[0])
        weekly = cells_of(table_rows[1])
        monthly = cells_of(table_rows[2])
        quarterly = cells_of(table_rows[3])

        as_date = "%Y-%m-%d"
        as_percent = "{:.1%}".format
        as_average = "{:.1f}".format

        # Period labels
        assert daily[col_period] == "Daily"
        assert weekly[col_period] == "Weekly"
        assert monthly[col_period] == "Monthly"
        assert quarterly[col_period] == "Quarterly"

        # Start date of each period
        assert daily[col_start_date] == day_before.strftime(as_date)
        assert weekly[col_start_date] == week_start.strftime(as_date)
        assert monthly[col_start_date] == month_start.strftime(as_date)
        assert quarterly[col_start_date] == quarter_start.strftime(as_date)

        # Cumulative page views per period, plus the views that fall only in
        # the older part of the month / quarter.
        week_views = views_by_revision[spam_3.id]
        month_only_views = views_by_revision[spam_10.id]
        quarter_only_views = views_by_revision[spam_35.id]
        month_views = week_views + month_only_views
        quarter_views = month_views + quarter_only_views

        # Percentage change in spam viewers
        weekly_change = as_percent(
            float(week_views - month_only_views) / month_only_views)
        monthly_change = as_percent(
            float(month_views - quarter_only_views) / quarter_only_views)
        assert daily[col_viewers_change] == "0.0%"
        assert weekly[col_viewers_change] == weekly_change
        assert monthly[col_viewers_change] == monthly_change
        assert quarterly[col_viewers_change] == "0.0%"

        # Spam viewers
        assert int(daily[col_viewers]) == 0
        assert int(weekly[col_viewers]) == week_views
        assert int(monthly[col_viewers]) == month_views
        assert int(quarterly[col_viewers]) == quarter_views

        # Daily average of spam viewers
        assert float(daily[col_daily_average]) == 0.0
        assert weekly[col_daily_average] == as_average(
            float(week_views) / week_len)
        assert monthly[col_daily_average] == as_average(
            float(month_views) / month_len)
        assert quarterly[col_daily_average] == as_average(
            float(quarter_views) / quarter_len)

        # Published spam: 1 this week, 2 this month, 3 this quarter
        assert int(daily[col_published_spam]) == 0
        assert int(weekly[col_published_spam]) == len(weekly_spam)
        assert int(monthly[col_published_spam]) == len(monthly_spam)
        assert int(quarterly[col_published_spam]) == len(quarterly_spam)

        # Blocked spam: both attempts happened three days ago
        assert int(daily[col_blocked_spam]) == 0
        assert int(weekly[col_blocked_spam]) == blocked_spam_count
        assert int(monthly[col_blocked_spam]) == blocked_spam_count
        assert int(quarterly[col_blocked_spam]) == blocked_spam_count

        # Blocked ham: the single attempt happened three days ago
        assert int(daily[col_blocked_ham]) == 0
        assert int(weekly[col_blocked_ham]) == blocked_ham_count
        assert int(monthly[col_blocked_ham]) == blocked_ham_count
        assert int(quarterly[col_blocked_ham]) == blocked_ham_count

        # True positive rate == blocked spam / (blocked + published spam)
        weekly_tpr = as_percent(
            blocked_spam_count / float(blocked_spam_count + len(weekly_spam)))
        monthly_tpr = as_percent(
            blocked_spam_count / float(blocked_spam_count + len(monthly_spam)))
        quarterly_tpr = as_percent(
            blocked_spam_count /
            float(blocked_spam_count + len(quarterly_spam)))
        assert daily[col_true_positive] == "100.0%"
        assert weekly[col_true_positive] == weekly_tpr
        assert monthly[col_true_positive] == monthly_tpr
        assert quarterly[col_true_positive] == quarterly_tpr

        # True negative rate == published ham / (blocked + published ham)
        weekly_tnr = as_percent(
            len(weekly_ham) / float(blocked_ham_count + len(weekly_ham)))
        monthly_tnr = as_percent(
            len(monthly_ham) / float(blocked_ham_count + len(monthly_ham)))
        quarterly_tnr = as_percent(
            len(quarterly_ham) / float(blocked_ham_count + len(quarterly_ham)))
        assert daily[col_true_negative] == "100.0%"
        assert weekly[col_true_negative] == weekly_tnr
        assert monthly[col_true_negative] == monthly_tnr
        assert quarterly[col_true_negative] == quarterly_tnr