Example #1
0
    def test_changelist_view_returns_metrics_when_accurate_and_inaccurate_newsitems_exist_and_finally_one_of_each_class(self):
        """Changelist metrics for one contested and one uncontested news item.

        Both items are scored 1 and added on 2018-12-03; only the first has
        a (negative) corpus. The expected accuracy for that day is 50.
        """
        contested = NewsItem(
            title='foo',
            score=1,
            added_at='2018-12-03 21:00:00+00:00',
        )
        contested.save()

        # The assertions below expect this negative corpus to move the item
        # from "positive" to "negative" in the supervised classification.
        Corpus(positive=False, news_item=contested).save()

        uncontested = NewsItem(
            title='bar',
            score=1,
            added_at='2018-12-03 22:00:00+00:00',
        )
        uncontested.save()

        superuser = self.create_superuser('superuser')
        request = self.mocked_authenticated_request('/admin/rss/newsitemmetric/', superuser)
        response = self.admin.changelist_view(request)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        context = response.context_data
        self.assertEqual(2, context['news_items_count'])
        self.assertEqual(0, context['news_items_unclassified'])
        self.assertEqual(2, context['classification_initial']['positive'])
        self.assertEqual(0, context['classification_initial']['negative'])
        self.assertEqual(1, context['classification_supervised']['positive'])
        self.assertEqual(1, context['classification_supervised']['negative'])
        self.assertEqual(0, context['corpus_count']['positive'])
        self.assertEqual(1, context['corpus_count']['negative'])
        self.assertEqual([{'accuracy': 50, 'added_at': '2018-12-03'}], context['accuracy'])
Example #2
0
    def test_get_accuracy_does_not_include_unclassified_newsitems(self):
        """get_accuracy yields no rows when the stored items have no score.

        One of the two items is also flagged as published; neither is
        expected to show up in the result.
        """
        NewsItem(score=None, added_at='2018-11-24 01:00:00+00:00').save()
        NewsItem(
            score=None,
            published=True,
            added_at='2018-11-24 02:00:00+00:00',
        ).save()

        metrics = self.newsitem_metric.get_accuracy(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual([], metrics)
Example #3
0
    def test_get_accuracy_returns_empty_list_when_no_newsitems_between_range(
            self):
        """get_accuracy yields no rows when no item falls in the date range.

        NOTE(review): ``self.date_range`` is built outside this method; the
        two dates used here (2018-11-23 and 2018-11-26) are presumably just
        outside it on either side -- confirm against the fixture.
        """
        NewsItem(score=1, added_at='2018-11-23 01:00:00+00:00').save()
        NewsItem(score=1, added_at='2018-11-26 01:00:00+00:00').save()

        metrics = self.newsitem_metric.get_accuracy(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual([], metrics)
Example #4
0
    def test_get_accuracy_total_returns_none_when_no_newsitems_between_range(
            self):
        """get_accuracy_total is None when no item falls in the date range.

        NOTE(review): ``self.date_range`` is built outside this method; the
        two dates used here (2018-11-23 and 2018-11-26) are presumably just
        outside it on either side -- confirm against the fixture.
        """
        NewsItem(score=1, added_at='2018-11-23 01:00:00+00:00').save()
        NewsItem(score=1, added_at='2018-11-26 01:00:00+00:00').save()

        accuracy = self.newsitem_metric.get_accuracy_total(self.date_range)

        # assertIsNone replaces assertEquals(None, ...): the assertEquals
        # alias is deprecated and was removed in Python 3.12.
        self.assertIsNone(accuracy)
Example #5
0
    def test_to_dict_returns_one_dictionary_when_cursor_has_one_result(self):
        """to_dict maps a single-row cursor to a one-element list of dicts.

        The expected dict is keyed by the ``news_item`` column names and
        carries the values stored on the saved item.
        """
        source = Source(id=1, name='foo_source')
        source.save()

        news_item = NewsItem(
            id=1,
            title='foo',
            description='bar',
            url='https://www.google.com',
            added_at='2018-11-23 01:00:00+00:00',
            source=source,
            published=False,
            score=1,
        )
        news_item.save()

        with connection.cursor() as cursor:
            cursor.execute('SELECT * FROM news_item')
            result = self.newsitem_metric.to_dict(cursor)

        expected_row = {
            'id': 1,
            'title': 'foo',
            'description': 'bar',
            'url': 'https://www.google.com',
            'added_at': datetime(2018, 11, 23, 1, 0),
            'source_id': 1,
            'published': False,
            'score': 1,
        }

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(1, len(result))
        self.assertEqual([expected_row], result)
Example #6
0
    def test_get_accuracy_total_returns_100_percent_when_no_corpora(self):
        """get_accuracy_total is 100 for one scored item and no corpora."""
        NewsItem(score=1, added_at='2018-11-24 01:00:00+00:00').save()

        accuracy = self.newsitem_metric.get_accuracy_total(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(100, accuracy)
Example #7
0
    def test_get_accuracy_total_uses_two_day_statistics_when_exist(self):
        """get_accuracy_total combines items added on two different days.

        The item from 2018-11-24 has a negative corpus and the item from
        2018-11-25 has none; the expected total is 50.0.
        """
        contested = NewsItem(score=1, added_at='2018-11-24 01:00:00+00:00')
        contested.save()
        Corpus(news_item=contested, positive=False).save()

        NewsItem(score=1, added_at='2018-11-25 01:00:00+00:00').save()

        accuracy = self.newsitem_metric.get_accuracy_total(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(50.0, accuracy)
Example #8
0
    def test_to_dict_returns_dictionaries_when_cursor_more_than_one_results(
            self):
        """to_dict returns one plain dict per row for a two-row cursor."""
        NewsItem(score=1, added_at='2018-11-23 01:00:00+00:00').save()
        NewsItem(score=1, added_at='2018-11-23 02:00:00+00:00').save()

        with connection.cursor() as cursor:
            cursor.execute('SELECT * FROM news_item')
            results = self.newsitem_metric.to_dict(cursor)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(2, len(results))
        # Exact-type check (not isinstance), as in the original assertions.
        for row in results:
            self.assertIs(dict, type(row))
Example #9
0
    def test_get_accuracy_total_returns_50_percent_when_one_accurate_newsitem_and_one_not_accurate(
            self):
        """get_accuracy_total is 50.0 for two same-day items, one contested.

        Both items are scored 1 on 2018-11-24; only the first has a
        (negative) corpus attached.
        """
        contested = NewsItem(score=1, added_at='2018-11-24 01:00:00+00:00')
        contested.save()
        Corpus(news_item=contested, positive=False).save()

        NewsItem(score=1, added_at='2018-11-24 02:00:00+00:00').save()

        accuracy = self.newsitem_metric.get_accuracy_total(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(50.0, accuracy)
Example #10
0
    def test_get_accuracy_returns_100_percent_when_no_corpora(self):
        """get_accuracy reports 100 for a day with one item and no corpora."""
        NewsItem(score=1, added_at='2018-11-24 01:00:00+00:00').save()

        metrics = self.newsitem_metric.get_accuracy(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(1, len(metrics))
        day = metrics[0]
        self.assertEqual(100, day['accuracy'])
        self.assertEqual('2018-11-24', day['added_at'])
Example #11
0
    def test_get_accuracy_returns_50_percent_when_one_accurate_newsitem_and_one_not_accurate(
            self):
        """get_accuracy reports 50 for a day with one contested item of two.

        Both items are scored 1 on 2018-11-24; only the first has a
        (negative) corpus attached.
        """
        contested = NewsItem(score=1, added_at='2018-11-24 01:00:00+00:00')
        contested.save()
        Corpus(news_item=contested, positive=False).save()

        NewsItem(score=1, added_at='2018-11-24 02:00:00+00:00').save()

        metrics = self.newsitem_metric.get_accuracy(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(1, len(metrics))
        day = metrics[0]
        self.assertEqual(50, day['accuracy'])
        self.assertEqual('2018-11-24', day['added_at'])
Example #12
0
    def test_get_accuracy_returns_two_day_statistics_when_newsitems_for_two_days_exist(
            self):
        """get_accuracy returns one row per day, in date order.

        The only item of 2018-11-24 has a negative corpus (expected
        accuracy 0); the only item of 2018-11-25 has none (expected 100).
        """
        contested = NewsItem(score=1, added_at='2018-11-24 01:00:00+00:00')
        contested.save()
        Corpus(news_item=contested, positive=False).save()

        NewsItem(score=1, added_at='2018-11-25 01:00:00+00:00').save()

        metrics = self.newsitem_metric.get_accuracy(self.date_range)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(2, len(metrics))
        first_day, second_day = metrics
        self.assertEqual('2018-11-24', first_day['added_at'])
        self.assertEqual(0, first_day['accuracy'])
        self.assertEqual('2018-11-25', second_day['added_at'])
        self.assertEqual(100, second_day['accuracy'])
Example #13
0
    def crawl(self, source, channel):
        """Fetch a source's feed, store unseen entries and queue them.

        Args:
            source: Feed source. Its ``name`` and ``url`` are read, and
                ``crawling()`` / ``crawled()`` are called before and after
                the entries are processed.
            channel: Object whose ``basic_publish`` is called once per newly
                saved news item, with the item serialized as JSON.

        Returns:
            None. Returns early, without calling ``source.crawled()``, when
            parsing the feed raises ``RuntimeError``.
        """
        source.crawling()

        self.logger.info('Crawling \'%s\'...', source.name)

        feedparser.USER_AGENT = settings.RSS_CRAWL_USER_AGENT
        try:
            feed = feedparser.parse(source.url)
        except RuntimeError:
            self.logger.error('Could not crawl \'%s\'.', source.name)
            return

        for entry in feed['entries']:
            # Prefer the published date, then the updated date, then "now".
            if 'published' in entry:
                published = entry['published']
            elif 'updated' in entry:
                published = entry['updated']
            else:
                published = timezone.now().isoformat()

            # Parse once; the result feeds both the duplicate check and the
            # stored item.
            added_at = parse(published)

            if NewsItem.exists(entry['title'], added_at, source):
                continue

            # Fall back to the title when the entry carries no summary.
            description = entry['summary'] if 'summary' in entry else entry[
                'title']

            news_item = NewsItem()
            news_item.title = entry['title']
            news_item.description = description
            news_item.url = entry['link']
            news_item.source = source
            # No score yet; the item is handed to the classify queue below.
            news_item.score = None
            news_item.added_at = added_at
            news_item.save()

            body = serializers.serialize('json', [news_item])
            channel.basic_publish(exchange='',
                                  routing_key=settings.QUEUE_NAME_CLASSIFY,
                                  body=body,
                                  properties=pika.BasicProperties(
                                      delivery_mode=2,
                                      headers={'x-is-self-train': False}))

        source.crawled()

        self.logger.info('Successfully crawled \'%s\'!', source.name)
Example #14
0
    def test_news_item_publish_and_corpus_create_negative_publishes_newsitems_and_creates_negative_corpora_when_newsitems_in_query_set(
            self):
        """The unpublish action unpublishes the item and adds a negative corpus.

        NOTE(review): the test name says "publish", but the action exercised
        here is ``news_item_unpublish_and_corpus_create_negative``.
        """
        # The item must start out published (the original assigned to a
        # misspelled ``publshed`` attribute), otherwise the "is now
        # unpublished" assertion below cannot fail.
        news_item = NewsItem(title='foo', published=True, score=1.00)
        news_item.save()

        query_set = [news_item]
        self.newsitem.news_item_unpublish_and_corpus_create_negative(
            None, None, query_set)

        self.newsitem.enqueue_corpus_creation.assert_called_once()

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        news_items = NewsItem.objects.all()
        self.assertEqual(1, len(news_items))
        self.assertFalse(news_items[0].published)

        # filter() never returns None, so check that a row actually exists.
        corpora = Corpus.objects.filter(news_item=news_items[0])
        self.assertTrue(corpora.exists())
        self.assertFalse(corpora[0].positive)
Example #15
0
    def test_news_returns_http_response_with_template_and_positive_newsitems_when_positive_newsitems_exist(self):
        """The news view renders a published, scored item into the response."""
        # create a positive, published news item
        NewsItem(title='foo', score=1, published=True).save()

        # make request to news view
        response = self.get_news()

        # returns exactly an HttpResponse (exact type, not a subclass);
        # assertIs reports both types on failure
        self.assertIs(type(response), HttpResponse)

        # request didn't fail (assertEqual replaces the deprecated
        # assertEquals alias, which was removed in Python 3.12)
        self.assertEqual(200, response.status_code)

        # response contains news items
        content = response.getvalue()
        self.assertIn('news-item', str(content))
Example #16
0
    def test_changelist_view_returns_metrics_when_unclassified_newsitems_exist_and_no_corpora(self):
        """Changelist metrics for a single item without a score.

        The item is expected to be counted as unclassified, with every
        classification and corpus counter at 0 and no accuracy rows.
        """
        NewsItem(
            title='foo',
            score=None,
            added_at='2018-12-03 21:00:00+00:00',
        ).save()

        superuser = self.create_superuser('superuser')
        request = self.mocked_authenticated_request('/admin/rss/newsitemmetric/', superuser)
        response = self.admin.changelist_view(request)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        context = response.context_data
        self.assertEqual(1, context['news_items_count'])
        self.assertEqual(1, context['news_items_unclassified'])
        self.assertEqual(0, context['classification_initial']['positive'])
        self.assertEqual(0, context['classification_initial']['negative'])
        self.assertEqual(0, context['classification_supervised']['positive'])
        self.assertEqual(0, context['classification_supervised']['negative'])
        self.assertEqual(0, context['corpus_count']['positive'])
        self.assertEqual(0, context['corpus_count']['negative'])
        self.assertEqual([], context['accuracy'])
Example #17
0
    def test_changelist_view_returns_metrics_when_newsitems_exist_but_no_corpora_and_date_query_includes_newsitem(self):
        """Changelist metrics with a month/year filter that matches the item.

        A single item scored 1 on 2018-12-03 is requested with
        ``added_at__month=12&added_at__year=2018``; the expected accuracy
        for that day is 100.0.
        """
        NewsItem(
            title='foo',
            score=1,
            added_at='2018-12-03 21:00:00+00:00',
        ).save()

        superuser = self.create_superuser('superuser')
        request = self.mocked_authenticated_request('/admin/rss/newsitemmetric/?added_at__month=12&added_at__year=2018', superuser)
        response = self.admin.changelist_view(request)

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        context = response.context_data
        self.assertEqual(1, context['news_items_count'])
        self.assertEqual(0, context['news_items_unclassified'])
        self.assertEqual(1, context['classification_initial']['positive'])
        self.assertEqual(0, context['classification_initial']['negative'])
        self.assertEqual(1, context['classification_supervised']['positive'])
        self.assertEqual(0, context['classification_supervised']['negative'])
        self.assertEqual(0, context['corpus_count']['positive'])
        self.assertEqual(0, context['corpus_count']['negative'])
        self.assertEqual([{'accuracy': 100.0, 'added_at': '2018-12-03'}], context['accuracy'])
Example #18
0
    def test_about_returns_http_response_with_stats(self):
        """The about view renders source, news, corpus and accuracy stats.

        With one scored news item, one default source and one source saved
        with ``active = False``, the page is expected to report 1 source
        crawled, 1 news item classified, 0 corpora and 100% accuracy.
        """
        # create a news item
        NewsItem(title='foo', score=1, published=False).save()

        # create an active source (``active`` is left at its default)
        Source(
            name='foo',
            url='http://www.foo.com',
            homepage='http://www.foo.com',
        ).save()

        # create an inactive source
        Source(
            name='bar',
            url='http://www.bar.com',
            homepage='http://www.bar.com',
            active=False,
        ).save()

        # make request to about view
        response = self.get_about()

        # returns exactly an HttpResponse (exact type, not a subclass);
        # assertIs reports both types on failure
        self.assertIs(type(response), HttpResponse)

        # request didn't fail (assertEqual replaces the deprecated
        # assertEquals alias, which was removed in Python 3.12)
        self.assertEqual(200, response.status_code)

        # response contains the expected statistics
        content = str(response.getvalue())
        self.assertIn('Sources crawled</strong>: 1', content)
        self.assertIn('News classified</strong>: 1', content)
        self.assertIn('Corpora created</strong>: 0', content)
        self.assertIn('Classification accuracy</strong>: 100%', content)