def test_get_topics(self):
    """Check that a written group's title can be read back as a topic.

    Asserts that the reader reports zero topics and an empty topic
    collection before anything is written, then writes the groups built
    from ``test_utils.SIMILAR_ARTICLES`` and compares the title of the
    first topic read back with the title of the first group.
    """
    # Before any write: no topics by count, and none by listing.
    topic_count = database_reader.get_number_topics()
    self.assertEqual(0, topic_count)
    topics_before = set(database_reader.get_topics())
    self.assertEqual(topics_before, set())

    article_groups = classifier.group_articles(test_utils.SIMILAR_ARTICLES)
    database_writer.write_groups(article_groups)

    first_topic = database_reader.get_topics()[0]
    self.assertEqual(first_topic["title"], article_groups[0].get_title())
def test_get_grouped_articles(self):
    """Check that grouped articles read from the database match the written group.

    Asserts the reader returns an empty list before anything is written,
    then writes both the articles and their groups and compares the first
    grouped entry read back with the first group produced by the classifier.
    """
    initial_result = database_reader.get_grouped_articles()
    self.assertEqual(initial_result, [])

    # Grouping happens before the articles themselves are written.
    article_groups = classifier.group_articles(test_utils.SIMILAR_ARTICLES)
    database_writer.write_articles(test_utils.SIMILAR_ARTICLES)
    database_writer.write_groups(article_groups)

    stored_groups = database_reader.get_grouped_articles()
    self.assertEqual(stored_groups[0], article_groups[0])
def test_get_urls(self):
    """Check that the urls read from the database are those of the written groups.

    Asserts the reader yields no urls before anything is written, then
    writes the groups built from ``test_utils.SIMILAR_ARTICLES`` and
    compares the set of stored urls with the set of the articles' urls.
    """
    urls_before = set(database_reader.get_urls())
    self.assertEqual(urls_before, set())

    article_groups = classifier.group_articles(test_utils.SIMILAR_ARTICLES)
    database_writer.write_groups(article_groups)

    # Compared as sets, so ordering and duplicates do not matter.
    stored_urls = set(database_reader.get_urls())
    expected_urls = {article.get_url() for article in test_utils.SIMILAR_ARTICLES}
    self.assertEqual(stored_urls, expected_urls)
def test_write_read_similar(self):
    """Check that similar articles written as groups are read back under one topic.

    Asserts there are zero topics before writing, writes the groups built
    from ``test_utils.SIMILAR_ARTICLES``, then reads the stories for the
    first group's uuid and compares their ``'link'`` values (as a set) with
    the articles' urls. Finally asserts exactly one topic is reported.
    """
    self.assertEqual(0, database_reader.get_number_topics())

    article_groups = classifier.group_articles(test_utils.SIMILAR_ARTICLES)
    database_writer.write_groups(article_groups)

    topic_uuid = article_groups[0].get_uuid()
    topic_stories = database_reader.get_stories_for_topic(topic_uuid)
    stored_links = {entry.get('link') for entry in topic_stories.get('articles')}
    expected_links = {article.get_url() for article in test_utils.SIMILAR_ARTICLES}
    self.assertEqual(stored_links, expected_links)

    self.assertEqual(1, database_reader.get_number_topics())
def test_keywordless_articles(self):
    """Check how articles without keywords are grouped.

    Two keywordless articles are combined with
    ``test_utils.SIMILAR_ARTICLES``; the classifier is expected to return
    exactly two groups. For every group whose first article has no
    keywords, ``in_database()`` must be truthy.
    """
    keywordless = [
        models.Article(url="example.com", keywords=[]),
        models.Article(url="test.com", keywords=[]),
    ]
    all_articles = keywordless + list(test_utils.SIMILAR_ARTICLES)

    article_groups = classifier.group_articles(all_articles)
    self.assertEqual(2, len(article_groups))

    for article_group in article_groups:
        lead_article = article_group.get_articles()[0]
        # Only groups led by a keywordless article are checked.
        if len(lead_article.get_keywords()) != 0:
            continue
        self.assertTrue(article_group.in_database())
def update_database():
    """Refresh the database from the current top headlines.

    Steps, in order: clean the database, fetch the top headlines (with
    category information), drop any article whose url is already stored,
    write the remaining articles, group articles and write the groups,
    write the group fits and overall fits, and update the topic pictures.
    A message is printed when the number of articles without an overall
    fit exceeds ``constants.ARTICLES_NEEDED_BEFORE_ALL_FIT_UPDATED``.
    """
    database_writer.clean_database()
    headlines = get_top_headlines(add_category_information=True)

    # Build the lookup set once so that each membership test below is a
    # hash lookup rather than a scan of whatever get_urls() returns.
    known_urls = set(database_reader.get_urls())
    new_articles = [article for article in headlines
                    if article.get_url() not in known_urls]
    database_writer.write_articles(new_articles)

    # NOTE(review): group_articles() is called without arguments here --
    # confirm that this is intended and that it has a suitable default.
    grouped = classifier.group_articles()
    database_writer.write_groups(grouped)
    database_writer.write_group_fits()
    database_writer.write_overall_fits()

    articles_without_fit = database_reader.get_number_articles_without_overall_fit()
    if articles_without_fit > constants.ARTICLES_NEEDED_BEFORE_ALL_FIT_UPDATED:
        # NOTE(review): the message wording looks inverted relative to the
        # condition (count *exceeds* the threshold) -- confirm the intent.
        # Call form prints the same text under both Python 2 and Python 3.
        print("Not enough new articles")

    # NOTE(review): assumed to run unconditionally (outside the `if`);
    # verify against the original indentation.
    database_writer.update_topic_pictures()
def test_dissimilar_urls(self):
    """Check that dissimilar articles are not merged into one group.

    Grouping ``test_utils.DISSIMILAR_ARTICLES`` must yield exactly two groups.
    """
    article_groups = classifier.group_articles(test_utils.DISSIMILAR_ARTICLES)
    self.assertEqual(2, len(article_groups))
def test_similar_urls_one_in_database(self):
    """Check grouping of a similar article when its counterpart is already stored.

    The first of ``test_utils.SIMILAR_ARTICLES`` is grouped on its own and
    the result written to the database; grouping the second article alone
    must then yield exactly one group.
    """
    stored_groups = classifier.group_articles([test_utils.SIMILAR_ARTICLES[0]])
    database_writer.write_groups(stored_groups)

    # The second article is looked up only after the first has been written.
    second_article = test_utils.SIMILAR_ARTICLES[1]
    regrouped = classifier.group_articles([second_article])
    self.assertEqual(1, len(regrouped))
def test_similar_urls(self):
    """Check that similar articles end up in a single group.

    Grouping ``test_utils.SIMILAR_ARTICLES`` must yield exactly one group.
    """
    article_groups = classifier.group_articles(test_utils.SIMILAR_ARTICLES)
    self.assertEqual(1, len(article_groups))