def test_combine_group(self):
    """Combine one grouping into another and compare the article lists.

    Two groupings are built from the first two entries of
    ``test_utils.SIMILAR_ARTICLES``; after the second is combined into the
    first, the first grouping's articles must equal those two entries, in
    order.
    """
    similar = test_utils.SIMILAR_ARTICLES
    target = models.Grouping(similar[0])
    absorbed = models.Grouping(similar[1])

    target.combine_group(absorbed)

    expected_articles = list(similar[0:2])
    combined_articles = list(target.get_articles())
    self.assertEqual(expected_articles, combined_articles)
def group_articles(article_list=None, debug=False):
    """Assign articles to groupings and return the resulting groupings.

    Args:
        article_list: Articles to group. When None, the list comes from
            ``database_reader.get_ungrouped_articles()``. Otherwise every
            ``str``/``unicode`` item is wrapped as ``models.Article(url=item)``
            and any other item is used as given.
        debug: When true, print a progress line for every article.

    Returns:
        The list obtained from ``database_reader.get_grouped_articles()``,
        updated in place: articles are added to existing groupings,
        groupings may be combined and removed, new single-article groupings
        are appended, and one grouping holding every article without
        keywords is appended last (when there is at least one such article).
    """
    # NOTE(review): this block's nesting was reconstructed from a flattened
    # source line; confirm the indentation of the merge logic below against
    # version control before relying on it.
    if article_list is None:
        article_list = database_reader.get_ungrouped_articles()
    else:
        # Accept bare URLs as well as ready-made article objects.
        article_list = [
            models.Article(url=a) if isinstance(a, (str, unicode)) else a
            for a in article_list
        ]
    groupings = database_reader.get_grouped_articles()
    # Created lazily: collects every article whose get_keywords() is falsy.
    no_keyword_grouping = None
    for index, article in enumerate(article_list):
        if debug:
            print "Grouping", index, "out of", len(article_list)
        if not article.get_keywords():
            if no_keyword_grouping is None:
                # in_database is set to True here because we do not want a
                # no-keyword grouping to be written to the database.
                no_keyword_grouping = models.Grouping(article, in_database=True)
            else:
                no_keyword_grouping.add_article(article)
            continue  # Skip the article if the keywords cannot be gotten from it.
        # Track the grouping with the highest similarity seen so far.
        best_grouping, best_grouping_similarity = None, 0
        # Iterate over a shallow copy because groupings may be removed from
        # the real list when two of them are combined.
        for grouping in groupings[:]:
            similarity = grouping.best_similarity(article)
            if similarity > best_grouping_similarity:
                # If this article has a high similarity with two separate
                # groups (the previous best and this one), combine the groups.
                if best_grouping_similarity > constants.MIN_COMBINE_GROUP_PERCENTAGE:
                    if best_grouping.in_database():
                        # Keep the grouping that is already stored; if the
                        # other one is stored too, delete its database entry.
                        if grouping.in_database():
                            database_writer.remove_grouping_from_database(
                                grouping)
                        best_grouping.combine_group(grouping)
                        groupings.remove(grouping)
                    else:
                        # The previous best is not stored, so fold it into
                        # the current grouping instead.
                        grouping.combine_group(best_grouping)
                        groupings.remove(best_grouping)
                # NOTE(review): with this nesting, after the in_database
                # branch above best_grouping is rebound to the grouping that
                # was just removed from the list -- verify this is intended.
                best_grouping = grouping
                best_grouping_similarity = similarity
        if best_grouping is not None and best_grouping_similarity > constants.MIN_GROUPING_PERCENTAGE:
            best_grouping.add_article(article)
        else:
            # Nothing similar enough: the article starts its own grouping.
            groupings.append(models.Grouping(article))
    if no_keyword_grouping:
        groupings.append(no_keyword_grouping)
    return groupings
def setUp(self):
    """Prepare the patched database name and the shared test fixtures.

    Starts a patch of ``server.database_utils.database_name`` that returns
    ``"mudima_test.db"``, stores the path that ``database_utils`` derives
    from the database name, calls ``self._delete_database()`` (presumably to
    start from a clean file -- confirm), and creates ``self.article`` and
    ``self.grouping`` for the tests to use.
    """
    name_patcher = mock.patch(
        "server.database_utils.database_name",
        return_value="mudima_test.db",
    )
    self._database_name_mock = name_patcher
    name_patcher.start()

    current_name = database_utils.database_name()
    self._database_location = database_utils.database_path(current_name)
    self._delete_database()

    example_article = models.Article(
        "example.com", title="Example", keywords=["0", "1"])
    self.article = example_article
    self.grouping = models.Grouping(example_article)
def test_clean_database(self):
    """Check the stored URL count before and after cleaning the database.

    The fixture grouping alone yields one URL, and cleaning keeps it. After
    a second grouping is written (one article with a 2016 ``publishedAt``
    value) there are two URLs, and cleaning brings the count back to one.
    """
    def stored_url_count():
        # Number of URLs the reader currently reports.
        return len(database_reader.get_urls())

    database_writer.write_groups([self.grouping])
    self.assertEqual(1, stored_url_count())
    database_writer.clean_database()
    self.assertEqual(1, stored_url_count())

    dated_article = models.Article(
        url="google.com",
        publishedAt="2016-10-11T23:41:34Z",
        keywords=["a"],
    )
    database_writer.write_groups([models.Grouping(dated_article)])
    self.assertEqual(2, stored_url_count())
    database_writer.clean_database()
    self.assertEqual(1, stored_url_count())
def addGrouping():
    """Create a Grouping from the request's JSON body and persist it.

    Reads ``job_id``, ``location_id``, ``category_id``, ``start_time`` and
    ``end_time`` from the JSON payload (missing keys become None), passes
    them positionally to ``models.Grouping`` in that order, then adds the
    object to the session and commits.

    Returns:
        A pretty-printed JSON string with ``status`` 200 and a confirmation
        ``message``.
    """
    payload = request.get_json()
    # NOTE(review): payload.get fails if get_json() returns None -- confirm
    # callers always send a JSON body.
    field_names = (
        'job_id',
        'location_id',
        'category_id',
        'start_time',
        'end_time',
    )
    constructor_args = [payload.get(field) for field in field_names]
    grouping = models.Grouping(*constructor_args)

    db.session.add(grouping)
    db.session.commit()

    return json.dumps(
        {'status': 200, 'message': "Grouping added to database"},
        sort_keys=True,
        indent=4,
        separators=(',', ': '),
        default=dateconverter,
    )
def get_grouped_articles():
    """Load grouped articles from the database as Grouping objects.

    Selects every ``article`` row that has non-empty ``article_text`` and a
    non-NULL ``topic_id``, builds an Article for each row (with keywords
    from ``_get_article_keywords``), and collects the articles into one
    Grouping per ``topic_id``.

    Returns:
        A list containing one Grouping per distinct ``topic_id``.
    """
    query = (
        "SELECT name, topic_id, link, article_text, image_url FROM article "
        "WHERE article_text != '' AND topic_id IS NOT NULL;")
    groupings_by_topic = {}
    with database_utils.DatabaseConnection() as (_connection, cursor):
        cursor.execute(query)
        for title, topic_id, url, text, image in cursor.fetchall():
            article = models.Article(
                url=url,
                title=title,
                text=text,
                urlToImage=image,
                in_database=True,
            )
            article.set_keywords(_get_article_keywords(url, cursor))

            if topic_id not in groupings_by_topic:
                # First article seen for this topic: start its grouping.
                groupings_by_topic[topic_id] = models.Grouping(
                    article,
                    uuid=topic_id,
                    in_database=True,
                    has_new_articles=False,
                )
                continue
            groupings_by_topic[topic_id].add_article(
                article, new_article=False)
        return list(groupings_by_topic.values())