Esempio n. 1
0
 def test_combine_group(self):
     """Check that combining a second grouping into the first leaves the first with both articles."""
     similar = test_utils.SIMILAR_ARTICLES
     target = models.Grouping(similar[0])
     absorbed = models.Grouping(similar[1])

     target.combine_group(absorbed)

     # The surviving grouping should list the first two similar articles, in order.
     expected_articles = list(similar[0:2])
     actual_articles = list(target.get_articles())
     self.assertEqual(expected_articles, actual_articles)
Esempio n. 2
0
def group_articles(article_list=None, debug=False):
    """Assign articles to groupings, creating and merging groupings as needed.

    Args:
        article_list: Articles to group. Items may be Article objects or URL
            strings; strings are wrapped in ``models.Article(url=...)``. When
            None, the ungrouped articles are read from the database.
        debug: When True, print a progress line for every article.

    Returns:
        The list of groupings: the ones read from the database, updated with
        the new articles, plus any groupings created here. If at least one
        article had no keywords, a single grouping holding all such articles
        is appended last.
    """
    if article_list is None:
        article_list = database_reader.get_ungrouped_articles()
    else:
        article_list = [
            models.Article(url=a) if isinstance(a, (str, unicode)) else a
            for a in article_list
        ]
    groupings = database_reader.get_grouped_articles()
    no_keyword_grouping = None
    for index, article in enumerate(article_list):
        if debug:
            # A single pre-formatted string prints the same text under the
            # Python 2 print statement and the print() function.
            print("Grouping %s out of %s" % (index, len(article_list)))
        if not article.get_keywords():
            if no_keyword_grouping is None:
                # in_database is set to True here because we do not want a no keyword grouping in the database.
                no_keyword_grouping = models.Grouping(article,
                                                      in_database=True)
            else:
                no_keyword_grouping.add_article(article)
            continue  # Skip the article if the keywords cannot be gotten from it.
        best_grouping, best_grouping_similarity = None, 0

        # Iterate over a shallow copy because groupings may be removed from
        # the real list when two of them are combined.
        for grouping in groupings[:]:
            similarity = grouping.best_similarity(article)
            if similarity <= best_grouping_similarity:
                continue
            # The grouping that remains in `groupings` after this step; it
            # changes only when `grouping` gets absorbed into the current best.
            survivor = grouping
            # If this article has a high similarity with two separate groups, then combine the groups.
            if (best_grouping is not None and
                    best_grouping_similarity > constants.MIN_COMBINE_GROUP_PERCENTAGE):
                if best_grouping.in_database():
                    # Keep the grouping that is already stored and fold the
                    # other one into it.
                    if grouping.in_database():
                        database_writer.remove_grouping_from_database(
                            grouping)
                    best_grouping.combine_group(grouping)
                    groupings.remove(grouping)
                    # `grouping` is no longer in the list, so the article must
                    # go to the combined grouping instead. Pointing at the
                    # removed object would drop the article from the result and
                    # break a later groupings.remove() call.
                    survivor = best_grouping
                else:
                    grouping.combine_group(best_grouping)
                    groupings.remove(best_grouping)
            best_grouping = survivor
            best_grouping_similarity = similarity
        if best_grouping is not None and best_grouping_similarity > constants.MIN_GROUPING_PERCENTAGE:
            best_grouping.add_article(article)
        else:
            groupings.append(models.Grouping(article))
    if no_keyword_grouping:
        groupings.append(no_keyword_grouping)
    return groupings
Esempio n. 3
0
 def setUp(self):
     """Patch the database name, reset the test database, and build the article/grouping fixtures."""
     # The patch has to be active before database_name() is called below so
     # the location is derived from the patched name.
     name_patcher = mock.patch("server.database_utils.database_name",
                               return_value="mudima_test.db")
     self._database_name_mock = name_patcher
     name_patcher.start()

     patched_name = database_utils.database_name()
     self._database_location = database_utils.database_path(patched_name)
     self._delete_database()

     fixture_article = models.Article("example.com",
                                      title="Example",
                                      keywords=["0", "1"])
     self.article = fixture_article
     self.grouping = models.Grouping(fixture_article)
Esempio n. 4
0
 def test_clean_database(self):
     """Check the stored URL count around clean_database() calls, before and after adding a 2016-dated article."""
     def stored_url_count():
         return len(database_reader.get_urls())

     database_writer.write_groups([self.grouping])
     self.assertEqual(1, stored_url_count())
     # Cleaning must leave the fixture grouping's single URL in place.
     database_writer.clean_database()
     self.assertEqual(1, stored_url_count())

     dated_article = models.Article(url="google.com",
                                    publishedAt="2016-10-11T23:41:34Z",
                                    keywords=["a"])
     database_writer.write_groups([models.Grouping(dated_article)])
     self.assertEqual(2, stored_url_count())
     # NOTE(review): presumably the 2016-dated article is the one removed
     # here; the test only asserts that the count returns to one.
     database_writer.clean_database()
     self.assertEqual(1, stored_url_count())
Esempio n. 5
0
def addGrouping():
    """Create a Grouping from the JSON request body, persist it, and return a JSON status string.

    The fields job_id, location_id, category_id, start_time and end_time are
    read from the request JSON (a missing key yields None) and passed
    positionally, in that order, to models.Grouping.
    """
    payload = request.get_json()
    field_names = ('job_id', 'location_id', 'category_id', 'start_time',
                   'end_time')
    grouping = models.Grouping(*[payload.get(name) for name in field_names])

    session = db.session
    session.add(grouping)
    session.commit()

    body = {'status': 200, 'message': "Grouping added to database"}
    return json.dumps(body,
                      sort_keys=True,
                      indent=4,
                      separators=(',', ': '),
                      default=dateconverter)
Esempio n. 6
0
def get_grouped_articles():
    """Read articles that have text and a topic from the database and return them as Grouping objects.

    Articles sharing a topic_id end up in the same Grouping; each Grouping is
    created with that topic_id as its uuid.
    """
    query = ("SELECT name, topic_id, link, article_text, image_url FROM article "
             "WHERE article_text != '' AND topic_id IS NOT NULL;")
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query)
        # Fetch every row up front: the same cursor is handed to
        # _get_article_keywords inside the loop.
        rows = cursor.fetchall()
        groupings_by_topic = {}
        for title, topic_id, link, text, image_url in rows:
            article = models.Article(url=link,
                                     title=title,
                                     text=text,
                                     urlToImage=image_url,
                                     in_database=True)
            article.set_keywords(_get_article_keywords(link, cursor))
            if topic_id not in groupings_by_topic:
                # First article seen for this topic: start its grouping.
                groupings_by_topic[topic_id] = models.Grouping(
                    article,
                    uuid=topic_id,
                    in_database=True,
                    has_new_articles=False)
                continue
            groupings_by_topic[topic_id].add_article(article,
                                                     new_article=False)
        return list(groupings_by_topic.values())