예제 #1
0
def get_stories_for_topic(topic_id):
    """Fetch the title and the articles of the topic identified by ``topic_id``.

    When no topic row matches, the result carries a ``None`` title and an empty article list.
    """
    article_keys = ("name", "link", "image", "x", "y", "popularity", "source", "favicon")
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT name FROM topic WHERE id=?", (topic_id, ))
        topic_row = cursor.fetchone()
        if topic_row is None:
            title, rows = None, []
        else:
            title = topic_row[0]
            cursor.execute(
                "SELECT name, link, image_url, group_fit_x, group_fit_y, popularity, source, favicon "
                "FROM article WHERE topic_id=?", (topic_id, ))
            rows = cursor.fetchall()
        # The selected columns line up one-to-one with article_keys.
        return {
            "title": title,
            "articles": [dict(zip(article_keys, row)) for row in rows],
        }
예제 #2
0
def remove_grouping_from_database(grouping):
    """Delete the grouping's topic and articles from the database and flag them as not stored."""
    with database_utils.DatabaseConnection() as (_connection, _cursor):
        # The removal helper opens its own connection; the one held here is not used directly.
        group_uuid = grouping.get_uuid()
        _remove_group_ids_from_database(group_uuid)
        grouping.set_in_database(False)
        for member in grouping.get_articles():
            member.set_in_database(False)
예제 #3
0
def mark_item_as_clicked(url):
    """Record a click on the article stored under ``url`` by adding one to its popularity."""
    statement = "UPDATE article SET popularity = popularity + 1 WHERE link = ?"
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(statement, (url, ))
        connection.commit()
예제 #4
0
def get_articles(keyword, page=0, limit=10, order_by=None, descending=True):
    """Get one page of articles, optionally restricted to a keyword.

    Args:
        keyword: keyword the articles must be tagged with; None matches every article.
        page: zero-based page index.
        limit: number of articles per page.
        order_by: article column to sort on; None or an unrecognised name sorts by "date".
        descending: sort from highest to lowest when True, lowest to highest otherwise.

    Returns:
        A dict with "num" (the number of matching articles before paging) and "articles"
        (a list of dicts describing the articles on the requested page).
    """
    # A bound parameter in ORDER BY is just a constant value, so it never sorts the rows.
    # Column names cannot be bound; inline one, but only after checking it against a
    # fixed whitelist so caller-supplied text never reaches the SQL string.
    sortable_columns = ("date", "name", "link", "image_url", "fit_x", "fit_y",
                        "popularity", "source", "favicon")
    if order_by not in sortable_columns:
        order_by = "date"
    direction = "DESC" if descending else "ASC"
    query = (
        "SELECT name, link, image_url, fit_x, fit_y, popularity, source, favicon "
        "FROM keyword JOIN article ON keyword.article_link = article.link "
        "WHERE keyword = ? OR ? GROUP BY article_link "
        "ORDER BY article.%s %s;" % (order_by, direction))
    with database_utils.DatabaseConnection() as (connection, cursor):
        # The second parameter switches the keyword filter off when keyword is None.
        cursor.execute(query, (keyword, keyword is None))
        items = cursor.fetchall()
        num_items = len(items)
        start = limit * page
        items = items[start:start + limit]
        return {
            "num":
            num_items,
            "articles": [{
                "name": item[0],
                "link": item[1],
                "image": item[2],
                "x": item[3],
                "y": item[4],
                "popularity": item[5],
                "source": item[6],
                "favicon": item[7]
            } for item in items]
        }
예제 #5
0
def get_top_keywords(num=constants.DEFAULT_NUM_KEYWORDS):
    """Return at most ``num`` keywords, the most frequently used ones first."""
    query = "SELECT keyword, COUNT(1) AS c FROM keyword GROUP BY keyword ORDER BY c DESC LIMIT ?;"
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query, (num, ))
        top_keywords = []
        for row in cursor.fetchall():
            # Only the keyword itself is returned; the usage count is dropped.
            top_keywords.append(row[0])
        return top_keywords
예제 #6
0
def get_groups_with_unfit_articles():
    """Return the distinct topic ids that have at least one article with no group fit yet."""
    query = ("SELECT topic_id FROM article WHERE group_fit_x IS NULL AND topic_id IS NOT NULL "
             "GROUP BY topic_id;")
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query)
        topic_ids = []
        for row in cursor.fetchall():
            topic_ids.append(row[0])
        return topic_ids
예제 #7
0
def get_number_articles_without_overall_fit():
    """Count the articles that belong to a topic but have no ``group_fit_x`` value.

    Returns:
        The number of matching article rows as an integer.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        # Count inside SQLite instead of fetching every row only to take its length.
        # NOTE(review): the function name mentions the overall fit, yet the filter checks
        # group_fit_x (the same condition as get_groups_with_unfit_articles) -- confirm
        # whether fit_x was intended.
        cursor.execute(
            "SELECT COUNT(*) FROM article WHERE group_fit_x IS NULL AND topic_id IS NOT NULL;"
        )
        return cursor.fetchone()[0]
예제 #8
0
def get_topics(category=None,
               page_number=0,
               articles_per_page=constants.ARTICLES_PER_PAGE):
    """Get the topics for the given page, largest topics first.

    Args:
        category: when not None, only topics whose category equals this value are returned.
        page_number: zero-based page index.
        articles_per_page: number of topics on each page.

    Returns:
        A list of dicts, one per topic on the page, with the keys "total_items", "title",
        "id", "image", "category" and "count". "total_items" is the number of topics that
        match the query across all pages.
    """
    query = (
        "SELECT topic.name, topic.id, topic.image_url, topic.category, count(*) FROM article, topic "
        "WHERE article.topic_id = topic.id AND article.topic_id IS NOT NULL ")
    parameters = ()
    if category is not None:
        query += "AND topic.category = ? "
        parameters = (category, )
    query += "GROUP BY topic.id ORDER BY count(*) DESC;"

    start = page_number * articles_per_page
    end = (page_number + 1) * articles_per_page
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query, parameters)
        rows = cursor.fetchall()
        # The query yields one row per matching topic, so its length is the total for this
        # category; no second connection or unfiltered count is needed.
        total_items = len(rows)
        # Rows arrive ordered by count descending, so the page slice needs no re-sorting.
        return [{
            "total_items": total_items,
            "title": row[0],
            "id": row[1],
            "image": row[2],
            "category": row[3],
            "count": row[4]
        } for row in rows[start:end]]
예제 #9
0
def get_urls():
    """Return the set of every link stored in the article table or the bad_article table."""
    with database_utils.DatabaseConnection() as (connection, cursor):
        collected = set()
        for link_query in ("SELECT link FROM article;",
                           "SELECT link FROM bad_article;"):
            cursor.execute(link_query)
            collected.update(row[0] for row in cursor.fetchall())
        return collected
예제 #10
0
 def test_populate_keywords(self):
     """Keywords written along with an article should be read back unchanged."""
     sample = test_utils.SIMILAR_ARTICLES[0]
     database_writer.write_articles([sample])
     with database_utils.DatabaseConnection() as (connection, cursor):
         expected = sample.get_keywords()
         stored = database_reader._get_article_keywords(sample.get_url(),
                                                        cursor)
         self.assertEqual(expected, stored)
예제 #11
0
def _remove_group_ids_from_database(group_ids):
    """Delete each listed topic together with the articles attached to it.

    A single id passed as a string is treated as a one-element list.
    """
    if isinstance(group_ids, (str, unicode)):
        group_ids = [group_ids]
    # Executed in this order for every id: the topic row first, then its articles.
    delete_statements = (
        """DELETE FROM topic WHERE id = ?""",
        """DELETE FROM article WHERE topic_id = ?""",
    )
    with database_utils.DatabaseConnection() as (connection, cursor):
        for topic_id in group_ids:
            for statement in delete_statements:
                cursor.execute(statement, (topic_id, ))
예제 #12
0
def get_ungrouped_articles():
    """Build an Article object for every stored article that has text but no topic."""
    query = ("SELECT name, link, article_text FROM article "
             "WHERE article_text != '' AND topic_id IS NULL;")
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query)
        return [
            models.Article(url=link, title=title, text=body)
            for title, link, body in cursor.fetchall()
        ]
예제 #13
0
def update_topic_pictures():
    """Give every topic without an image the image of one of its articles, when one has an image."""
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT id FROM topic WHERE image_url IS NULL")
        # Collect the ids up front because the same cursor is reused inside the loop.
        topic_ids = [row[0] for row in cursor.fetchall()]
        for topic_id in topic_ids:
            cursor.execute(
                "SELECT image_url FROM article WHERE topic_id = ? AND image_url IS NOT NULL",
                (topic_id, ))
            image_row = cursor.fetchone()
            if not image_row:
                # No article of this topic has an image; leave the topic untouched.
                continue
            cursor.execute("UPDATE topic SET image_url = ? WHERE id = ?",
                           (image_row[0], topic_id))
예제 #14
0
def get_number_topics(category=None):
    """Count the topics that have at least one article.

    Args:
        category: when not None, only articles whose category equals this value are
            considered.

    Returns:
        The number of distinct topics matched, as an integer.
    """
    # Counting distinct topic ids in SQL replaces grouping, sorting and fetching every
    # row just to measure the length of the result.
    query = ("SELECT COUNT(DISTINCT topic.id) FROM article, topic "
             "WHERE article.topic_id = topic.id AND article.topic_id IS NOT NULL")
    parameters = ()
    if category is not None:
        # NOTE(review): get_topics filters on topic.category while this filters on
        # article.category -- confirm which column is intended.
        query += " AND article.category = ?"
        parameters = (category, )
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query + ";", parameters)
        return cursor.fetchone()[0]
예제 #15
0
def write_group_fits(grouping_list=None):
    """Write the group fits into the database.

    Args:
        grouping_list: groupings whose article fits should be stored. When None, the
            groupings are read from the database and limited to those whose id is reported
            by ``database_reader.get_groups_with_unfit_articles()``.
    """
    if grouping_list is None:
        # A set keeps the membership test below constant-time per grouping.
        unfit_ids = set(
            str(group_id)
            for group_id in database_reader.get_groups_with_unfit_articles())
        grouping_list = [
            group for group in database_reader.get_grouped_articles()
            if group.get_uuid() in unfit_ids
        ]
    with database_utils.DatabaseConnection() as (connection, cursor):
        for i, grouping in enumerate(grouping_list):
            _print_status("group fits", i, len(grouping_list))
            # calculate_fit() yields (article, fit) pairs; fit[0] and fit[1] are stored as
            # the x and y of the group fit.
            for article, fit in grouping.calculate_fit():
                cursor.execute(
                    "UPDATE article SET group_fit_x = ?, group_fit_y = ? WHERE link = ?",
                    (fit[0], fit[1], article.get_url()))
0
def write_overall_fits(grouping_list=None):
    """Compute a fit over all articles of the given groupings and store it per article.

    When ``grouping_list`` is None the groupings are read from the database first.
    """
    if grouping_list is None:
        grouping_list = database_reader.get_grouped_articles()
    update_statement = "UPDATE article SET fit_x = ?, fit_y = ? WHERE link = ?"
    with database_utils.DatabaseConnection() as (connection, cursor):
        articles = []
        for grouping in grouping_list:
            articles.extend(grouping.get_articles())
        fits = models.calculate_fit(articles, max_iter=500)
        # Progress is reported one-based.
        for position, (article, fit) in enumerate(fits, 1):
            _print_status("fits", position, len(fits))
            cursor.execute(update_statement,
                           (fit[0], fit[1], article.get_url()))
예제 #17
0
def write_groups(grouping_list=None):
    """Write groups in the grouping list into the database if they are not already there.

    Args:
        grouping_list: groupings to store. None (the default) means there is nothing to
            write and the call returns immediately.
    """
    if grouping_list is None:
        # Iterating None would raise a TypeError; with no groupings there is no work to do.
        return
    with database_utils.DatabaseConnection() as (connection, cursor):
        for grouping in grouping_list:
            if not grouping.in_database():
                cursor.execute(
                    "INSERT INTO topic (name, id, image_url, category) VALUES (?, ?, ?, ?)",
                    (grouping.get_title(), grouping.get_uuid(),
                     grouping.get_image_url(), grouping.get_category()))
                grouping.set_in_database(True)
            for article in grouping.get_new_articles():
                if not article.in_database():
                    _write_article(article, connection, cursor)
                # Attach the article to this grouping's topic, whether or not it was just written.
                cursor.execute(
                    "UPDATE article SET topic_id = ? WHERE link = ?",
                    (grouping.get_uuid(), article.get_url()))
            # Commit once per grouping.
            connection.commit()
예제 #18
0
def clean_database():
    """Remove articles from the database when they are old.

    Articles without a topic are deleted once their age reaches
    ``constants.ARTICLE_REPLACEMENT_TIME``. Topics with no article within that age are
    then removed together with their articles.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        # Remove the articles that belong to no topic and have reached the replacement age.
        cursor.execute(
            "DELETE FROM article WHERE article.topic_id IS NULL "
            "AND julianday(CURRENT_TIMESTAMP) - julianday(article.date) >= ?",
            (constants.ARTICLE_REPLACEMENT_TIME, ))

        # Find the topics that have no article within the replacement age.
        cursor.execute(
            "SELECT id FROM topic WHERE NOT EXISTS(SELECT 1 FROM article WHERE topic.id = article.topic_id "
            "AND julianday(CURRENT_TIMESTAMP) - julianday(date) <= ?)",
            (constants.ARTICLE_REPLACEMENT_TIME, ))
        groups_to_remove = [item[0] for item in cursor.fetchall()]
    if groups_to_remove:
        # A single formatted string prints the same text under Python 2 and Python 3.
        print("Removing %d groups" % len(groups_to_remove))
        # Called after the connection above is released; the helper opens its own.
        _remove_group_ids_from_database(groups_to_remove)
예제 #19
0
def get_stories_for_topic(topic_id):
    """Get all of the stories for the topic with the given topic id.

    Args:
        topic_id: id of the topic to look up.

    Returns:
        A dict with "title" (the topic name) and "articles" (a list of dicts with the keys
        "name", "link", "image", "x", "y", "popularity" and "source"). When the topic is
        not in the database, "title" is None and "articles" is empty.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT name FROM topic WHERE id=?", (topic_id, ))
        topic_row = cursor.fetchone()
        if topic_row is None:
            # fetchone() returns None for an unknown topic; indexing it would raise TypeError.
            return {"title": None, "articles": []}
        title = topic_row[0]
        cursor.execute(
            "SELECT name, link, image_url, fit_x, fit_y, popularity, source FROM article WHERE topic_id=?",
            (topic_id, ))
        return {
            "title":
            title,
            "articles": [{
                "name": item[0],
                "link": item[1],
                "image": item[2],
                "x": item[3],
                "y": item[4],
                "popularity": item[5],
                "source": item[6]
            } for item in cursor.fetchall()]
        }
예제 #20
0
def get_grouped_articles():
    """Load every stored article that has text and a topic, bundled into one Grouping per topic."""
    query = ("SELECT name, topic_id, link, article_text, image_url FROM article "
             "WHERE article_text != '' AND topic_id IS NOT NULL;")
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query)
        groupings_by_topic = {}
        # fetchall() is evaluated once up front, so reusing the cursor for the keyword
        # lookup inside the loop does not disturb the iteration.
        for title, topic_id, link, body, image in cursor.fetchall():
            article = models.Article(url=link,
                                     title=title,
                                     text=body,
                                     urlToImage=image,
                                     in_database=True)
            article.set_keywords(_get_article_keywords(link, cursor))
            existing = groupings_by_topic.get(topic_id)
            if existing is None:
                # First article seen for this topic starts a new grouping.
                groupings_by_topic[topic_id] = models.Grouping(
                    article,
                    uuid=topic_id,
                    in_database=True,
                    has_new_articles=False)
            else:
                existing.add_article(article, new_article=False)
        return list(groupings_by_topic.values())
예제 #21
0
def get_sources():
    """Return one (source, article count) row for every distinct article source."""
    query = "SELECT source, count(1) FROM article GROUP BY source"
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query)
        source_counts = cursor.fetchall()
        return source_counts
예제 #22
0
def write_articles(article_list):
    """Store every article of ``article_list`` in the database, reporting progress per article."""
    with database_utils.DatabaseConnection() as (_connection, cursor):
        for position, entry in enumerate(article_list):
            _print_status("articles", position, len(article_list))
            _write_article(entry, cursor)
예제 #23
0
def write_articles(article_list):
    """Write articles in the article list into the database, printing progress for each one.

    Args:
        article_list: sequence of articles to store; its length is used in the progress line.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        for i, article in enumerate(article_list):
            # A single formatted string prints the same text under Python 2 and Python 3.
            print("adding article %d out of %d" % (i, len(article_list)))
            _write_article(article, connection, cursor)