def get_stories_for_topic(topic_id):
    """Look up a topic's title together with the articles filed under it.

    Args:
        topic_id: value matched against ``topic.id`` and ``article.topic_id``.

    Returns:
        A dict with the keys ``"title"`` and ``"articles"``. When no topic row
        matches, ``"title"`` is None and ``"articles"`` is an empty list.
        Every article is a dict with the keys ``name``, ``link``, ``image``,
        ``x``, ``y``, ``popularity``, ``source`` and ``favicon``; ``x`` and
        ``y`` are read from the ``group_fit_x`` / ``group_fit_y`` columns.
    """
    # Output keys, in the same order as the columns selected below.
    keys = ("name", "link", "image", "x", "y", "popularity", "source",
            "favicon")
    title = None
    rows = []
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT name FROM topic WHERE id=?", (topic_id, ))
        topic_row = cursor.fetchone()
        if topic_row is not None:
            title = topic_row[0]
            cursor.execute(
                "SELECT name, link, image_url, group_fit_x, group_fit_y, popularity, source, favicon "
                "FROM article WHERE topic_id=?", (topic_id, ))
            rows = cursor.fetchall()
    return {
        "title": title,
        "articles": [dict(zip(keys, row)) for row in rows]
    }
def remove_grouping_from_database(grouping):
    """Delete a grouping's rows and flag its objects as no longer stored.

    Args:
        grouping: object providing ``get_uuid()``, ``get_articles()`` and
            ``set_in_database()``; its articles must provide
            ``set_in_database()`` as well.
    """
    with database_utils.DatabaseConnection() as (_connection, _cursor):
        # The helper opens a connection of its own; the handles bound by this
        # ``with`` are not used directly.
        _remove_group_ids_from_database(grouping.get_uuid())
        grouping.set_in_database(False)
        for member in grouping.get_articles():
            member.set_in_database(False)
def mark_item_as_clicked(url):
    """Record a visit to an article by adding one to its popularity.

    Args:
        url: value matched against ``article.link``.
    """
    increment_sql = "UPDATE article SET popularity = popularity + 1 WHERE link = ?"
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(increment_sql, (url, ))
        connection.commit()
def get_articles(keyword, page=0, limit=10, order_by=None, descending=True):
    """Return one page of articles, optionally restricted to a keyword.

    Args:
        keyword: keyword the articles must be tagged with; None selects every
            article that has at least one keyword row.
        page: zero-based page index.
        limit: number of articles per page.
        order_by: name of the ``article`` column to sort on. None, or a name
            that is not one of the known sortable columns, falls back to
            ``"date"``.
        descending: sort direction; True sorts largest-first.

    Returns:
        A dict with ``"num"`` (number of matching articles before paging) and
        ``"articles"`` (list of dicts with the keys ``name``, ``link``,
        ``image``, ``x``, ``y``, ``popularity``, ``source``, ``favicon``).
    """
    # A column name cannot be supplied through a ``?`` placeholder: the bound
    # value is treated as a string constant, so "ORDER BY ?" sorts nothing.
    # The name is therefore interpolated, but only after it has been checked
    # against a fixed whitelist so no caller-supplied text reaches the SQL.
    sortable_columns = ("date", "name", "link", "image_url", "fit_x", "fit_y",
                        "popularity", "source", "favicon")
    if order_by not in sortable_columns:
        order_by = "date"
    direction = "DESC" if descending else "ASC"
    query = (
        "SELECT name, link, image_url, fit_x, fit_y, popularity, source, favicon "
        "FROM keyword JOIN article ON keyword.article_link = article.link "
        "WHERE keyword = ? OR ? GROUP BY article_link "
        "ORDER BY article.{0} {1};".format(order_by, direction))

    with database_utils.DatabaseConnection() as (connection, cursor):
        # The second parameter switches the keyword filter off when no
        # keyword was given.
        cursor.execute(query, (keyword, keyword is None))
        rows = cursor.fetchall()

    num_items = len(rows)
    start = limit * page
    page_rows = rows[start:start + limit]
    return {
        "num": num_items,
        "articles": [{
            "name": row[0],
            "link": row[1],
            "image": row[2],
            "x": row[3],
            "y": row[4],
            "popularity": row[5],
            "source": row[6],
            "favicon": row[7]
        } for row in page_rows]
    }
def get_top_keywords(num=constants.DEFAULT_NUM_KEYWORDS):
    """Return the most frequently used keywords, most frequent first.

    Args:
        num: maximum number of keywords to return.

    Returns:
        A list of keyword values ordered by descending row count.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(
            "SELECT keyword, COUNT(1) AS c FROM keyword GROUP BY keyword ORDER BY c DESC LIMIT ?;",
            (num, ))
        ranked = cursor.fetchall()
        # Only the keyword column is returned; the count is used for ordering.
        return [row[0] for row in ranked]
def get_groups_with_unfit_articles():
    """Return the topic ids that still contain an article lacking a group fit.

    An article counts as unfit when its ``group_fit_x`` is NULL; articles
    without a ``topic_id`` are ignored.

    Returns:
        A list with one topic id per affected topic.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(
            "SELECT topic_id FROM article WHERE group_fit_x IS NULL AND topic_id IS NOT NULL "
            "GROUP BY topic_id;")
        topic_ids = []
        for row in cursor.fetchall():
            topic_ids.append(row[0])
        return topic_ids
def get_number_articles_without_overall_fit():
    """Count the grouped articles that have no overall fit yet.

    An article is counted when it belongs to a topic (``topic_id`` is not
    NULL) and its overall-fit column ``fit_x`` is NULL.

    Returns:
        The number of such articles as an integer.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        # NOTE(review): the previous query tested group_fit_x, which is the
        # per-group fit column; the overall fit is stored in fit_x/fit_y.
        # Counting in SQL also avoids fetching every row just to take len().
        cursor.execute(
            "SELECT COUNT(*) FROM article WHERE fit_x IS NULL AND topic_id IS NOT NULL;"
        )
        return cursor.fetchone()[0]
def get_topics(category=None,
               page_number=0,
               articles_per_page=constants.ARTICLES_PER_PAGE):
    """Return one page of topics, largest topics first.

    Args:
        category: when given, only topics whose ``topic.category`` equals this
            value are returned; None returns topics of every category.
        page_number: zero-based page index.
        articles_per_page: number of topics on each page.

    Returns:
        A list of dicts with the keys ``total_items``, ``title``, ``id``,
        ``image``, ``category`` and ``count``. ``count`` is the number of
        articles in the topic; ``total_items`` is the number of topics that
        match the request before paging and is repeated in every dict.
    """
    query = (
        "SELECT topic.name, topic.id, topic.image_url, topic.category, count(*) FROM article, topic "
        "WHERE article.topic_id = topic.id AND article.topic_id IS NOT NULL ")
    params = ()
    if category is not None:
        query += "AND topic.category = ? "
        params = (category, )
    query += "GROUP BY topic.id ORDER BY count(*) DESC;"

    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(query, params)
        rows = cursor.fetchall()

    # The total is taken from the rows just fetched so that it honours the
    # category filter; it was previously computed without the category and
    # therefore reported the unfiltered topic count on filtered pages.
    total_items = len(rows)
    start = page_number * articles_per_page
    end = start + articles_per_page
    # The rows are already ordered by article count in SQL, so the page needs
    # no further sorting.
    return [{
        "total_items": total_items,
        "title": row[0],
        "id": row[1],
        "image": row[2],
        "category": row[3],
        "count": row[4]
    } for row in rows[start:end]]
def get_urls():
    """Collect every link stored in the ``article`` and ``bad_article`` tables.

    Returns:
        A set containing the ``link`` values of both tables.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT link FROM article;")
        article_links = set(row[0] for row in cursor.fetchall())
        cursor.execute("SELECT link FROM bad_article;")
        rejected_links = set(row[0] for row in cursor.fetchall())
    return article_links | rejected_links
def test_populate_keywords(self):
    """Keywords read back for a written article equal the article's own keywords."""
    sample = test_utils.SIMILAR_ARTICLES[0]
    database_writer.write_articles([sample])
    with database_utils.DatabaseConnection() as (connection, cursor):
        expected_keywords = sample.get_keywords()
        stored_keywords = database_reader._get_article_keywords(
            sample.get_url(), cursor)
        self.assertEqual(expected_keywords, stored_keywords)
def _remove_group_ids_from_database(group_ids):
    """Delete topics, and the articles assigned to them, by topic id.

    Args:
        group_ids: a single id given as a string, or an iterable of ids.
    """
    # A lone string id is wrapped so it is not iterated character by character.
    if isinstance(group_ids, (str, unicode)):
        group_ids = [group_ids]
    delete_topic_sql = """DELETE FROM topic WHERE id = ?"""
    delete_articles_sql = """DELETE FROM article WHERE topic_id = ?"""
    with database_utils.DatabaseConnection() as (connection, cursor):
        for topic_id in group_ids:
            id_param = (topic_id, )
            cursor.execute(delete_topic_sql, id_param)
            cursor.execute(delete_articles_sql, id_param)
def get_ungrouped_articles():
    """Load the articles that have text but are not assigned to any topic.

    Returns:
        A list of ``models.Article`` objects built from the name, link and
        text of each matching row.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT name, link, article_text FROM article "
                       "WHERE article_text != '' AND topic_id IS NULL;")
        return [
            models.Article(url=link, title=title, text=body)
            for title, link, body in cursor.fetchall()
        ]
def update_topic_pictures():
    """Give every topic without an image the image of one of its articles.

    Topics whose articles have no image either are left unchanged.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT id FROM topic WHERE image_url IS NULL")
        # The ids are materialised first because the cursor is reused for the
        # per-topic queries below.
        imageless_topic_ids = [row[0] for row in cursor.fetchall()]
        for topic_id in imageless_topic_ids:
            cursor.execute(
                "SELECT image_url FROM article WHERE topic_id = ? AND image_url IS NOT NULL",
                (topic_id, ))
            candidate = cursor.fetchone()
            if not candidate:
                continue
            cursor.execute("UPDATE topic SET image_url = ? WHERE id = ?",
                           (candidate[0], topic_id))
def get_number_topics(category=None):
    """Count the topics that have at least one article.

    Args:
        category: when given, only topics whose ``topic.category`` equals this
            value are counted; None counts topics of every category.

    Returns:
        The number of matching topics as an integer.
    """
    query = ("SELECT COUNT(DISTINCT topic.id) FROM article, topic "
             "WHERE article.topic_id = topic.id AND article.topic_id IS NOT NULL")
    params = ()
    if category is not None:
        # Filter on the topic's category, the column the topic listing query
        # filters on; the previous query used article.category here.
        query += " AND topic.category = ?"
        params = (category, )
    with database_utils.DatabaseConnection() as (connection, cursor):
        # Counting distinct topic ids in SQL replaces fetching one row per
        # topic and taking len() of the result.
        cursor.execute(query + ";", params)
        return cursor.fetchone()[0]
def write_group_fits(grouping_list=None):
    """Store the per-group fit coordinates of the articles in each grouping.

    Args:
        grouping_list: groupings to process. When None, the groupings are
            loaded from the database and limited to those whose id is
            reported as having unfit articles.
    """
    if grouping_list is None:
        unfit_ids = [
            str(topic_id)
            for topic_id in database_reader.get_groups_with_unfit_articles()
        ]
        grouping_list = []
        for candidate in database_reader.get_grouped_articles():
            if candidate.get_uuid() in unfit_ids:
                grouping_list.append(candidate)
    update_sql = "UPDATE article SET group_fit_x = ?, group_fit_y = ? WHERE link = ?"
    with database_utils.DatabaseConnection() as (connection, cursor):
        for position, grouping in enumerate(grouping_list):
            _print_status("group fits", position, len(grouping_list))
            for article, coordinates in grouping.calculate_fit():
                fit_x, fit_y = coordinates[0], coordinates[1]
                cursor.execute(update_sql, (fit_x, fit_y, article.get_url()))
def write_overall_fits(grouping_list=None):
    """Compute one fit over all grouped articles and store it per article.

    Args:
        grouping_list: groupings whose articles are fitted together. When
            None, the groupings are loaded from the database.
    """
    if grouping_list is None:
        grouping_list = database_reader.get_grouped_articles()
    update_sql = "UPDATE article SET fit_x = ?, fit_y = ? WHERE link = ?"
    with database_utils.DatabaseConnection() as (connection, cursor):
        all_articles = []
        for grouping in grouping_list:
            for article in grouping.get_articles():
                all_articles.append(article)
        fits = models.calculate_fit(all_articles, max_iter=500)
        # Progress is reported with a one-based counter.
        for counter, (article, coordinates) in enumerate(fits, 1):
            _print_status("fits", counter, len(fits))
            cursor.execute(update_sql,
                           (coordinates[0], coordinates[1], article.get_url()))
def write_groups(grouping_list=None):
    """Write groupings and their new articles into the database.

    A grouping that is not yet in the database gets a ``topic`` row and is
    flagged as stored. Each of its new articles is written if necessary and
    then linked to the grouping through ``article.topic_id``.

    Args:
        grouping_list: iterable of groupings to write. None (the default)
            means there is nothing to write and the call is a no-op.
    """
    # The default used to be iterated directly, which raised a TypeError.
    if grouping_list is None:
        return
    with database_utils.DatabaseConnection() as (connection, cursor):
        for grouping in grouping_list:
            if not grouping.in_database():
                cursor.execute(
                    "INSERT INTO topic (name, id, image_url, category) VALUES (?, ?, ?, ?)",
                    (grouping.get_title(), grouping.get_uuid(),
                     grouping.get_image_url(), grouping.get_category()))
                grouping.set_in_database(True)
            for article in grouping.get_new_articles():
                if not article.in_database():
                    _write_article(article, connection, cursor)
                cursor.execute(
                    "UPDATE article SET topic_id = ? WHERE link = ?",
                    (grouping.get_uuid(), article.get_url()))
        connection.commit()
def clean_database(): """Remove articles from the database when they are old.""" with database_utils.DatabaseConnection() as (connection, cursor): # Remove all of the topics with no topic and cursor.execute( "DELETE FROM article WHERE article.topic_id IS NULL " "AND julianday(CURRENT_TIMESTAMP) - julianday(article.date) >= ?", (constants.ARTICLE_REPLACEMENT_TIME, )) # Remove all of the topics that only have articles that are over some number of days old cursor.execute( "SELECT id FROM topic WHERE NOT EXISTS(SELECT 1 FROM article WHERE topic.id = article.topic_id " "AND julianday(CURRENT_TIMESTAMP) - julianday(date) <= ?)", (constants.ARTICLE_REPLACEMENT_TIME, )) groups_to_remove = [item[0] for item in cursor.fetchall()] if groups_to_remove: print "Removing", len(groups_to_remove), "groups" _remove_group_ids_from_database(groups_to_remove)
def get_stories_for_topic(topic_id):
    """Look up a topic's title together with the articles filed under it.

    Args:
        topic_id: value matched against ``topic.id`` and ``article.topic_id``.

    Returns:
        A dict with the keys ``"title"`` and ``"articles"``. When no topic row
        matches, ``"title"`` is None and ``"articles"`` is an empty list.
        Every article is a dict with the keys ``name``, ``link``, ``image``,
        ``x``, ``y``, ``popularity`` and ``source``; ``x`` and ``y`` are read
        from the ``fit_x`` / ``fit_y`` columns.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT name FROM topic WHERE id=?", (topic_id, ))
        topic_row = cursor.fetchone()
        if topic_row is None:
            # fetchone() returns None for an unknown topic; indexing it
            # directly used to raise a TypeError.
            return {"title": None, "articles": []}
        cursor.execute(
            "SELECT name, link, image_url, fit_x, fit_y, popularity, source FROM article WHERE topic_id=?",
            (topic_id, ))
        return {
            "title": topic_row[0],
            "articles": [{
                "name": row[0],
                "link": row[1],
                "image": row[2],
                "x": row[3],
                "y": row[4],
                "popularity": row[5],
                "source": row[6]
            } for row in cursor.fetchall()]
        }
def get_grouped_articles():
    """Rebuild the stored groupings from the articles assigned to a topic.

    Only articles with non-empty text and a ``topic_id`` are loaded. Each
    article is given its keywords and is placed in the grouping of its topic.

    Returns:
        A list of ``models.Grouping`` objects, one per topic id encountered.
    """
    groupings_by_topic = {}
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute(
            "SELECT name, topic_id, link, article_text, image_url FROM article "
            "WHERE article_text != '' AND topic_id IS NOT NULL;")
        # fetchall() materialises the rows, so the cursor can be reused for
        # the keyword lookup inside the loop.
        for title, topic_id, link, body, image_link in cursor.fetchall():
            article = models.Article(url=link,
                                     title=title,
                                     text=body,
                                     urlToImage=image_link,
                                     in_database=True)
            article.set_keywords(_get_article_keywords(link, cursor))
            if topic_id not in groupings_by_topic:
                # The first article of a topic creates its grouping.
                groupings_by_topic[topic_id] = models.Grouping(
                    article,
                    uuid=topic_id,
                    in_database=True,
                    has_new_articles=False)
            else:
                groupings_by_topic[topic_id].add_article(article,
                                                         new_article=False)
    return list(groupings_by_topic.values())
def get_sources():
    """Return every article source together with its number of articles.

    Returns:
        The rows of ``(source, count)`` as returned by the cursor.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        cursor.execute("SELECT source, count(1) FROM article GROUP BY source")
        source_counts = cursor.fetchall()
        return source_counts
def write_articles(article_list):
    """Write every article of the list into the database, reporting progress.

    Args:
        article_list: sequence of articles handed to ``_write_article`` one
            at a time.
    """
    with database_utils.DatabaseConnection() as (connection, cursor):
        for position, entry in enumerate(article_list):
            _print_status("articles", position, len(article_list))
            _write_article(entry, cursor)
def write_articles(article_list): """Write articles in the article list into the database.""" with database_utils.DatabaseConnection() as (connection, cursor): for i, article in enumerate(article_list): print "adding article", i, "out of", len(article_list) _write_article(article, connection, cursor)