def insert_into_db(metaData): """Inserts the supplied articles into the database. Duplicate articles are ignored.""" print('Trying to insert %d elements into the database.' % len(metaData)) conn = database.get_connection() cur = conn.cursor() try: insert_categories(metaData, cur) article_category_sql = 'insert into article_categories values(%s,%s)' for i, (article_id, article) in enumerate(metaData.items()): insert_article(cur, article_id, article) if cur.rowcount == 0: # Ignore article already in database continue for category in article['categories']: cur.execute(article_category_sql, (article_id, category)) for author in article['authors']: insert_author(cur, article_id, author['firstname'], author['lastname'], author['affiliations']) conn.commit() print('\rInserted {} elements.'.format(i), end='') print('\nSuccessfully inserted the elements.') finally: cur.close() conn.close()
def get_user_suggested_topics(user_id): """Gets suggested topics per system for one user. Returns {system_id:[topic_id, topic_id, topic_id, ... ], ... } with score decending""" connection = database.get_connection() with closing(connection.cursor(dictionary=True)) as cur: sql = '''select topic_recommendations.topic_id, topic_recommendations.system_id from topic_recommendations inner join topics on topics.topic_id = topic_recommendations.topic_id left join user_topics on user_topics.topic_id = topic_recommendations.topic_id and user_topics.user_id = topic_recommendations.user_id where user_topics.state is NULL and topic_recommendations.user_id = %s and topic_recommendations.interleaving_batch is NULL order by topic_recommendations.system_score DESC''' cur.execute(sql, (user_id, )) rankings = defaultdict(list) for row in cur.fetchall(): topic_id = row['topic_id'] system_id = row['system_id'] rankings[system_id].append(topic_id) return rankings
def get_users_for_suggestion_generation(limit, offset, all_users=False): """Gets the users that have not provided Semantic Scholar profile links and have not previously accepted/declined any suggestions. :param limit: Number of users to retrieve. :param offset: An offset to the first user returned. :param all_users: Return all users, regardless of whether they have provided Semantic Scholar profile links and/or have previously accepted/declined any suggestions. :return: A dictionary {user_id: {firstname, lastname}}. """ with closing(database.get_connection().cursor(dictionary=True)) as cur: sql = '''SELECT u.user_id, u.firstname, u.lastname FROM users u LEFT JOIN semantic_scholar_suggestion_log log ON u.user_id=log.user_id WHERE %s OR (semantic_scholar_profile = "" AND log.user_id IS NULL) ORDER BY user_id LIMIT %s OFFSET %s''' cur.execute(sql, (all_users, limit, offset)) users = {u['user_id']: u for u in cur.fetchall()} for user_id, user_data in users.items(): sql = '''SELECT t.topic FROM user_topics ut NATURAL JOIN topics t WHERE user_id = %s AND NOT t.filtered and ut.state in ('USER_ADDED','SYSTEM_RECOMMENDED_ACCEPTED')''' cur.execute(sql, (user_id, )) user_data['topics'] = cur.fetchall() return users
def get_number_of_users_for_suggestion_generation(): """Gets the number of users that have not provided links to their Semantic Scholar profiles and have not previously accepted/declined any suggestions.""" with closing(database.get_connection().cursor()) as cur: sql = '''SELECT count(*) FROM users u LEFT JOIN semantic_scholar_suggestion_log log ON u.user_id=log.user_id WHERE semantic_scholar_profile = "" AND log.user_id IS NULL''' cur.execute(sql) return cur.fetchone()[0]
def insert_topic_suggestions(suggested_topics): """Inserts the score for the interleaved topic suggestions into the database along with the datetime they were interleaved""" connection = database.get_connection() with closing(connection.cursor(dictionary=True)) as cur: sql = '''update topic_recommendations set interleaving_order = %s, interleaving_batch = %s where user_id = %s and topic_id = %s and system_id = %s''' cur.executemany(sql, suggested_topics) connection.commit()
def assign_unsubscribe_trace(user_id): """Gives a user an unsubscribe trace if they dont have one.""" trace = str(uuid4()) connection = database.get_connection() with closing(connection.cursor(dictionary=True)) as cur: sql = '''update users set unsubscribe_trace = %s where user_id = %s''' cur.execute(sql, (trace, user_id)) connection.commit() return trace
def get_article_data(): """Returns a dictionary of article_ids: title and authors.""" with closing(database.get_connection().cursor()) as cur: sql = '''SELECT article_id,title, GROUP_CONCAT(concat(firstname,' ',lastname) SEPARATOR ', ') FROM articles NATURAL LEFT JOIN article_authors WHERE datestamp >=DATE_SUB(UTC_DATE(),INTERVAL 8 DAY) GROUP BY article_id''' cur.execute(sql) return {x[0]: {'title': x[1], 'authors': x[2]} for x in cur.fetchall()}
def get_users(limit, offset): """Fetches users in batches. :param limit: Number of users to retrieve. :param offset: An offset to the first user returned. :return: A dictionary of user_ids: {email, name, notification_interval}. """ with closing(database.get_connection().cursor(dictionary=True)) as cur: sql = '''SELECT u.user_id, u.email, u.firstname as name, u.notification_interval, unsubscribe_trace FROM users u ORDER BY user_id LIMIT %s OFFSET %s''' cur.execute(sql, (limit, offset)) return {u.pop('user_id'): u for u in cur.fetchall()}
def insert_article_recommendations(recommendations): """Inserts the recommended articles into the article feedback table.""" connection = database.get_connection() with closing(connection.cursor(dictionary=True)) as cur: sql = '''INSERT INTO article_feedback (user_id, article_id, system_id, explanation, score, recommendation_date) VALUES(%s, %s, %s, %s, %s, %s)''' cur.executemany(sql, recommendations) users = list({x[0] for x in recommendations}) sql = '''UPDATE users SET last_recommendation_date=UTC_DATE() WHERE user_id in ({})'''.format(','.join(['%s'] * len(users))) cur.execute(sql, users) connection.commit()
def insert_mail_trackers(article_traces): """Inserts mail trackers into the article feedback table.""" connection = database.get_connection() with closing(connection.cursor(dictionary=True)) as cur: sql = '''UPDATE article_feedback af, users u SET af.seen_email = CURRENT_TIMESTAMP, af.trace_click_email = %(click_trace)s, af.trace_save_email= %(save_trace)s, u.last_email_date=UTC_DATE() WHERE af.user_id=%(user_id)s AND af.article_id=%(article_id)s AND u.user_id = %(user_id)s''' cur.executemany(sql, article_traces) connection.commit()
def update_semantic_scholar_suggestions(suggestions, timestamp: datetime): """Deletes existing Semantic Scholar profile suggestions and inserts new ones for a set of users.""" conn = database.get_connection() with closing(conn.cursor()) as cur: for user_id, user_suggestions in suggestions.items(): cur.execute( 'DELETE FROM semantic_scholar_suggestions WHERE user_id = %s', (user_id, )) for author_id, suggestion in user_suggestions.items(): sql = '''INSERT INTO semantic_scholar_suggestions (semantic_scholar_id, name, score, user_id, created) VALUES (%s, %s, %s, %s, %s)''' cur.execute(sql, (author_id, suggestion["name"], suggestion["score"], user_id, timestamp)) sql = 'UPDATE users SET show_semantic_scholar_popup=true WHERE user_id=%s' cur.execute(sql, (user_id, )) conn.commit()
def get_article_recommendations(limit, offset): """Fetches article recommendation for articles released the past week. :param limit: Number of users to retrieve recommendations for. :param offset: An offset to the first user returned. :return: Article recommendations (rankings) in the following format sorted by score descending: {user_id: {system_id:[article_id, article_id, article_id, ... ], ... }, ... } And explanations in the format: {user_id: {system_id: {article_id: explanation, ...}, ...}, ...} """ with closing(database.get_connection().cursor(dictionary=True)) as cur: sql = '''SELECT ar.user_id, ar.system_id, ar.article_id, ar.explanation FROM article_recommendations ar Left JOIN article_feedback af on ar.article_id = af.article_id AND ar.user_id = af.user_id JOIN users u on ar.user_id = u.user_id JOIN articles a on a.article_id = ar.article_id RIGHT JOIN (SELECT user_id FROM users ORDER BY user_id LIMIT %s OFFSET %s) limit_u on limit_u.user_id = ar.user_id WHERE af.score is null AND a.datestamp > DATE_SUB(UTC_DATE(), INTERVAL 7 DAY) AND u.last_recommendation_date < UTC_DATE() ORDER BY ar.score DESC;''' cur.execute(sql, (limit, offset)) rankings = defaultdict(lambda: defaultdict(list)) explanations = defaultdict(lambda: defaultdict(dict)) for row in cur.fetchall(): user_id = row['user_id'] article_id = row['article_id'] system_id = row['system_id'] explanation = row['explanation'] rankings[user_id][system_id].append(article_id) explanations[user_id][system_id][article_id] = explanation return rankings, explanations
def get_multileaved_articles(limit, offset): """Fetches multileaved article recommendations. :param limit: Number of users to retrieve recommendations for. :param offset: An offset to the first user returned. :return: Nested dict in format user_id: date: article_id: recommendation """ with closing(database.get_connection().cursor(dictionary=True)) as cur: sql = '''SELECT user_id, DATE(recommendation_date) as date, article_id, score, explanation FROM article_feedback NATURAL JOIN users NATURAL RIGHT JOIN (SELECT user_id FROM users ORDER BY user_id LIMIT %s OFFSET %s) limit_u WHERE DATE(recommendation_date) > DATE_SUB(UTC_DATE(), INTERVAL 7 DAY) AND last_email_date < UTC_DATE()''' cur.execute(sql, (limit, offset)) result = defaultdict(lambda: defaultdict(dict)) for r in cur.fetchall(): result[r.pop('user_id')][r.pop('date')][r.pop('article_id')] = r return result
def get_number_of_users(): """This method returns the number of users in the database.""" with closing(database.get_connection().cursor()) as cur: cur.execute('SELECT count(*) FROM users') return cur.fetchone()[0]