def insert_altmetric_score(sqlite3_file, doi, altmetrics_data):
    """
    Try to store the altmetric result of one article in biorxiv_altmetrics_log.

    The row whose ``doi`` column equals *doi* is updated in place. No row is
    created when the DOI is not in the table; the call still returns True
    in that case because the statement itself succeeded.

    :param sqlite3_file: sqlite3 database file
    :param doi: DOI of the row to update
    :param altmetrics_data: object exposing ``altmetric_score``, ``pct`` and
        ``flg`` attributes
    :return: True if the statement ran and was committed, False if a
        sqlite3 error occurred (the error is logged)
    """
    sql = """UPDATE biorxiv_altmetrics_log
             SET altmetric_score = ?, altmetric_pct = ?, altmetric_flg = ?
             WHERE doi = ?"""
    params = (
        altmetrics_data.altmetric_score,
        altmetrics_data.pct,
        altmetrics_data.flg,
        doi,
    )
    try:
        conn = sqlite3.connect(sqlite3_file)
        try:
            # "with conn" only commits (or rolls back on error); it does not
            # close the connection, hence the explicit close() below.
            with conn:
                conn.execute(sql, params)
        finally:
            conn.close()
        return True
    except sqlite3.Error as e:
        logger(__name__).error(e)
        return False
def check_RSS(url):
    """
    Check one RSS feed of PubMed and collect its articles.

    - Nature biotechnology
    - Nature methods
    - Nature genetics
    - Molecular cell
    - eLife
    - PLoS biology
    - Genome research
    - Genes & development
    - Nature cell biology

    :param url: URL of the RSS feed to parse
    :return: RSS data list (empty when the feed is flagged as malformed)
    """
    parsed = feedparser.parse(url)

    # feedparser sets bozo to 1 when it could not parse the feed cleanly
    if parsed.bozo == 1:
        logger(__name__).error(parsed.bozo_exception)
        logger(__name__).error("Failed to reach the feed.")
        return []

    collected = []
    for entry in parsed["items"]:
        # Drop the query string before resolving the DOI
        article_link = entry["link"].split('?')[0]
        article_doi = getDOI(article_link)
        record = RSS_data(
            doi=article_doi,
            title=entry["title"],
            url="https://doi.org/{0}".format(article_doi),
            # The date stored is the day the feed was read, not a feed field
            date=datetime.now().strftime("%Y-%m-%d"))
        collected.append(record)
        # Pause between entries; presumably throttles the getDOI lookups
        sleep(0.5)
    return collected
def select_target_doi(sqlite3_file):
    """
    Try to select the target DOIs from biorxiv_altmetrics_log.

    A row is a target while its ``altmetric_flg`` column is 0.

    :param sqlite3_file: sqlite3 database file
    :return: list of target_doi_data objects (doi, title, url, date);
        an empty list if nothing matches or a sqlite3 error occurred
        (the error is logged)
    """
    sql = """SELECT doi, title, link, update_date
             FROM biorxiv_altmetrics_log
             WHERE altmetric_flg = 0"""
    try:
        conn = sqlite3.connect(sqlite3_file)
        try:
            rows = conn.execute(sql).fetchall()
        finally:
            # The sqlite3 context manager never closes the connection, so
            # release it explicitly once the rows are fetched.
            conn.close()
    except sqlite3.Error as e:
        logger(__name__).error(e)
        return []

    # Store each row as a target_doi_data object
    return [
        target_doi_data(doi=doi, title=title, url=link, date=update_date)
        for doi, title, link, update_date in rows
    ]
def create_tables(sqlite3_file):
    """
    Try to create the biorxiv_altmetrics_log table if it does not exist.

    The table is keyed by ``doi`` and holds the article title, link and
    update date plus the altmetric score, percentile and flag.

    :param sqlite3_file: sqlite3 database file
    :return: True if the statement ran and was committed, False if a
        sqlite3 error occurred (the error is logged)
    """
    sql = """CREATE TABLE IF NOT EXISTS biorxiv_altmetrics_log
             (doi TEXT,
              title TEXT,
              link TEXT,
              update_date TEXT,
              altmetric_score INTEGER,
              altmetric_pct INTEGER,
              altmetric_flg INTEGER,
              PRIMARY KEY(doi)
             )"""
    try:
        conn = sqlite3.connect(sqlite3_file)
        try:
            # "with conn" commits on success and rolls back on error, but
            # leaves the connection open; close it explicitly afterwards.
            with conn:
                conn.execute(sql)
        finally:
            conn.close()
        return True
    except sqlite3.Error as e:
        logger(__name__).error(e)
        return False
def insert_new_doi(sqlite3_file, RSS_data_list):
    """
    Try to insert new DOIs into the sqlite3 database.

    Each article is stored with score, percentile and flag set to 0.
    Articles whose DOI is already in the table are left untouched
    (INSERT OR IGNORE).

    :param sqlite3_file: sqlite3 database file
    :param RSS_data_list: RSS data list; each item exposes ``doi``,
        ``title``, ``url`` and ``date`` attributes
    :return: True if the rows were written and committed, False if a
        sqlite3 error occurred (the error is logged)
    """
    sql = """INSERT OR IGNORE INTO biorxiv_altmetrics_log
             VALUES(?,?,?,?,?,?,?)"""
    rows = [(p.doi, p.title, p.url, p.date, 0, 0, 0) for p in RSS_data_list]
    try:
        conn = sqlite3.connect(sqlite3_file)
        try:
            # "with conn" handles commit/rollback only; the connection is
            # closed explicitly in the finally clause.
            with conn:
                conn.executemany(sql, rows)
        finally:
            conn.close()
        return True
    except sqlite3.Error as e:
        logger(__name__).error(e)
        return False
def send_slack_message(slack_token, channel, message):
    """
    Simple wrapper for sending a Slack message.

    Posts *message* with ``chat.postMessage``. If the call fails and the
    response carries a ``Retry-After`` header, the function sleeps for that
    many seconds and tries exactly once more. Every outcome is reported
    through the logger; nothing is returned.

    :param slack_token: token passed to SlackClient
    :param channel: channel to post to
    :param message: text of the message
    :return: None
    """
    sc = SlackClient(slack_token)
    response = sc.api_call("chat.postMessage", channel=channel, text=message)

    # Check to see if the message sent successfully
    if response["ok"]:
        logger(__name__).info("Message posted successfully: " +
                              response["message"]["ts"])
        return

    # Only rate-limited failures carry a Retry-After header, so look it up
    # defensively instead of indexing (which raised KeyError on any other
    # kind of failure).
    retry_after = (response.get("headers") or {}).get("Retry-After")
    if not retry_after:
        logger(__name__).error("Failed to post message to Slack: " +
                               str(response.get("error")))
        return

    delay = int(retry_after)
    logger(__name__).warning("Rate limited. Retrying in " + str(delay) +
                             " seconds")
    sleep(delay)
    response = sc.api_call("chat.postMessage", channel=channel, text=message)

    # Report the outcome of the single retry as well
    if response["ok"]:
        logger(__name__).info("Message posted successfully: " +
                              response["message"]["ts"])
    else:
        logger(__name__).error("Failed to post message to Slack: " +
                               str(response.get("error")))
def check_RSS(subjects):
    """
    Check the RSS feed of BioRxiv for the given subject categories.

    :param subjects: subject categories; they are joined with "+" to build
        the feed URL
    :return: RSS data list (empty when the feed is flagged as malformed)
    """
    feed_url = "http://connect.biorxiv.org/biorxiv_xml.php?subject={0}".format(
        "+".join(subjects))
    parsed = feedparser.parse(feed_url)

    # feedparser sets bozo to 1 when it could not parse the feed cleanly
    if parsed.bozo == 1:
        logger(__name__).error(parsed.bozo_exception)
        logger(__name__).error("Failed to reach the feed.")
        return []

    # One RSS_data per feed entry; the query string is cut off the link
    return [
        RSS_data(doi=entry["dc_identifier"],
                 title=entry["title"],
                 url=entry["link"].split('?')[0],
                 date=entry["updated"])
        for entry in parsed["items"]
    ]
def main():
    """
    Run one full pass: read the feeds, record new articles, score the
    pending ones and announce those that qualify.

    Steps:
      1. Load the settings and parse every feed in ``pubmed_rss_link``.
      2. Create the sqlite3 table if needed and insert the new articles;
         stop early if either database step fails.
      3. For every article still flagged as a target, fetch its altmetric
         data and store it.
      4. When the stored flag is 1, post the title and URL to Slack and
         Twitter. A failure to post is logged and does not stop the loop.

    :return: None
    """
    # Get setting file
    setting_dict = get_argument()
    # NOTE(review): this logs the whole settings dict, which includes the
    # Slack token and Twitter secrets read below - consider redacting.
    logger(__name__).info(setting_dict)

    # Parse RSS feed
    logger(__name__).info("Start Parsing RSS feed...")
    RSS_data_list = []
    for link in setting_dict['pubmed_rss_link'].values():
        RSS_data_list.extend(check_RSS(link))

    # Create sqlite3 database if not exists
    # (relative path: resolved against the current working directory)
    sqlite3_file = "../db/storeAltmetrics4PubMed.sqlite3"
    if not access_sqlite3.create_tables(sqlite3_file):
        return

    # Insert new target articles into sqlite3 db
    logger(__name__).info("Insert new target articles into sqlite3 db.")
    if not access_sqlite3.insert_new_doi(sqlite3_file, RSS_data_list):
        return

    # Get all target articles for checking altmetrics score
    logger(__name__).info(
        "Get all target articles for checking altmetrics score.")
    target_doi_list = access_sqlite3.select_target_doi(sqlite3_file)

    # Get altmetric score for each article
    for doi_info in target_doi_list:
        logger(__name__).info("Get altmetric score for " + doi_info.doi)
        altmetrics_data = check_altmetrics(doi_info)
        # None means the Altmetric lookup failed; try again on the next run
        if altmetrics_data is None:
            continue

        # Insert scores into sqlite3 db
        logger(__name__).info("Insert scores into sqlite3 db.")
        access_sqlite3.insert_altmetric_score(sqlite3_file, doi_info.doi,
                                              altmetrics_data)

        # Only articles flagged 1 are announced
        if altmetrics_data.flg != 1:
            continue

        # Send a message to SNS
        try:
            message = """{0}\n{1}\n""".format(doi_info.title, doi_info.url)
            send_slack_message(setting_dict['slack_token'],
                               setting_dict['slack_channel'], message)

            # Tweet message
            send_twitter_message(
                setting_dict['twitter_consumer_key_pubmed'],
                setting_dict['twitter_consumer_secret_pubmed'],
                setting_dict['twitter_access_token_pubmed'],
                setting_dict['twitter_access_token_secret_pubmed'], message)
        except Exception as e:
            # Best effort: one failed notification must not abort the run.
            # Exception (not a bare except) keeps Ctrl-C and SystemExit
            # working, and the cause is logged instead of being dropped.
            logger(__name__).error("Fail to send a message to SNS " +
                                   doi_info.doi)
            logger(__name__).error(e)

    logger(__name__).info("Successfully finished.")
def check_altmetrics(doi_info):
    """
    Fetch the Altmetric record of one article and derive its tracking flag.

    The returned flag is:
      *  1 when the journal percentile (``context.journal.pct``) is >= 90,
      *  0 when it is lower or could not be read,
      * -1 when the article date is more than 30 days old (this overrides
        the percentile result).

    :param doi_info: object exposing ``doi`` and ``date``; the date is
        parsed as "YYYY-MM-DD"
    :return: altmetrics_data(altmetric_score, pct, flg), or None when the
        Altmetric API raised AltmetricHTTPException (the error is logged)
    """
    # Log message for each HTTP status the API is known to answer with
    http_error_messages = {
        403: "You aren't authorized for this call.",
        420: "You are being rate limited.",
        502: "The API version you are using is currently down for maintenance.",
        404: "Altmetric doesn't have any details for the article or set of articles you requested.",
    }

    doi = doi_info.doi
    try:
        # Get altmetric score
        sleep(1)  # Avoid hammering the altmetric server
        response = altmetric_utils.Altmetric().doi(doi)
    except altmetric_utils.AltmetricHTTPException as e:
        if e.status_code in http_error_messages:
            logger(__name__).error(http_error_messages[e.status_code])
        else:
            # Previously any other status returned None without a trace
            logger(__name__).error("Altmetric API request failed with status " +
                                   str(e.status_code))
        logger(__name__).error(e.msg)
        return None

    if not response:
        # NOTE(review): the 30-day expiry below is not applied here, so an
        # article without an Altmetric record keeps flg 0 - confirm that
        # this is intended.
        logger(__name__).error("Fail to getting altmetrics score for " + doi)
        return altmetrics_data(altmetric_score=0, pct=0, flg=0)

    logger(__name__).info("Get altmetrics score for " + doi)

    # Check altmetric score (pct: >=90)
    try:
        pct = response["context"]['journal']['pct']
    except KeyError:
        flg = 0
        pct = 0
        logger(__name__).error("Fail to getting PCT for " + doi)
    else:
        flg = 1 if pct >= 90 else 0

    # Check elapsed days since the article date
    year, month, day = str(doi_info.date).split("-")[:3]
    updated_date = datetime(int(year), int(month), int(day))
    elapsed_days = (datetime.now() - updated_date).days
    if elapsed_days > 30:
        flg = -1

    try:
        altmetric_score = response["score"]
    except (KeyError, TypeError):
        # Narrowed from a bare except: a missing score falls back to 0
        altmetric_score = 0
        logger(__name__).error("Fail to getting altmetrics score for " + doi)

    return altmetrics_data(altmetric_score=altmetric_score, pct=pct, flg=flg)