def stats():
    """Return a JSON summary of publication counts.

    Hidden URL: the original author deliberately left it unpublicised
    because it makes lots of HTTP requests to the Sanger website, so it is
    meant for infrequent use by administrators only.

    Counts are reported for three groups: every tracked publication, those
    returned by the library search, and tracked ones the library search did
    not return ("other").
    """
    tracked = set(publications.keys())
    confirmed = set(publications.confirmed().keys())
    denied = set(publications.denied().keys())
    # Tracked but neither confirmed nor denied: still awaiting a decision.
    pending = tracked - confirmed - denied

    in_library = set(LibrarySearcher.get_pubmed_ids())
    outside_library = tracked - in_library

    confirmed_outside = outside_library & confirmed
    pending_outside = outside_library & pending

    return jsonify(**{
        'tracked: total': len(tracked),
        'tracked: confirmed': len(confirmed),
        'tracked: possible': len(pending),
        'tracked: denied': len(denied),
        'library: total': len(in_library),
        'library: confirmed': len(in_library & confirmed),
        'library: possible': len(in_library & pending),
        # Denied publications are left out of the "other" total on purpose.
        'other: total': len(confirmed_outside) + len(pending_outside),
        'other: confirmed': len(confirmed_outside),
        'other: possible': len(pending_outside),
    })
def stats():
    """Serve publication statistics as JSON.

    This endpoint is intentionally not advertised: per the original author
    it triggers lots of HTTP requests to the Sanger website and should only
    be hit occasionally by administrators.
    """
    every_id = set(publications.keys())
    confirmed_ids = set(publications.confirmed().keys())
    denied_ids = set(publications.denied().keys())
    # "Possible" means no confirm/deny decision has been recorded yet.
    undecided_ids = every_id - (confirmed_ids | denied_ids)

    library_ids = set(LibrarySearcher.get_pubmed_ids())
    elsewhere_ids = every_id - library_ids

    def overlap(group, status):
        """Count the ids that appear in both sets."""
        return len(group & status)

    other_confirmed = overlap(elsewhere_ids, confirmed_ids)
    other_possible = overlap(elsewhere_ids, undecided_ids)

    counts = {
        'tracked: total': len(every_id),
        'tracked: confirmed': len(confirmed_ids),
        'tracked: possible': len(undecided_ids),
        'tracked: denied': len(denied_ids),
        'library: total': len(library_ids),
        'library: confirmed': overlap(library_ids, confirmed_ids),
        'library: possible': overlap(library_ids, undecided_ids),
        'other: total': other_confirmed + other_possible,
        'other: confirmed': other_confirmed,
        'other: possible': other_possible,
    }
    return jsonify(**counts)
def missing():
    """Render the publications that the Sanger Library search did not return.

    Hidden URL: the original author deliberately left it unpublicised
    because it makes lots of HTTP requests to the Sanger website, so it is
    meant for infrequent use by administrators only.

    Every publication that has not been denied and whose PubMed id is absent
    from the library's id list is shown using the 'affiliated.html' template.
    """
    held_by_library = set(LibrarySearcher.get_pubmed_ids())
    candidates = set(publications.not_denied().keys())
    absent_ids = candidates - held_by_library
    absent = Publications([publications[pmid] for pmid in absent_ids])
    return render_template(
        'affiliated.html',
        publications=absent,
        users=users,
        user_title="Publications not in the Sanger Library",
    )
def missing():
    """List not-denied publications that are absent from the library's ids.

    This endpoint is intentionally not advertised: per the original author
    it triggers lots of HTTP requests to the Sanger website and should only
    be hit occasionally by administrators.
    """
    library_ids = set(LibrarySearcher.get_pubmed_ids())
    not_denied_ids = set(publications.not_denied().keys())

    # Collect the full publication records for ids the library did not return.
    unlisted = []
    for pubmed_id in not_denied_ids.difference(library_ids):
        unlisted.append(publications[pubmed_id])

    page_title = "Publications not in the Sanger Library"
    return render_template('affiliated.html',
                           publications=Publications(unlisted),
                           users=users,
                           user_title=page_title)
# Load the users whose publications are tracked.
# NOTE: every name bound in this fragment is kept as-is because later code
# may read it (e.g. `users` and `publications`).
with open(options.usersfile, "r") as usersfile:
    users = User.from_yaml(usersfile.read())
users_count = len(users)
# Logging calls pass their values as arguments instead of pre-formatting with
# `%`, so interpolation only happens if the record is actually emitted.
logging.info("Loaded %s users from %s", users_count, options.usersfile)

# Collect PubMed ids per user for the requested start/end range.
pubmed_ids = set()
logging.info("Searching pubmed for publications")
for user in users.values():
    new_pubmed_ids = Searcher.get_pubmed_ids_for_user(
        user, options.start, options.end)
    pubmed_ids.update(new_pubmed_ids)
pubmed_id_count = len(pubmed_ids)
logging.info("Found %s citations by searching pubmed", pubmed_id_count)

# Add the ids the library search returns for the same range; the set
# de-duplicates, so the difference in size is the number of new ids.
logging.info("Getting pubmed_ids from the Library")
new_pubmed_ids = LibrarySearcher.get_pubmed_ids(options.start, options.end)
pubmed_ids.update(new_pubmed_ids)
logging.info("Found additional %s publications in Library",
             len(pubmed_ids) - pubmed_id_count)

# Build publication records for the ids and merge them into the existing set.
new_publications = Publications.from_pubmed_ids(list(pubmed_ids))
publications = Publications.merge(publications, new_publications)
for publication in publications.values():
    publication.update_authors(users)

# Optionally export everything that has not been denied as CSV.
if options.outputfile != "":
    with open(options.outputfile, "wb") as output:
        affiliated_publications = publications.not_denied()
        affiliated_publications.to_csv(output)
    logging.info(
        "%s citations with at least one user matching the input queries have been printed to %s",
        len(affiliated_publications), options.outputfile)
with open(options.usersfile, "r") as usersfile: users = User.from_yaml(usersfile.read()) users_count = len(users) logging.info("Loaded %s users from %s" % (users_count, options.usersfile)) pubmed_ids = set() logging.info("Searching pubmed for publications") for user in users.values(): new_pubmed_ids = Searcher.get_pubmed_ids_for_user( user, options.start, options.end) pubmed_ids.update(new_pubmed_ids) pubmed_id_count = len(pubmed_ids) logging.info("Found %s citations by searching pubmed" % pubmed_id_count) logging.info("Getting pubmed_ids from the Library") new_pubmed_ids = LibrarySearcher.get_pubmed_ids(options.start, options.end) pubmed_ids.update(new_pubmed_ids) logging.info("Found additional %s publications in Library" % (len(pubmed_ids) - pubmed_id_count)) new_publications = Publications.from_pubmed_ids(list(pubmed_ids)) publications = Publications.merge(publications, new_publications) for publication in publications.values(): publication.update_authors(users) if options.outputfile != "": with open(options.outputfile, "wb") as output: affiliated_publications = publications.not_denied() affiliated_publications.to_csv(output) logging.info(