def stats():
  """Summarise publication counts as JSON (unadvertised endpoint).

  Kept off the public listing on purpose: the original author notes that it
  triggers many HTTP requests to the Sanger website, so it is meant for
  occasional use by administrators only.

  PubMed ids known to `publications` are classed as confirmed, denied, or
  "possible" (neither confirmed nor denied), and each class is compared with
  the ids returned by `LibrarySearcher.get_pubmed_ids()`.

  Returns:
    Whatever `jsonify` produces when called with one keyword per statistic:
    'tracked: ...' counts ids known to `publications`, 'library: ...' counts
    ids returned by the Library search, and 'other: ...' counts known ids
    that the Library search did not return (denied ids are not included in
    'other: total').
  """
  tracked = set(publications.keys())
  confirmed = set(publications.confirmed().keys())
  denied = set(publications.denied().keys())
  # "Possible" means tracked but neither confirmed nor denied so far.
  possible = tracked - confirmed - denied

  in_library = set(LibrarySearcher.get_pubmed_ids())
  outside_library = tracked - in_library

  confirmed_outside = outside_library & confirmed
  possible_outside = outside_library & possible

  statistics = {}
  statistics['tracked: total'] = len(tracked)
  statistics['tracked: confirmed'] = len(confirmed)
  statistics['tracked: possible'] = len(possible)
  statistics['tracked: denied'] = len(denied)
  statistics['library: total'] = len(in_library)
  statistics['library: confirmed'] = len(in_library & confirmed)
  statistics['library: possible'] = len(in_library & possible)
  # Only confirmed and possible ids contribute to the "other" total.
  statistics['other: total'] = len(confirmed_outside) + len(possible_outside)
  statistics['other: confirmed'] = len(confirmed_outside)
  statistics['other: possible'] = len(possible_outside)

  return jsonify(**statistics)
def stats():
    """Return JSON statistics about tracked and Library publications.

    This is a hidden URL. According to the original author it causes many
    HTTP requests to the Sanger website, so it is intended for infrequent
    use by administrators rather than for general publication.

    The ids held in `publications` are split into confirmed, denied and
    pending ("possible": neither confirmed nor denied) and then compared
    with the ids returned by `LibrarySearcher.get_pubmed_ids()`.

    Returns:
        The value of `jsonify` called with the statistics as keyword
        arguments, grouped under the 'tracked: ', 'library: ' and 'other: '
        prefixes ('other' meaning known ids not returned by the Library).
    """
    known_ids = set(publications.keys())
    confirmed_ids = set(publications.confirmed().keys())
    denied_ids = set(publications.denied().keys())
    pending_ids = known_ids.difference(confirmed_ids).difference(denied_ids)

    library_ids = set(LibrarySearcher.get_pubmed_ids())
    non_library_ids = known_ids.difference(library_ids)

    # Known ids the Library did not return, by confirmation state.
    other_confirmed = len(non_library_ids.intersection(confirmed_ids))
    other_possible = len(non_library_ids.intersection(pending_ids))

    counts = [
        ('tracked: total', len(known_ids)),
        ('tracked: confirmed', len(confirmed_ids)),
        ('tracked: possible', len(pending_ids)),
        ('tracked: denied', len(denied_ids)),
        ('library: total', len(library_ids)),
        ('library: confirmed', len(library_ids.intersection(confirmed_ids))),
        ('library: possible', len(library_ids.intersection(pending_ids))),
        ('other: total', other_confirmed + other_possible),
        ('other: confirmed', other_confirmed),
        ('other: possible', other_possible),
    ]

    return jsonify(**dict(counts))
def missing():
  """Render the publications that the Sanger Library search did not return.

  This is a hidden URL. The original author notes that it makes many HTTP
  requests to the Sanger website, so it is meant for infrequent use by
  administrators only.

  Every non-denied PubMed id in `publications` that is absent from
  `LibrarySearcher.get_pubmed_ids()` is collected into a `Publications`
  object and rendered with the 'affiliated.html' template.

  Returns:
    The result of `render_template` for 'affiliated.html'.
  """
  in_library = set(LibrarySearcher.get_pubmed_ids())
  candidates = set(publications.not_denied().keys())
  absent_ids = candidates - in_library

  absent_records = [publications[pmid] for pmid in absent_ids]
  absent_publications = Publications(absent_records)

  page_title = "Publications not in the Sanger Library"
  return render_template('affiliated.html',
                         publications=absent_publications,
                         users=users,
                         user_title=page_title)
def missing():
    """List publications that are not in the Sanger Library's dataset.

    Hidden URL: per the original author it triggers many HTTP requests to
    the Sanger website and is meant to be used rarely, by administrators.

    The non-denied ids in `publications` are compared with the ids returned
    by `LibrarySearcher.get_pubmed_ids()`; the ones the Library did not
    return are wrapped in a `Publications` object and shown through the
    'affiliated.html' template.

    Returns:
        The result of `render_template` for 'affiliated.html'.
    """
    library_ids = set(LibrarySearcher.get_pubmed_ids())
    candidate_ids = set(publications.not_denied().keys())
    absent_ids = candidate_ids.difference(library_ids)

    # Look up the full record for every id the Library did not return.
    absent_records = []
    for pubmed_id in absent_ids:
        absent_records.append(publications[pubmed_id])
    absent_publications = Publications(absent_records)

    return render_template(
        'affiliated.html',
        publications=absent_publications,
        users=users,
        user_title="Publications not in the Sanger Library")
  # NOTE(review): this is the body of a function whose `def` line is not
  # visible here; `options`, `publications` and the searcher classes are
  # expected to be in scope already — confirm against the full file.

  # Load the users from the YAML file named by options.usersfile.
  with open(options.usersfile, "r") as usersfile:
    users = User.from_yaml(usersfile.read())
  users_count = len(users)
  logging.info("Loaded %s users from %s" % (users_count, options.usersfile))

  # Collect PubMed ids for every user. options.start / options.end are passed
  # straight through to the searcher (presumably a date range — confirm).
  pubmed_ids = set()
  logging.info("Searching pubmed for publications")
  for user in users.values():
    new_pubmed_ids = Searcher.get_pubmed_ids_for_user(user, options.start, options.end)
    pubmed_ids.update(new_pubmed_ids)
  pubmed_id_count = len(pubmed_ids)
  logging.info("Found %s citations by searching pubmed" % pubmed_id_count)

  # Add the ids returned by the Library search. Because pubmed_ids is a set,
  # its growth after the update is the number of ids only the Library added.
  logging.info("Getting pubmed_ids from the Library")
  new_pubmed_ids = LibrarySearcher.get_pubmed_ids(options.start, options.end)
  pubmed_ids.update(new_pubmed_ids)
  logging.info("Found additional %s publications in Library" % (len(pubmed_ids) - pubmed_id_count))

  # Build publications for the collected ids and merge them into the existing
  # collection.
  # NOTE(review): `publications` is read here before any assignment in the
  # visible code, so it must be bound earlier in the enclosing function.
  new_publications = Publications.from_pubmed_ids(list(pubmed_ids))
  publications = Publications.merge(publications, new_publications)

  # Update each publication's authors using the loaded users.
  for publication in publications.values():
    publication.update_authors(users)

  # An empty outputfile option means no CSV is written.
  if options.outputfile != "":
    # NOTE(review): the file is opened in binary mode ("wb"); presumably this
    # targets Python 2 CSV writing — confirm before running on Python 3.
    with open(options.outputfile, "wb") as output:
      affiliated_publications = publications.not_denied()
      affiliated_publications.to_csv(output)
      logging.info("%s citations with at least one user matching the input queries have been printed to %s" % (
        len(affiliated_publications), options.outputfile))
Example #6
0
    with open(options.usersfile, "r") as usersfile:
        users = User.from_yaml(usersfile.read())
    users_count = len(users)
    logging.info("Loaded %s users from %s" % (users_count, options.usersfile))

    pubmed_ids = set()
    logging.info("Searching pubmed for publications")
    for user in users.values():
        new_pubmed_ids = Searcher.get_pubmed_ids_for_user(
            user, options.start, options.end)
        pubmed_ids.update(new_pubmed_ids)
    pubmed_id_count = len(pubmed_ids)
    logging.info("Found %s citations by searching pubmed" % pubmed_id_count)

    logging.info("Getting pubmed_ids from the Library")
    new_pubmed_ids = LibrarySearcher.get_pubmed_ids(options.start, options.end)
    pubmed_ids.update(new_pubmed_ids)
    logging.info("Found additional %s publications in Library" %
                 (len(pubmed_ids) - pubmed_id_count))

    new_publications = Publications.from_pubmed_ids(list(pubmed_ids))
    publications = Publications.merge(publications, new_publications)

    for publication in publications.values():
        publication.update_authors(users)

    if options.outputfile != "":
        with open(options.outputfile, "wb") as output:
            affiliated_publications = publications.not_denied()
            affiliated_publications.to_csv(output)
            logging.info(