def show(identifier):
    """Display the JSON for the single item in the database.
    The identifier may be a PMID, DOI, email, API key, label,
    ISSN, ISSN-L, ORCID, or IUID of the document.
    """
    db = utils.get_db()
    # Each entry: (design document, view name, identifier transform).
    lookups = (
        ("publication", "pmid", _asis),
        ("publication", "doi", _asis),
        ("account", "email", _asis),
        ("account", "api_key", _asis),
        ("label", "normalized_value", _normalized),
        ("journal", "issn", _asis),
        ("journal", "issn_l", _asis),
        ("researcher", "orcid", _asis),
        ("blacklist", "doi", _asis),
        ("blacklist", "pmid", _asis),
    )
    for design, view, transform in lookups:
        try:
            doc = utils.get_doc(db, design, view, transform(identifier))
        except KeyError:
            continue
        break
    else:
        # None of the views matched; fall back to direct IUID lookup.
        try:
            doc = db[identifier]
        except couchdb2.NotFoundError:
            raise click.ClickException("No such item in the database.")
    click.echo(json.dumps(doc, ensure_ascii=False, indent=2))
def find_pmid(csvfilepath):
    """Find the PMID for the publications in the CSV file.
    Search by DOI and title.
    Note that a delay is inserted between each call to PubMed
    to avoid bad behaviour towards the web service.
    """
    db = utils.get_db()
    iuids = get_iuids_from_csv(csvfilepath)
    click.echo(f"{len(iuids)} publications in CSV input file.")
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    count = 0
    for iuid in iuids:
        try:
            publication = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        # Already has a PMID; nothing to do.
        if publication.get("pmid"):
            continue
        doi = publication.get("doi")
        # Prefer searching by DOI; fall back to the title.
        if doi:
            hits = pubmed.search(doi=doi)
        else:
            hits = pubmed.search(title=publication["title"])
        # Set the PMID only on an unambiguous (single) hit.
        if len(hits) == 1:
            with PublicationSaver(doc=publication, db=db, account=account) as saver:
                saver["pmid"] = hits[0]
                click.echo(f"PMID {hits[0]}: {publication['title'][:50]}...")
                count += 1
    click.echo(f"Set PMID for {count} publications.")
def xrefs(filepath):
    """Output all xrefs as CSV data to the given file.
    The db and key of the xref form the first two columns.
    If a URL is defined, it is written to the third column.
    """
    db = utils.get_db()
    # Map xref database name -> set of keys seen in any publication.
    xref_dbs = dict()
    for publication in utils.get_docs(db, "publication", "modified"):
        for xref in publication.get("xrefs", []):
            xref_dbs.setdefault(xref["db"], set()).add(xref["key"])
    with open(filepath, "w") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["db", "key", "url"])
        count = 0
        # Renamed loop variable: the original shadowed the 'db' connection.
        for dbname, keys in sorted(xref_dbs.items()):
            for key in sorted(keys):
                row = [dbname, key]
                try:
                    url = settings["XREF_TEMPLATE_URLS"][dbname.lower()]
                    if "%-s" in url:  # '%-s' in the template means: use lowercase key.
                        # Bug fix: str.replace returns a new string; the original
                        # discarded the result, leaving '%-s' in the template and
                        # breaking the '%' interpolation below.
                        url = url.replace("%-s", "%s")
                        key = key.lower()
                    row.append(url % key)
                except KeyError:
                    # No URL template for this xref database; leave the column out.
                    pass
                writer.writerow(row)
                count += 1
    click.echo(f"{count} xrefs")
def add_label(label, csvfilepath):
    """Add a label to a set of publications."""
    db = utils.get_db()
    # The label argument may be given as "label/qualifier".
    parts = label.split("/", 1)
    qualifier = parts[1] if len(parts) == 2 else None
    try:
        label = utils.get_label(db, parts[0])["value"]
    except KeyError as error:
        raise click.ClickException(str(error))
    if qualifier and qualifier not in settings["SITE_LABEL_QUALIFIERS"]:
        raise click.ClickException(f"No such label qualifier {qualifier}.")
    count = 0
    for iuid in get_iuids_from_csv(csvfilepath):
        try:
            publication = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
        else:
            if add_label_to_publication(db, publication, label, qualifier):
                count += 1
    click.echo(f"Added label to {count} publications.")
def dump(dumpfile, dumpdir, progressbar):
    """Dump all data in the database to a .tar.gz dump file.

    If no dumpfile is given, a date-stamped default name is used.
    If dumpdir is given, the dump file is written in that directory.
    """
    db = utils.get_db()
    if not dumpfile:
        dumpfile = "dump_{0}.tar.gz".format(time.strftime("%Y-%m-%d"))
    if dumpdir:
        # Bug fix: the joined path was assigned to an unused variable
        # 'filepath', so the dump was always written to the current
        # directory regardless of the dumpdir option.
        dumpfile = os.path.join(dumpdir, dumpfile)
    ndocs, nfiles = db.dump(dumpfile, exclude_designs=True, progressbar=progressbar)
    click.echo(f"Dumped {ndocs} documents and {nfiles} files to '{dumpfile}'.")
def admin(email, password):
    "Create a user account having the admin role."
    db = utils.get_db()
    try:
        with AccountSaver(db=db) as saver:
            saver.set_email(email)
            saver["owner"] = email
            # Prompt interactively when no password was supplied.
            if not password:
                password = click.prompt(
                    "Password", hide_input=True, confirmation_prompt=True
                )
            saver.set_password(password)
            saver["role"] = constants.ADMIN
            saver["labels"] = []
    except ValueError as error:
        raise click.ClickException(str(error))
    click.echo(f"Created 'admin' role account {email}")
def password(email, password):
    "Set the password for the given account."
    db = utils.get_db()
    try:
        account = utils.get_account(db, email)
    except KeyError as error:
        raise click.ClickException(str(error))
    try:
        with AccountSaver(doc=account, db=db) as saver:
            # Prompt interactively when no password was supplied.
            if not password:
                password = click.prompt(
                    "Password", hide_input=True, confirmation_prompt=True
                )
            saver.set_password(password)
    except ValueError as error:
        raise click.ClickException(str(error))
    click.echo(f"Password set for account {email}")
def update_pubmed(csvfilepath):
    """Use PubMed to update the publications in the CSV file.
    If a publication lacks PMID then that publication is skipped.
    Note that a delay is inserted between each call to PubMed
    to avoid bad behaviour towards the web service.
    """
    db = utils.get_db()
    iuids = get_iuids_from_csv(csvfilepath)
    click.echo(f"{len(iuids)} publications in CSV input file.")
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    count = 0
    for iuid in iuids:
        try:
            publication = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        pmid = publication.get("pmid")
        if not pmid:
            # Nothing to look up without a PMID.
            continue
        try:
            updated = pubmed.fetch(
                pmid,
                timeout=settings["PUBMED_TIMEOUT"],
                delay=settings["PUBMED_DELAY"],
                api_key=settings["NCBI_API_KEY"],
            )
        except (OSError, IOError):
            click.echo(f"No response from PubMed for {pmid}.")
        except ValueError as error:
            click.echo(f"{pmid}, {error}")
        else:
            with PublicationSaver(doc=publication, db=db, account=account) as saver:
                saver.update(updated)
                saver.fix_journal()
                click.echo(f"Updated {iuid} {publication['title'][:50]}...")
                count += 1
    click.echo(f"Updated {count} publications from PubMed.")
def update_crossref(csvfilepath):
    """Use Crossref to update the publications in the CSV file.
    If a publication lacks DOI then that publication is skipped.
    Note that a delay is inserted between each call to Crossref
    to avoid bad behaviour towards the web service.
    """
    db = utils.get_db()
    iuids = get_iuids_from_csv(csvfilepath)
    click.echo(f"{len(iuids)} publications in CSV input file.")
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    count = 0
    for iuid in iuids:
        try:
            publication = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        doi = publication.get("doi")
        if not doi:
            # Nothing to look up without a DOI.
            continue
        try:
            updated = crossref.fetch(
                doi,
                timeout=settings["CROSSREF_TIMEOUT"],
                delay=settings["CROSSREF_DELAY"],
            )
        except (OSError, IOError):
            click.echo(f"No response from Crossref for {doi}.")
        except ValueError as error:
            click.echo(f"{doi}, {error}")
        else:
            with PublicationSaver(doc=publication, db=db, account=account) as saver:
                saver.update(updated)
                saver.fix_journal()
                click.echo(f"Updated {iuid} {publication['title'][:50]}...")
                count += 1
    click.echo(f"Updated {count} publications from Crossref.")
def remove_label(label, csvfilepath):
    "Remove a label from a set of publications."
    db = utils.get_db()
    try:
        label = utils.get_label(db, label)["value"]
    except KeyError as error:
        raise click.ClickException(str(error))
    # Hoisted out of the loop (loop-invariant); also consistent with the
    # other CLI commands, which build the account dict once.
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    count = 0
    for iuid in get_iuids_from_csv(csvfilepath):
        try:
            publ = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        if label not in publ["labels"]:
            continue
        with PublicationSaver(doc=publ, db=db, account=account) as saver:
            # Work on a copy; the saver stores the modified mapping.
            labels = publ["labels"].copy()
            labels.pop(label)
            saver["labels"] = labels
            count += 1
    click.echo(f"Removed label from {count} publications.")
# NOTE(review): this chunk is the tail of an older argparse-based script; the
# enclosing function definition (presumably add_label_to_publications, looping
# over publications with 'publ', 'count' and 'errors' in scope) begins before
# the visible part — confirm against the full file.
        with PublicationSaver(doc=publ, db=db) as saver:
            # Labels are stored as a mapping of label -> qualifier.
            labels = publ['labels'].copy()
            labels[label] = qualifier
            saver['labels'] = labels
        count += 1
    print("Label '%s/%s' added to %i publications" % (label, qualifier, count))
    for error in errors:
        print(error)


if __name__ == '__main__':
    parser = utils.get_command_line_parser(
        'Add a label to all publications in a list.')
    parser.add_argument('--label', action='store', dest='label',
                        default=None, help='label to add')
    parser.add_argument('--qualifier', action='store', dest='qualifier',
                        default=None, help='qualifier of label to add')
    parser.add_argument('--file', action='store', dest='idfile',
                        metavar='IDFILE',
                        help='path to file containing publication identifiers')
    args = parser.parse_args()
    utils.load_settings(filepath=args.settings)
    db = utils.get_db()
    # Read one publication identifier per line; skip blank lines.
    identifiers = []
    with open(args.idfile) as infile:
        for line in infile:
            line = line.strip()
            if line:
                identifiers.append(line)
    print(len(identifiers), 'identifiers')
    add_label_to_publications(db, args.label, args.qualifier, identifiers)
def fetch(filepath, label):
    """Fetch publications given a file containing PMIDs and/or DOIs, one per line.
    If the publication is already in the database, the label, if given, is added.
    For a PMID, the publication is fetched from PubMed.
    For a DOI, an attempt is first made to get the publication from PubMed.
    If that does not work, Crossref is tried.
    Delay, timeout and API key for fetching is defined in the settings file.
    """
    db = utils.get_db()
    identifiers = []
    try:
        with open(filepath) as infile:
            for line in infile:
                # Take only the first whitespace-separated token; blank lines
                # raise IndexError and are skipped.
                try:
                    identifiers.append(line.strip().split()[0])
                except IndexError:
                    pass
    except IOError as error:
        raise click.ClickException(str(error))
    # Bug fix: 'qualifier' was assigned only inside the 'if label:' branch,
    # causing a NameError in the loop below (add_label_to_publication call)
    # whenever no label was given and a publication was already in the database.
    qualifier = None
    if label:
        # The label argument may be given as "label/qualifier".
        parts = label.split("/", 1)
        if len(parts) == 2:
            label = parts[0]
            qualifier = parts[1]
        try:
            label = utils.get_label(db, label)["value"]
        except KeyError as error:
            raise click.ClickException(str(error))
        if qualifier and qualifier not in settings["SITE_LABEL_QUALIFIERS"]:
            raise click.ClickException(f"No such label qualifier {qualifier}.")
        labels = {label: qualifier}
    else:
        labels = {}
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    # All labels are allowed from the CLI; as if admin were logged in.
    allowed_labels = set(
        [l["value"] for l in utils.get_docs(db, "label", "value")])
    for identifier in identifiers:
        try:
            publ = utils.get_publication(db, identifier)
        except KeyError:
            # Not in the database; fetch it from PubMed/Crossref.
            try:
                publ = fetch_publication(
                    db,
                    identifier,
                    labels=labels,
                    account=account,
                    allowed_labels=allowed_labels,
                )
            except IOError as error:
                click.echo(f"Error: {error}")
            except KeyError as error:
                click.echo(f"Warning: {error}")
            else:
                click.echo(f"Fetched {publ['title']}")
        else:
            # Already in the database; just (possibly) add the label.
            if add_label_to_publication(db, publ, label, qualifier):
                click.echo(f"{identifier} already in database; label updated.")
            else:
                click.echo(f"{identifier} already in database.")
def select(
    years,
    labels,
    authors,
    orcids,
    expression,
    format,
    filepath,
    all_authors,
    issn,
    encoding,
    delimiter,
    quoting,
    single_label,
    numbered,
    maxline,
    doi_url,
    pmid_url,
):
    """Select a subset of publications and output to a file.

    The options '--year', '--label' and '--orcid' may be given multiple
    times, giving the union of publications within the option type.
    These separate sets are then intersected to give the final subset.
    """
    db = utils.get_db()
    app = publications.app_publications.get_application()
    subsets = []

    # Helper for functools.reduce: set union of two subsets.
    def _union(s, t):
        return s | t

    # Union within each option type...
    if years:
        subsets.append(
            functools.reduce(_union, [Subset(db, year=y) for y in years]))
    if labels:
        subsets.append(
            functools.reduce(_union, [Subset(db, label=l) for l in labels]))
    if orcids:
        subsets.append(
            functools.reduce(_union, [Subset(db, orcid=o) for o in orcids]))
    if authors:
        subsets.append(
            functools.reduce(_union, [Subset(db, author=a) for a in authors]))
    # ...then intersection across option types; no options means everything.
    if subsets:
        result = functools.reduce(lambda s, t: s & t, subsets)
    else:
        result = Subset(db)
    if expression:
        # The expression is read from a file and parsed with pyparsing.
        parser = get_parser()
        try:
            with open(expression) as infile:
                parsed = parser.parseString(infile.read(), parseAll=True)
        except IOError as error:
            raise click.ClickException(str(error))
        except pp.ParseException as error:
            raise click.ClickException(f"Expression invalid: {error}")
        try:
            subset = parsed[0].evaluate(db)
        except Exception as error:
            raise click.ClickException(
                f"Evaluating selection expression: {error}")
        if subsets:  # Were any previous subset(s) defined?
            result = result & subset
        else:
            result = subset
    # Write the result in the requested format; each writer supplies a
    # default output filename when none was given.
    if format == "CSV":
        writer = publications.writer.CsvWriter(
            db,
            app,
            all_authors=all_authors,
            single_label=single_label,
            issn=issn,
            encoding=encoding,
            quoting=quoting,
            delimiter=delimiter,
        )
        writer.write(result)
        filepath = filepath or "publications.csv"
    elif format == "XLSX":
        if filepath == "-":
            raise click.ClickException("Cannot output XLSX to stdout.")
        writer = publications.writer.XlsxWriter(
            db,
            app,
            all_authors=all_authors,
            single_label=single_label,
            issn=issn,
            encoding=encoding,
        )
        writer.write(result)
        filepath = filepath or "publications.xlsx"
    elif format == "TXT":
        writer = publications.writer.TextWriter(
            db,
            app,
            all_authors=all_authors,
            issn=issn,
            encoding=encoding,
            numbered=numbered,
            maxline=maxline,
            doi_url=doi_url,
            pmid_url=pmid_url,
        )
        writer.write(result)
        filepath = filepath or "publications.txt"
    # '-' means stdout; the writer content is bytes, so decode for stdout.
    if filepath == "-":
        sys.stdout.write(writer.get_content().decode(encoding))
    elif filepath:
        with open(filepath, "wb") as outfile:
            outfile.write(writer.get_content())
    click.echo(result)
from publications import utils


def init_database(db):
    "Initialize the database; load design documents."
    # Delete every document individually, then reload the design documents.
    print('wiping out database (slow method)...')
    for docid in db:
        del db[docid]
    print('wiped out database')
    utils.initialize(db)


if __name__ == '__main__':
    import sys

    parser = utils.get_command_line_parser(
        description='Initialize the database, deleting all old data.')
    parser.add_argument('-f', '--force', action='store_true', dest='force',
                        default=False, help='force deletion; skip question')
    args = parser.parse_args()
    utils.load_settings(filepath=args.settings)
    # Destructive operation: require confirmation unless forced.
    if not args.force:
        answer = input('about to delete everything; really sure? [n] > ')
        if not utils.to_bool(answer):
            sys.exit('aborted')
    init_database(utils.get_db())
def prepare(self):
    "Get the database connection."
    # NOTE(review): looks like a per-request web-handler hook (e.g.
    # tornado.web.RequestHandler.prepare) caching a database connection on
    # the handler instance — confirm against the enclosing class.
    self.db = utils.get_db()