Ejemplo n.º 1
0
def show(identifier):
    """Display the JSON for the single item in the database.
    The identifier may be a PMID, DOI, email, API key, label, ISSN, ISSN-L,
    ORCID, or IUID of the document.
    """
    db = utils.get_db()
    # Try each (design, view, transform) lookup in turn; first hit wins.
    lookups = [
        ("publication", "pmid", _asis),
        ("publication", "doi", _asis),
        ("account", "email", _asis),
        ("account", "api_key", _asis),
        ("label", "normalized_value", _normalized),
        ("journal", "issn", _asis),
        ("journal", "issn_l", _asis),
        ("researcher", "orcid", _asis),
        ("blacklist", "doi", _asis),
        ("blacklist", "pmid", _asis),
    ]
    doc = None
    for design, view, transform in lookups:
        try:
            doc = utils.get_doc(db, design, view, transform(identifier))
        except KeyError:
            continue
        break
    if doc is None:
        # No view matched; fall back to direct lookup by document IUID.
        try:
            doc = db[identifier]
        except couchdb2.NotFoundError:
            raise click.ClickException("No such item in the database.")
    click.echo(json.dumps(doc, ensure_ascii=False, indent=2))
Ejemplo n.º 2
0
def find_pmid(csvfilepath):
    """Find the PMID for the publications in the CSV file.
    Search by DOI and title.

    Note that a delay is inserted between each call to PubMed to avoid
    bad behaviour towards the web service.
    """
    db = utils.get_db()
    count = 0
    iuids = get_iuids_from_csv(csvfilepath)
    click.echo(f"{len(iuids)} publications in CSV input file.")
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    for iuid in iuids:
        try:
            publ = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        # Skip publications that already have a PMID.
        if publ.get("pmid"):
            continue
        # Prefer the DOI for the PubMed search; fall back to the title.
        doi = publ.get("doi")
        result = pubmed.search(doi=doi) if doi else pubmed.search(title=publ["title"])
        # Only act on an unambiguous (single) search hit.
        if len(result) != 1:
            continue
        pmid = result[0]
        with PublicationSaver(doc=publ, db=db, account=account) as saver:
            saver["pmid"] = pmid
        click.echo(f"PMID {pmid}: {publ['title'][:50]}...")
        count += 1
    click.echo(f"Set PMID for {count} publications.")
Ejemplo n.º 3
0
def xrefs(filepath):
    """Output all xrefs as CSV data to the given file.
    The db and key of the xref form the first two columns.
    If a URL is defined, it is written to the third column.
    """
    db = utils.get_db()
    # Map xref database name -> set of keys seen in any publication.
    # Renamed from 'dbs'/'db' loop variable to avoid shadowing the
    # CouchDB handle 'db' above.
    xrefdbs = dict()
    for publication in utils.get_docs(db, "publication", "modified"):
        for xref in publication.get("xrefs", []):
            xrefdbs.setdefault(xref["db"], set()).add(xref["key"])
    with open(filepath, "w") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["db", "key", "url"])
        count = 0
        for xrefdb, keys in sorted(xrefdbs.items()):
            for key in sorted(keys):
                row = [xrefdb, key]
                try:
                    url = settings["XREF_TEMPLATE_URLS"][xrefdb.lower()]
                    if "%-s" in url:  # "%-s" marks templates wanting a lowercase key.
                        # BUG FIX: str.replace returns a new string; the
                        # original discarded the result, leaving "%-s" in
                        # the template (it only formatted correctly because
                        # "%-s" happens to be a valid printf-style spec).
                        url = url.replace("%-s", "%s")
                        key = key.lower()
                    row.append(url % key)
                except KeyError:
                    # No URL template for this xref db; emit a two-column row.
                    pass
                writer.writerow(row)
                count += 1
    click.echo(f"{count} xrefs")
Ejemplo n.º 4
0
def add_label(label, csvfilepath):
    """Add a label to a set of publications."""
    db = utils.get_db()
    # A label may be given as "label/qualifier".
    parts = label.split("/", 1)
    qualifier = parts[1] if len(parts) == 2 else None
    label = parts[0]
    try:
        label = utils.get_label(db, label)["value"]
    except KeyError as error:
        raise click.ClickException(str(error))
    if qualifier and qualifier not in settings["SITE_LABEL_QUALIFIERS"]:
        raise click.ClickException(f"No such label qualifier {qualifier}.")
    count = 0
    for iuid in get_iuids_from_csv(csvfilepath):
        try:
            publ = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        if add_label_to_publication(db, publ, label, qualifier):
            count += 1
    click.echo(f"Added label to {count} publications.")
Ejemplo n.º 5
0
def dump(dumpfile, dumpdir, progressbar):
    """Dump all data in the database to a .tar.gz dump file.

    If no dumpfile is given, a date-stamped name is generated, placed
    in 'dumpdir' when that is given.
    """
    db = utils.get_db()
    if not dumpfile:
        dumpfile = "dump_{0}.tar.gz".format(time.strftime("%Y-%m-%d"))
        if dumpdir:
            # BUG FIX: the joined path was assigned to an unused variable
            # 'filepath', so the 'dumpdir' option was silently ignored.
            dumpfile = os.path.join(dumpdir, dumpfile)
    ndocs, nfiles = db.dump(dumpfile,
                            exclude_designs=True,
                            progressbar=progressbar)
    click.echo(f"Dumped {ndocs} documents and {nfiles} files to '{dumpfile}'.")
Ejemplo n.º 6
0
def admin(email, password):
    "Create a user account having the admin role."
    db = utils.get_db()
    try:
        with AccountSaver(db=db) as saver:
            saver.set_email(email)
            saver["owner"] = email
            # Prompt only after the email has been accepted by the saver.
            pw = password or click.prompt("Password",
                                          hide_input=True,
                                          confirmation_prompt=True)
            saver.set_password(pw)
            saver["role"] = constants.ADMIN
            saver["labels"] = []
    except ValueError as error:
        raise click.ClickException(str(error))
    click.echo(f"Created 'admin' role account {email}")
Ejemplo n.º 7
0
def password(email, password):
    "Set the password for the given account."
    db = utils.get_db()
    try:
        account = utils.get_account(db, email)
    except KeyError as error:
        raise click.ClickException(str(error))
    try:
        with AccountSaver(doc=account, db=db) as saver:
            # Prompt interactively when no password was supplied.
            new_password = password or click.prompt("Password",
                                                    hide_input=True,
                                                    confirmation_prompt=True)
            saver.set_password(new_password)
    except ValueError as error:
        raise click.ClickException(str(error))
    click.echo(f"Password set for account {email}")
Ejemplo n.º 8
0
def update_pubmed(csvfilepath):
    """Use PubMed to update the publications in the CSV file.
    If a publication lacks PMID then that publication is skipped.

    Note that a delay is inserted between each call to PubMed to avoid
    bad behaviour towards the web service.
    """
    db = utils.get_db()
    count = 0
    iuids = get_iuids_from_csv(csvfilepath)
    click.echo(f"{len(iuids)} publications in CSV input file.")
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    for iuid in iuids:
        try:
            publ = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        pmid = publ.get("pmid")
        if not pmid:
            continue
        try:
            data = pubmed.fetch(
                pmid,
                timeout=settings["PUBMED_TIMEOUT"],
                delay=settings["PUBMED_DELAY"],
                api_key=settings["NCBI_API_KEY"],
            )
        except (OSError, IOError):
            click.echo(f"No response from PubMed for {pmid}.")
            continue
        except ValueError as error:
            click.echo(f"{pmid}, {error}")
            continue
        # Merge the fresh PubMed record into the stored publication.
        with PublicationSaver(doc=publ, db=db, account=account) as saver:
            saver.update(data)
            saver.fix_journal()
        click.echo(f"Updated {iuid} {publ['title'][:50]}...")
        count += 1
    click.echo(f"Updated {count} publications from PubMed.")
Ejemplo n.º 9
0
def update_crossref(csvfilepath):
    """Use Crossref to update the publications in the CSV file.
    If a publication lacks DOI then that publication is skipped.

    Note that a delay is inserted between each call to Crossref to avoid
    bad behaviour towards the web service.
    """
    db = utils.get_db()
    count = 0
    iuids = get_iuids_from_csv(csvfilepath)
    click.echo(f"{len(iuids)} publications in CSV input file.")
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    for iuid in iuids:
        try:
            publ = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        doi = publ.get("doi")
        if not doi:
            continue
        try:
            data = crossref.fetch(
                doi,
                timeout=settings["CROSSREF_TIMEOUT"],
                delay=settings["CROSSREF_DELAY"],
            )
        except (OSError, IOError):
            click.echo(f"No response from Crossref for {doi}.")
            continue
        except ValueError as error:
            click.echo(f"{doi}, {error}")
            continue
        # Merge the fresh Crossref record into the stored publication.
        with PublicationSaver(doc=publ, db=db, account=account) as saver:
            saver.update(data)
            saver.fix_journal()
        click.echo(f"Updated {iuid} {publ['title'][:50]}...")
        count += 1
    click.echo(f"Updated {count} publications from Crossref.")
Ejemplo n.º 10
0
def remove_label(label, csvfilepath):
    "Remove a label from a set of publications."
    db = utils.get_db()
    try:
        label = utils.get_label(db, label)["value"]
    except KeyError as error:
        raise click.ClickException(str(error))
    count = 0
    # Hoisted out of the loop: the dict (and its os.getlogin() call) is
    # loop-invariant and was rebuilt on every matching publication.
    # This also matches how the sibling commands build 'account'.
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    for iuid in get_iuids_from_csv(csvfilepath):
        try:
            publ = db[iuid]
        except KeyError:
            click.echo(f"No such publication {iuid}; skipping.")
            continue
        if label not in publ["labels"]:
            continue
        # 'labels' is a dict mapping label -> qualifier; remove the entry
        # on a copy so the saver sees a clean new value.
        with PublicationSaver(doc=publ, db=db, account=account) as saver:
            labels = publ["labels"].copy()
            labels.pop(label)
            saver["labels"] = labels
        count += 1
    click.echo(f"Removed label from {count} publications.")
                with PublicationSaver(doc=publ, db=db) as saver:
                    labels = publ['labels'].copy()
                    labels[label] = qualifier
                    saver['labels'] = labels
                count += 1
    print("Label '%s/%s' added to %i publications" % (label, qualifier, count))
    for error in errors:
        print(error)


if __name__ == '__main__':
    # Command-line entry point: read publication identifiers (one per line)
    # from a file and add the given label, with optional qualifier, to each.
    parser = utils.get_command_line_parser(
        'Add a label to all publications in a list.')
    parser.add_argument('--label', action='store', dest='label',
                        default=None, help='label to add')
    parser.add_argument('--qualifier', action='store', dest='qualifier',
                        default=None, help='qualifier of label to add')
    parser.add_argument('--file', action='store', dest='idfile',
                        metavar='IDFILE',
                        help='path to file containing publication identifiers')
    args = parser.parse_args()
    utils.load_settings(filepath=args.settings)
    db = utils.get_db()
    # Collect non-blank lines as identifiers; blank lines are skipped.
    identifiers = []
    with open(args.idfile) as infile:
        for line in infile:
            line = line.strip()
            if line: identifiers.append(line)
    print(len(identifiers), 'identifiers')
    add_label_to_publications(db, args.label, args.qualifier, identifiers)
Ejemplo n.º 12
0
def fetch(filepath, label):
    """Fetch publications given a file containing PMIDs and/or DOIs,
    one per line. If the publication is already in the database, the label,
    if given, is added. For a PMID, the publication is fetched from PubMed.
    For a DOI, an attempt is first made to get the publication from PubMed.
    If that does not work, Crossref is tried.
    Delay, timeout and API key for fetching is defined in the settings file.
    """
    db = utils.get_db()
    # Take the first whitespace-separated token of each non-blank line.
    identifiers = []
    try:
        with open(filepath) as infile:
            for line in infile:
                try:
                    identifiers.append(line.strip().split()[0])
                except IndexError:
                    pass
    except IOError as error:
        raise click.ClickException(str(error))
    # BUG FIX: 'qualifier' was bound only inside the 'if label:' branch,
    # so the 'else' path at the bottom raised NameError whenever no label
    # was given and a publication already existed in the database.
    qualifier = None
    if label:
        parts = label.split("/", 1)
        if len(parts) == 2:
            label = parts[0]
            qualifier = parts[1]
        try:
            label = utils.get_label(db, label)["value"]
        except KeyError as error:
            raise click.ClickException(str(error))
        if qualifier and qualifier not in settings["SITE_LABEL_QUALIFIERS"]:
            raise click.ClickException(f"No such label qualifier {qualifier}.")
        labels = {label: qualifier}
    else:
        labels = {}
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    # All labels are allowed from the CLI; as if admin were logged in.
    allowed_labels = set(
        [l["value"] for l in utils.get_docs(db, "label", "value")])
    for identifier in identifiers:
        try:
            publ = utils.get_publication(db, identifier)
        except KeyError:
            try:
                publ = fetch_publication(
                    db,
                    identifier,
                    labels=labels,
                    account=account,
                    allowed_labels=allowed_labels,
                )
            except IOError as error:
                click.echo(f"Error: {error}")
            except KeyError as error:
                click.echo(f"Warning: {error}")
            else:
                click.echo(f"Fetched {publ['title']}")
        else:
            # Only attempt a label update when a label was actually given.
            if label and add_label_to_publication(db, publ, label, qualifier):
                click.echo(f"{identifier} already in database; label updated.")
            else:
                click.echo(f"{identifier} already in database.")
Ejemplo n.º 13
0
def select(
    years,
    labels,
    authors,
    orcids,
    expression,
    format,
    filepath,
    all_authors,
    issn,
    encoding,
    delimiter,
    quoting,
    single_label,
    numbered,
    maxline,
    doi_url,
    pmid_url,
):
    """Select a subset of publications and output to a file.
    The options '--year', '--label' and '--orcid' may be given multiple
    times, giving the union of publications within the option type.
    These separate sets are then intersected to give the final subset.
    """
    db = utils.get_db()
    app = publications.app_publications.get_application()
    subsets = []

    # Set-union operator for functools.reduce over Subset instances.
    def _union(s, t):
        return s | t

    # Within each option type, multiple values are OR'ed (union)...
    if years:
        subsets.append(
            functools.reduce(_union, [Subset(db, year=y) for y in years]))
    if labels:
        subsets.append(
            functools.reduce(_union, [Subset(db, label=l) for l in labels]))
    if orcids:
        subsets.append(
            functools.reduce(_union, [Subset(db, orcid=o) for o in orcids]))
    if authors:
        subsets.append(
            functools.reduce(_union, [Subset(db, author=a) for a in authors]))
    # ...and the per-type subsets are AND'ed (intersection). With no
    # criteria at all, Subset(db) selects everything.
    if subsets:
        result = functools.reduce(lambda s, t: s & t, subsets)
    else:
        result = Subset(db)

    # 'expression' is a path to a file containing a selection expression,
    # parsed with pyparsing and evaluated into a further Subset.
    if expression:
        parser = get_parser()
        try:
            with open(expression) as infile:
                parsed = parser.parseString(infile.read(), parseAll=True)
        except IOError as error:
            raise click.ClickException(str(error))
        except pp.ParseException as error:
            raise click.ClickException(f"Expression invalid: {error}")
        try:
            subset = parsed[0].evaluate(db)
        except Exception as error:
            raise click.ClickException(
                f"Evaluating selection expression: {error}")
        if subsets:  # Were any previous subset(s) defined?
            result = result & subset
        else:
            result = subset

    # NOTE(review): if 'format' is none of CSV/XLSX/TXT, 'writer' below is
    # unbound — presumably prevented by a click.Choice on the stripped
    # command decorator; confirm against the CLI definition.
    if format == "CSV":
        writer = publications.writer.CsvWriter(
            db,
            app,
            all_authors=all_authors,
            single_label=single_label,
            issn=issn,
            encoding=encoding,
            quoting=quoting,
            delimiter=delimiter,
        )
        writer.write(result)
        filepath = filepath or "publications.csv"

    elif format == "XLSX":
        if filepath == "-":
            raise click.ClickException("Cannot output XLSX to stdout.")
        writer = publications.writer.XlsxWriter(
            db,
            app,
            all_authors=all_authors,
            single_label=single_label,
            issn=issn,
            encoding=encoding,
        )
        writer.write(result)
        filepath = filepath or "publications.xlsx"

    elif format == "TXT":
        writer = publications.writer.TextWriter(
            db,
            app,
            all_authors=all_authors,
            issn=issn,
            encoding=encoding,
            numbered=numbered,
            maxline=maxline,
            doi_url=doi_url,
            pmid_url=pmid_url,
        )
        writer.write(result)
        filepath = filepath or "publications.txt"

    # "-" means stdout (decoded text); otherwise write raw bytes to the file
    # and echo a summary of the selected subset.
    if filepath == "-":
        sys.stdout.write(writer.get_content().decode(encoding))
    elif filepath:
        with open(filepath, "wb") as outfile:
            outfile.write(writer.get_content())
        click.echo(result)
Ejemplo n.º 14
0
from publications import utils


def init_database(db):
    "Initialize the database; load design documents."
    print('wiping out database (slow method)...')
    # Delete every document one at a time ("slow method").
    # NOTE(review): this deletes while iterating the couchdb2 Database
    # object; presumably its __iter__ snapshots the documents — confirm
    # against the couchdb2 package before touching this loop.
    for doc in db:
        del db[doc]
    print('wiped out database')
    # Reload the design documents (views) into the now-empty database.
    utils.initialize(db)


if __name__ == '__main__':
    import sys

    # Command-line entry point: wipe the database after an explicit
    # confirmation, unless --force is given.
    parser = utils.get_command_line_parser(
        description='Initialize the database, deleting all old data.')
    parser.add_argument(
        '-f', '--force',
        action='store_true', dest='force', default=False,
        help='force deletion; skip question')
    args = parser.parse_args()
    utils.load_settings(filepath=args.settings)
    if not args.force:
        answer = input('about to delete everything; really sure? [n] > ')
        if not utils.to_bool(answer):
            sys.exit('aborted')
    init_database(utils.get_db())
Ejemplo n.º 15
0
 def prepare(self):
     "Get the database connection."
     # NOTE(review): looks like a per-request setup hook on a request
     # handler class (header not visible in this chunk) — confirm.
     self.db = utils.get_db()