Example #1
def xrefs(filepath):
    """Output all xrefs as CSV data to the given file.
    The db and key of the xref form the first two columns.
    If a URL is defined, it is written to the third column.
    """
    db = utils.get_db()
    dbs = dict()
    for publication in utils.get_docs(db, "publication", "modified"):
        for xref in publication.get("xrefs", []):
            dbs.setdefault(xref["db"], set()).add(xref["key"])
    with open(filepath, "w") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["db", "key", "url"])
        count = 0
        for db, keys in sorted(dbs.items()):
            for key in sorted(keys):
                row = [db, key]
                try:
                    url = settings["XREF_TEMPLATE_URLS"][db.lower()]
                    if "%-s" in url:  # Use lowercase key
                        url = url.replace("%-s", "%s")
                        key = key.lower()
                    row.append(url % key)
                except KeyError:
                    pass
                writer.writerow(row)
                count += 1
    click.echo(f"{count} xrefs")
Example #2
 def get_docs(self, designname, viewname, key=None, last=None, **kwargs):
     """Get the list of documents using the named view
     and the given key or interval.
     """
     return utils.get_docs(
         self.db, designname, viewname, key=key, last=last, **kwargs
     )
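
The key/last pair selects a contiguous key range in the named view. A hypothetical call, where the object name handler and the date values are illustrative, and it is assumed that the "publication/modified" view from Example #1 is keyed on the modification timestamp:

# Hypothetical usage: publications modified during 2020.
docs = handler.get_docs("publication", "modified",
                        key="2020-01-01", last="2020-12-31")
for doc in docs:
    print(doc["_id"], doc.get("title"))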
Example #3
 def select_active_labels(self, year):
     """Select all publications having a label active in the given year.
     If 'current' is given, then all currently active labels.
     If temporal labels are not configured, then produce an empty subset.
     """
     self.iuids = set()
     if not settings["TEMPORAL_LABELS"]:
         return
     if year.lower() == "current":
         labels = set([i.value for i in self.db.view("label", "current")])
     else:
         labels = set()
         for label in utils.get_docs(self.db, "label", "value"):
             started = label.get("started")
             if started and started <= year:  # Year as str
                 ended = label.get("ended")
                 if ended:
                     if year <= ended:  # Year as str
                         labels.add(label["value"])
                 else:  # Current
                     labels.add(label["value"])
     if labels:
         result = functools.reduce(
             lambda s, t: s | t, [Subset(self.db, label=l) for l in labels])
         self.iuids = result.iuids
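
The functools.reduce call at the end is simply the union of the per-label subsets, relying on Subset supporting the "|" operator to combine iuids. The same pattern with plain sets:

import functools

# Stand-in for the Subset union above, using plain sets of iuids.
per_label = [{"a1", "b2"}, {"b2", "c3"}, {"d4"}]
union = functools.reduce(lambda s, t: s | t, per_label)
print(sorted(union))   # ['a1', 'b2', 'c3', 'd4']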
Example #4
def fetch(filepath, label):
    """Fetch publications given a file containing PMIDs and/or DOIs,
    one per line. If the publication is already in the database, the label,
    if given, is added. For a PMID, the publication is fetched from PubMed.
    For a DOI, an attempt is first made to get the publication from PubMed.
    If that does not work, Crossref is tried.
    Delay, timeout and API key for fetching are defined in the settings file.
    """
    db = utils.get_db()
    identifiers = []
    try:
        with open(filepath) as infile:
            for line in infile:
                try:
                    identifiers.append(line.strip().split()[0])
                except IndexError:
                    pass
    except IOError as error:
        raise click.ClickException(str(error))
    if label:
        parts = label.split("/", 1)
        if len(parts) == 2:
            label = parts[0]
            qualifier = parts[1]
        else:
            qualifier = None
        try:
            label = utils.get_label(db, label)["value"]
        except KeyError as error:
            raise click.ClickException(str(error))
        if qualifier and qualifier not in settings["SITE_LABEL_QUALIFIERS"]:
            raise click.ClickException(f"No such label qualifier {qualifier}.")
        labels = {label: qualifier}
    else:
        # Ensure qualifier is defined for the add_label_to_publication call below.
        labels = {}
        qualifier = None
    account = {"email": os.getlogin(), "user_agent": "CLI"}
    # All labels are allowed from the CLI; as if admin were logged in.
    allowed_labels = set(
        [l["value"] for l in utils.get_docs(db, "label", "value")])
    for identifier in identifiers:
        try:
            publ = utils.get_publication(db, identifier)
        except KeyError:
            try:
                publ = fetch_publication(
                    db,
                    identifier,
                    labels=labels,
                    account=account,
                    allowed_labels=allowed_labels,
                )
            except IOError as error:
                click.echo(f"Error: {error}")
            except KeyError as error:
                click.echo(f"Warning: {error}")
            else:
                click.echo(f"Fetched {publ['title']}")
        else:
            if add_label_to_publication(db, publ, label, qualifier):
                click.echo(f"{identifier} already in database; label updated.")
            else:
                click.echo(f"{identifier} already in database.")
Example #5
def xrefs_statistics(db, filename, since=None):
    "CSV file of statistics for xrefs editing."
    if since is None:
        since = utils.today()
    with open(filename, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        write = writer.writerow
        write(('Site', settings['BASE_URL']))
        write(('Date', utils.today()))

        total = 0
        label_count = {}
        qualifier_count = {}
        has_xrefs = 0
        total_xrefs = 0
        xref_count = {}
        label_xref_count = {}

        # Tot up added xrefs per curator.
        # Imperfect: wrongly added xrefs will be counted, even if deleted.
        # Get the original xrefs from the first log entry (first load).
        # For xref-changing log entries after the first:
        # curators[email][iuid] = set(all xrefs except those in orig)
        curators = {}

        for publication in utils.get_docs(db, 'publication/published'):
            print(publication['_id'])
            total += 1
            for label, qualifier in list(
                    publication.get('labels', {}).items()):
                try:
                    qualifiers = label_count[label]
                except KeyError:
                    qualifiers = label_count[label] = {}
                try:
                    qualifiers[qualifier] += 1
                except KeyError:
                    qualifiers[qualifier] = 1
            xrefs = publication.get('xrefs')
            if xrefs:
                has_xrefs += 1
                total_xrefs += len(xrefs)
                for xref in xrefs:
                    try:
                        xref_count[xref['db']] += 1
                    except KeyError:
                        xref_count[xref['db']] = 1
            logs = utils.get_docs(db,
                                  'log/doc',
                                  key=[publication['_id'], ''],
                                  last=[publication['_id'], constants.CEILING],
                                  descending=False)
            # Record any xrefs in initial publication load.
            orig_xrefs = set([
                "%s:%s" % (x['db'], x['key'])
                for x in logs[0].get('changed', {}).get('xrefs', [])
            ])
            # Get rid of irrelevant and too old changes.
            logs = [
                l for l in logs
                if l['modified'] > since and l.get('changed', {}).get('xrefs')
            ]
            if not logs: continue

            for log in logs:
                email = log['account']
                try:
                    account = curators[email]
                except KeyError:
                    account = curators[email] = {}
                xs = [
                    "%s:%s" % (x['db'], x['key'])
                    for x in log['changed']['xrefs']
                ]
                xs = [x for x in xs if x not in orig_xrefs]
                account.setdefault(publication['_id'], set()).update(xs)
            for label in publication.get('labels', {}):
                try:
                    label_xref_count[label] += len(xs)
                except KeyError:
                    label_xref_count[label] = len(xs)

        write(('Total publs', total))
        write(('Publs with xrefs', has_xrefs))
        write(('Total xrefs', total_xrefs))
        write(())
        write(['Labels', 'total', 'xrefs'] + settings['SITE_LABEL_QUALIFIERS'])
        for label, count in sorted(label_count.items()):
            write([
                label,
                sum(label_count[label].values()),
                label_xref_count.get(label, 0)
            ] + [count.get(q, 0) for q in settings['SITE_LABEL_QUALIFIERS']])
        write(())
        write(('Xrefs', since))
        for db in sorted(xref_count):
            write((db, xref_count[db]))
        write(())
        write(('Xref curator', 'Total publs', 'Total xrefs'))
        for email in sorted(curators):
            publs = curators[email]
            total_publs = len(publs)
            new = set()
            for xrefs in list(publs.values()):
                new.update(xrefs)
            total_xrefs = len(new)
            write((email, total_publs, total_xrefs))
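
The 'log/doc' query above uses the standard CouchDB compound-key range trick: key=[id, ''] through last=[id, constants.CEILING] selects every log entry whose key starts with the publication id, since '' sorts before any timestamp and CEILING is assumed to be a high-sorting sentinel string. The same idea with plain Python tuples:

# Illustration of the compound-key range selection, using tuple ordering.
CEILING = "ZZZZZZZZ"                    # assumed high-sorting sentinel
keys = [("p1", "2023-01-05"), ("p1", "2023-06-15"), ("p2", "2023-02-02")]
low, high = ("p1", ""), ("p1", CEILING)
selected = [k for k in sorted(keys) if low <= k <= high]
print(selected)   # [('p1', '2023-01-05'), ('p1', '2023-06-15')]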