Beispiel #1
0
def enrich_release_entity(entity):
    """Decorate a release entity in place with extra attributes for display.

    Entities in the 'redirect' or 'deleted' state are returned untouched.
    Otherwise the following are set:

    - ``entity._es``: result of ``release_to_elasticsearch()`` (only when the
      release state is "active")
    - ``entity.container._es``: result of ``container_to_elasticsearch()``
      (only when a container is attached and its state is "active")
    - ``fs._total_size`` on every fileset: sum of its manifest entry sizes
    - ``wc._wayback_suffix`` on every webcapture: ``wayback_suffix(wc)``
    - ``entity._authors``: contribs whose role is 'author' or None, sorted by
      index (a missing index sorts as 99999999, i.e. at the end)

    Unstructured reference strings are passed through ``strip_extlink_xml()``
    and the abstract list is reversed when its first entry has a mimetype
    containing 'latex'.

    Returns the same entity object that was passed in.
    """
    if entity.state in ('redirect', 'deleted'):
        return entity
    if entity.state == "active":
        entity._es = release_to_elasticsearch(entity, force_bool=False)
    if entity.container and entity.container.state == "active":
        entity.container._es = container_to_elasticsearch(entity.container,
                                                          force_bool=False)
    if entity.filesets:
        for fs in entity.filesets:
            fs._total_size = sum(f.size for f in fs.manifest)
    if entity.webcaptures:
        for wc in entity.webcaptures:
            wc._wayback_suffix = wayback_suffix(wc)
    for ref in entity.refs:
        # this is a UI hack to get rid of XML crud in unstructured refs like:
        # LOCKSS (2014) Available: <ext-link
        # xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri"
        # xlink:href="http://lockss.org/"
        # xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014
        # November 1.
        if ref.extra and ref.extra.get('unstructured'):
            ref.extra['unstructured'] = strip_extlink_xml(
                ref.extra['unstructured'])
    # author list to display; ensure it's sorted by index (any authors with
    # index=None go to end of list)
    authors = [c for c in entity.contribs if c.role in ('author', None)]
    entity._authors = sorted(
        authors,
        key=lambda c: 99999999 if c.index is None else c.index)
    # hack to show plain text instead of latex abstracts
    if entity.abstracts:
        # mimetype may be unset; only inspect it when there is a value
        first_mimetype = entity.abstracts[0].mimetype
        if first_mimetype and 'latex' in first_mimetype:
            entity.abstracts.reverse()
    return entity
Beispiel #2
0
def enrich_release_entity(entity):
    """Decorate a release entity in place with extra attributes for display.

    Entities in the 'redirect' or 'deleted' state are returned untouched.
    Otherwise:

    - ``entity._es`` is set from ``release_to_elasticsearch()`` when the
      release state is "active"
    - ``entity.container._es`` is set from ``container_to_elasticsearch()``
      when a container is attached and its state is "active"
    - files that carry ``extra['shadows']`` but have no URLs are dropped from
      ``entity.files``
    - every fileset gets ``_total_size`` (sum of manifest entry sizes) and
      every webcapture gets ``_wayback_suffix``
    - unstructured reference strings go through ``strip_extlink_xml()``
    - ``entity._authors`` holds contribs whose role is 'author' or None,
      sorted by index (a missing index sorts as 99999999, i.e. at the end)
    - the abstract list is reversed when the first abstract's mimetype
      contains 'latex', and a first abstract with mimetype
      'application/xml+jats' has its ``p`` / ``jats`` / ``jats:p`` tags
      stripped (both raw and HTML-escaped forms)

    Returns the same entity object that was passed in.
    """
    if entity.state in ('redirect', 'deleted'):
        return entity
    if entity.state == "active":
        entity._es = release_to_elasticsearch(entity, force_bool=False)
    if entity.container and entity.container.state == "active":
        entity.container._es = container_to_elasticsearch(entity.container, force_bool=False)
    if entity.files:
        # remove shadows-only files with no URLs
        entity.files = [f for f in entity.files
            if not (f.extra and f.extra.get('shadows') and not f.urls)]
    if entity.filesets:
        for fs in entity.filesets:
            fs._total_size = sum(f.size for f in fs.manifest)
    if entity.webcaptures:
        for wc in entity.webcaptures:
            wc._wayback_suffix = wayback_suffix(wc)
    for ref in entity.refs:
        # this is a UI hack to get rid of XML crud in unstructured refs like:
        # LOCKSS (2014) Available: <ext-link
        # xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri"
        # xlink:href="http://lockss.org/"
        # xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014
        # November 1.
        if ref.extra and ref.extra.get('unstructured'):
            ref.extra['unstructured'] = strip_extlink_xml(ref.extra['unstructured'])
    # author list to display; ensure it's sorted by index (any authors with
    # index=None go to end of list)
    authors = [c for c in entity.contribs if c.role in ('author', None)]
    entity._authors = sorted(authors, key=lambda c: 99999999 if c.index is None else c.index)
    if entity.abstracts:
        # hack to show plain text instead of latex abstracts; mimetype may be
        # unset, so only inspect it when there is a value
        first_mimetype = entity.abstracts[0].mimetype
        if first_mimetype and 'latex' in first_mimetype:
            entity.abstracts.reverse()
        # hack to (partially) clean up common JATS abstract display case;
        # re-read the first abstract because the list may just have been reversed
        first = entity.abstracts[0]
        if first.mimetype == 'application/xml+jats' and first.content:
            cleaned = first.content
            for tag in ('p', 'jats', 'jats:p'):
                # the last two patterns exist because double encoding happens
                for pattern in ('<{}>', '</{}>', '&lt;/{}&gt;', '&lt;{}&gt;'):
                    cleaned = cleaned.replace(pattern.format(tag), '')
            first.content = cleaned
    return entity
Beispiel #3
0
def enrich_webcapture_entity(entity):
    """Attach ``_wayback_suffix`` to a webcapture entity and return it.

    The attribute is set to ``wayback_suffix(entity)``. Entities whose state
    is 'redirect' or 'deleted' are handed back without modification.
    """
    needs_suffix = entity.state not in ('redirect', 'deleted')
    if needs_suffix:
        entity._wayback_suffix = wayback_suffix(entity)
    return entity
Beispiel #4
0
def enrich_release_entity(entity: ReleaseEntity) -> ReleaseEntity:
    """Decorate a release entity in place with extra attributes for display.

    Entities in the "redirect" or "deleted" state are returned untouched.
    Otherwise:

    - ``entity._es`` is set from ``release_to_elasticsearch()`` when the
      release state is "active"
    - ``entity.container._es`` is set from ``container_to_elasticsearch()``
      when a container is attached and its state is "active"
    - files that carry ``extra["shadows"]`` but have no URLs are dropped
    - every fileset gets ``_total_size`` and every webcapture gets
      ``_wayback_suffix``
    - unstructured reference strings go through ``strip_extlink_xml()``
    - ``entity.subtitle`` is back-filled from ``extra["subtitle"]`` if empty
    - ``entity._authors`` holds the named author contribs sorted by index
    - ``entity._can_citeproc`` is True only with both authors and a title
    - the abstract list is reversed when the first abstract's mimetype
      contains "latex", and a first abstract with mimetype
      "application/xml+jats" has its ``p`` / ``jats`` / ``jats:p`` tags
      stripped (both raw and HTML-escaped forms)

    Returns the same entity object that was passed in.
    """
    if entity.state in ("redirect", "deleted"):
        return entity

    if entity.state == "active":
        entity._es = release_to_elasticsearch(entity, force_bool=False)

    container = entity.container
    if container and container.state == "active":
        container._es = container_to_elasticsearch(container, force_bool=False)

    if entity.files:
        # keep a file unless it is shadows-only with no URLs
        entity.files = [
            file_ent for file_ent in entity.files
            if not file_ent.extra
            or not file_ent.extra.get("shadows")
            or file_ent.urls
        ]

    for fileset in entity.filesets or ():
        fileset._total_size = sum(item.size for item in fileset.manifest)

    for capture in entity.webcaptures or ():
        capture._wayback_suffix = wayback_suffix(capture)

    # UI hack: strip XML crud out of unstructured refs such as
    #   LOCKSS (2014) Available: <ext-link
    #   xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri"
    #   xlink:href="http://lockss.org/"
    #   xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014
    #   November 1.
    for ref in entity.refs:
        raw_citation = ref.extra.get("unstructured") if ref.extra else None
        if raw_citation:
            ref.extra["unstructured"] = strip_extlink_xml(raw_citation)

    # backwards compatibility: copy extra["subtitle"] into subtitle
    if not entity.subtitle and entity.extra:
        legacy_subtitle = entity.extra.get("subtitle")
        if legacy_subtitle:
            if isinstance(legacy_subtitle, str):
                entity.subtitle = legacy_subtitle
            elif isinstance(legacy_subtitle, list):
                entity.subtitle = legacy_subtitle[0] or None

    # authors to display: role is "author" or unset, and some name is known
    shown_authors = []
    for contrib in entity.contribs:
        if contrib.role not in ("author", None):
            continue
        if (contrib.surname or contrib.raw_name
                or (contrib.creator and contrib.creator.surname)):
            shown_authors.append(contrib)

    def index_or_last(contrib):
        # contribs without an index sort to the end of the list
        return 99999999 if contrib.index is None else contrib.index

    shown_authors.sort(key=index_or_last)
    entity._authors = shown_authors

    # need authors, title for citeproc to work
    entity._can_citeproc = bool(entity._authors and entity.title)

    abstracts = entity.abstracts
    if abstracts and abstracts[0].mimetype:
        # hack to show plain text instead of latex abstracts
        if "latex" in abstracts[0].mimetype:
            abstracts.reverse()
        # hack to (partially) clean up common JATS abstract display case;
        # look up the first abstract only now, after the possible reversal
        lead = abstracts[0]
        if lead.mimetype == "application/xml+jats":
            text = lead.content
            for tag in ("p", "jats", "jats:p"):
                # the escaped patterns are there because double encoding happens
                for template in ("<{}>", "</{}>", "&lt;/{}&gt;", "&lt;{}&gt;"):
                    text = text.replace(template.format(tag), "")
            lead.content = text
    return entity
Beispiel #5
0
def enrich_webcapture_entity(entity: WebcaptureEntity) -> WebcaptureEntity:
    """Attach ``_wayback_suffix`` to a webcapture entity and return it.

    The attribute is set to ``wayback_suffix(entity)``. Entities whose state
    is "redirect" or "deleted" are handed back without modification.
    """
    needs_suffix = entity.state not in ("redirect", "deleted")
    if needs_suffix:
        entity._wayback_suffix = wayback_suffix(entity)
    return entity