def enrich_release_entity(entity):
    """Attach display-only helper attributes to a release entity, in place.

    Entities whose state is ``redirect`` or ``deleted`` are returned
    untouched. Otherwise the following are set or rewritten on ``entity``:

    * ``_es`` on an active release, and on its container when that is
      active, from ``release_to_elasticsearch`` /
      ``container_to_elasticsearch``;
    * ``_total_size`` on every fileset: the sum of its manifest file sizes;
    * ``_wayback_suffix`` on every webcapture, from ``wayback_suffix``;
    * ``extra['unstructured']`` on every ref that has one, passed through
      ``strip_extlink_xml``;
    * ``_authors``: contribs whose role is ``'author'`` or ``None``,
      sorted by ``index``;
    * ``abstracts``: reversed when the first abstract's mimetype mentions
      ``latex``.

    Returns the same ``entity`` object that was passed in.
    """
    if entity.state in ('redirect', 'deleted'):
        return entity

    if entity.state == "active":
        entity._es = release_to_elasticsearch(entity, force_bool=False)
    if entity.container and entity.container.state == "active":
        entity.container._es = container_to_elasticsearch(entity.container, force_bool=False)

    if entity.filesets:
        for fs in entity.filesets:
            fs._total_size = sum(f.size for f in fs.manifest)
    if entity.webcaptures:
        for wc in entity.webcaptures:
            wc._wayback_suffix = wayback_suffix(wc)

    for ref in entity.refs:
        # this is a UI hack to get rid of XML crud in unstructured refs like:
        # LOCKSS (2014) Available: <ext-link
        # xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri"
        # xlink:href="http://lockss.org/"
        # xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014
        # November 1.
        if ref.extra and ref.extra.get('unstructured'):
            ref.extra['unstructured'] = strip_extlink_xml(ref.extra['unstructured'])

    # author list to display; ensure it's sorted by index (any authors with
    # index=None go to end of list)
    authors = [c for c in entity.contribs if c.role in ('author', None)]
    entity._authors = sorted(
        authors,
        key=lambda c: 99999999 if c.index is None else c.index,
    )

    # hack to show plain text instead of latex abstracts
    if entity.abstracts:
        first_mimetype = entity.abstracts[0].mimetype
        # mimetype can be unset; `'latex' in None` would raise TypeError
        if first_mimetype and 'latex' in first_mimetype:
            entity.abstracts.reverse()
    return entity
def enrich_release_entity(entity):
    """Attach display-only helper attributes to a release entity, in place.

    Entities whose state is ``redirect`` or ``deleted`` are returned
    untouched. Otherwise the following are set or rewritten on ``entity``:

    * ``_es`` on an active release, and on its container when that is
      active, from ``release_to_elasticsearch`` /
      ``container_to_elasticsearch``;
    * ``files``: entries that carry ``extra['shadows']`` but no URLs are
      dropped;
    * ``_total_size`` on every fileset: the sum of its manifest file sizes;
    * ``_wayback_suffix`` on every webcapture, from ``wayback_suffix``;
    * ``extra['unstructured']`` on every ref that has one, passed through
      ``strip_extlink_xml``;
    * ``_authors``: contribs whose role is ``'author'`` or ``None``,
      sorted by ``index``;
    * ``abstracts``: reversed when the first abstract's mimetype mentions
      ``latex``; a leading JATS abstract then has its ``p``/``jats``/
      ``jats:p`` tags removed from ``content``.

    Returns the same ``entity`` object that was passed in.
    """
    if entity.state in ('redirect', 'deleted'):
        return entity

    if entity.state == "active":
        entity._es = release_to_elasticsearch(entity, force_bool=False)
    if entity.container and entity.container.state == "active":
        entity.container._es = container_to_elasticsearch(entity.container, force_bool=False)

    if entity.files:
        # remove shadows-only files with no URLs
        entity.files = [
            f for f in entity.files
            if not (f.extra and f.extra.get('shadows') and not f.urls)
        ]
    if entity.filesets:
        for fs in entity.filesets:
            fs._total_size = sum(f.size for f in fs.manifest)
    if entity.webcaptures:
        for wc in entity.webcaptures:
            wc._wayback_suffix = wayback_suffix(wc)

    for ref in entity.refs:
        # this is a UI hack to get rid of XML crud in unstructured refs like:
        # LOCKSS (2014) Available: <ext-link
        # xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri"
        # xlink:href="http://lockss.org/"
        # xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014
        # November 1.
        if ref.extra and ref.extra.get('unstructured'):
            ref.extra['unstructured'] = strip_extlink_xml(ref.extra['unstructured'])

    # author list to display; ensure it's sorted by index (any authors with
    # index=None go to end of list)
    authors = [c for c in entity.contribs if c.role in ('author', None)]
    entity._authors = sorted(
        authors,
        key=lambda c: 99999999 if c.index is None else c.index,
    )

    if entity.abstracts:
        # hack to show plain text instead of latex abstracts
        first_mimetype = entity.abstracts[0].mimetype
        # mimetype can be unset; `'latex' in None` would raise TypeError
        if first_mimetype and 'latex' in first_mimetype:
            entity.abstracts.reverse()

        # hack to (partially) clean up common JATS abstract display case
        lead = entity.abstracts[0]
        if lead.mimetype == 'application/xml+jats':
            cleaned = lead.content
            for tag in ('p', 'jats', 'jats:p'):
                cleaned = cleaned.replace('<{}>'.format(tag), '')
                cleaned = cleaned.replace('</{}>'.format(tag), '')
                # ugh, double encoding happens: the tags can also arrive
                # entity-escaped. These two replacements used to repeat the
                # raw-tag patterns above and therefore did nothing.
                cleaned = cleaned.replace('&lt;/{}&gt;'.format(tag), '')
                cleaned = cleaned.replace('&lt;{}&gt;'.format(tag), '')
            lead.content = cleaned
    return entity
def enrich_webcapture_entity(entity):
    """Set ``_wayback_suffix`` on a webcapture entity and return it.

    The attribute is computed by ``wayback_suffix(entity)``. Entities whose
    state is ``redirect`` or ``deleted`` are returned without modification.
    """
    if entity.state not in ('redirect', 'deleted'):
        entity._wayback_suffix = wayback_suffix(entity)
    return entity
def enrich_release_entity(entity: ReleaseEntity) -> ReleaseEntity:
    """Attach display-only helper attributes to a release entity, in place.

    Entities whose state is ``redirect`` or ``deleted`` are returned
    untouched. Otherwise the following are set or rewritten on ``entity``:

    * ``_es`` on an active release, and on its container when that is
      active, from ``release_to_elasticsearch`` /
      ``container_to_elasticsearch``;
    * ``files``: entries that carry ``extra["shadows"]`` but no URLs are
      dropped;
    * ``_total_size`` on every fileset: the sum of its manifest file sizes;
    * ``_wayback_suffix`` on every webcapture, from ``wayback_suffix``;
    * ``extra["unstructured"]`` on every ref that has one, passed through
      ``strip_extlink_xml``;
    * ``subtitle``: filled from ``extra["subtitle"]`` when not already set;
    * ``_authors``: contribs whose role is ``"author"`` or ``None`` and
      that have some name to show, sorted by ``index``;
    * ``_can_citeproc``: true when there is at least one such author and a
      title;
    * ``abstracts``: reversed when the first abstract's mimetype mentions
      ``latex``; a leading JATS abstract then has its ``p``/``jats``/
      ``jats:p`` tags removed from ``content``.

    Returns the same ``entity`` object that was passed in.
    """
    if entity.state in ("redirect", "deleted"):
        return entity

    if entity.state == "active":
        entity._es = release_to_elasticsearch(entity, force_bool=False)
    if entity.container and entity.container.state == "active":
        entity.container._es = container_to_elasticsearch(entity.container, force_bool=False)

    if entity.files:
        # remove shadows-only files with no URLs
        entity.files = [
            f for f in entity.files
            if not (f.extra and f.extra.get("shadows") and not f.urls)
        ]
    if entity.filesets:
        for fs in entity.filesets:
            fs._total_size = sum(f.size for f in fs.manifest)
    if entity.webcaptures:
        for wc in entity.webcaptures:
            wc._wayback_suffix = wayback_suffix(wc)

    for ref in entity.refs:
        # this is a UI hack to get rid of XML crud in unstructured refs like:
        # LOCKSS (2014) Available: <ext-link
        # xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri"
        # xlink:href="http://lockss.org/"
        # xlink:type="simple">http://lockss.org/</ext-link>. Accessed: 2014
        # November 1.
        if ref.extra and ref.extra.get("unstructured"):
            ref.extra["unstructured"] = strip_extlink_xml(ref.extra["unstructured"])

    # for backwards compatibility, copy extra['subtitle'] to subtitle
    if not entity.subtitle and entity.extra and entity.extra.get("subtitle"):
        legacy_subtitle = entity.extra["subtitle"]
        if isinstance(legacy_subtitle, str):
            entity.subtitle = legacy_subtitle
        elif isinstance(legacy_subtitle, list):
            # the list is non-empty here (the outer check requires truthiness)
            entity.subtitle = legacy_subtitle[0] or None

    # author list to display; ensure it's sorted by index (any authors with
    # index=None go to end of list)
    authors = [
        c for c in entity.contribs
        if c.role in ("author", None)
        and (c.surname or c.raw_name or (c.creator and c.creator.surname))
    ]
    entity._authors = sorted(
        authors,
        key=lambda c: 99999999 if c.index is None else c.index,
    )
    # need authors, title for citeproc to work
    entity._can_citeproc = bool(entity._authors) and bool(entity.title)

    if entity.abstracts and entity.abstracts[0].mimetype:
        # hack to show plain text instead of latex abstracts
        if "latex" in entity.abstracts[0].mimetype:
            entity.abstracts.reverse()

        # hack to (partially) clean up common JATS abstract display case
        lead = entity.abstracts[0]
        if lead.mimetype == "application/xml+jats":
            cleaned = lead.content
            for tag in ("p", "jats", "jats:p"):
                cleaned = cleaned.replace("<{}>".format(tag), "")
                cleaned = cleaned.replace("</{}>".format(tag), "")
                # ugh, double encoding happens: the tags can also arrive
                # entity-escaped. These two replacements used to repeat the
                # raw-tag patterns above and therefore did nothing.
                cleaned = cleaned.replace("&lt;/{}&gt;".format(tag), "")
                cleaned = cleaned.replace("&lt;{}&gt;".format(tag), "")
            lead.content = cleaned
    return entity
def enrich_webcapture_entity(entity: WebcaptureEntity) -> WebcaptureEntity:
    """Set ``_wayback_suffix`` on a webcapture entity and return it.

    The attribute is computed by ``wayback_suffix(entity)``. Entities whose
    state is ``redirect`` or ``deleted`` are returned without modification.
    """
    skip_enrichment = entity.state in ("redirect", "deleted")
    if not skip_enrichment:
        entity._wayback_suffix = wayback_suffix(entity)
    return entity