def to_normalized(self, doc):
    """Build a ``NormalizedDocument`` from a stored document.

    Every truthy field of ``doc`` whose name is not listed in
    ``do_not_include`` is copied into a plain dict. Values that parse as
    JSON are stored decoded; anything else is stored unchanged.

    :param doc: a mapping, or anything else ``dict()`` accepts, of field
        name to stored value.
    :return: a ``NormalizedDocument`` created with ``validate=False`` and
        ``clean=False``, or ``None`` when ``doc`` carries no truthy
        ``providerUpdatedDateTime``.
    """
    # Fields that are never copied. 'versions' is among them, so the copied
    # dict can never contain it and needs no version post-processing.
    do_not_include = (
        'docID', 'doc', 'filetype', 'timestamps', 'source', 'versions', 'key',
    )

    # make the new dict actually contain real items
    normed = {}
    for key, value in dict(doc).items():
        if not value or key in do_not_include:
            continue
        try:
            normed[key] = json.loads(value)
        except (ValueError, TypeError):
            # Not JSON text (or not text at all): keep the raw value.
            normed[key] = value

    # No datetime means the document wasn't normalized (probably wasn't on
    # the approved list). Bail out before paying for the deferred import.
    updated = normed.get('providerUpdatedDateTime')
    if not updated:
        return None

    # TODO - fix odd circular import that makes us import this here
    from scrapi.base.helpers import datetime_formatter
    # assumes the stored value exposes .isoformat() (e.g. a datetime) -- TODO confirm
    normed['providerUpdatedDateTime'] = datetime_formatter(updated.isoformat())

    return NormalizedDocument(normed, validate=False, clean=False)
def to_normalized(self, doc):
    """Turn a stored document into a ``NormalizedDocument``.

    Truthy fields of ``doc`` that are not named in ``skipped_fields`` are
    copied into a fresh dict, JSON-decoding each value where possible.

    :param doc: a mapping, or anything else ``dict()`` accepts, of field
        name to stored value.
    :return: a ``NormalizedDocument`` created with ``validate=False`` and
        ``clean=False``, or ``None`` when there is no truthy
        ``providerUpdatedDateTime``.
    """
    skipped_fields = ['docID', 'doc', 'filetype', 'timestamps', 'source']

    def decode(raw):
        # Values that are not JSON text (or not text at all) pass through.
        try:
            return json.loads(raw)
        except (ValueError, TypeError):
            return raw

    # make the new dict actually contain real items
    normed = {
        field: decode(raw)
        for field, raw in dict(doc).items()
        if raw and field not in skipped_fields
    }

    versions = normed.get('versions')
    if versions:
        normed['versions'] = [str(version) for version in versions]

    # TODO - fix odd circular import that makes us import this here
    from scrapi.base.helpers import datetime_formatter

    # No datetime means the document wasn't normalized (probably wasn't on the approved list)
    updated = normed.get('providerUpdatedDateTime')
    if not updated:
        return None

    normed['providerUpdatedDateTime'] = datetime_formatter(updated.isoformat())
    return NormalizedDocument(normed, validate=False, clean=False)
def schema(self):
    """Return the mapping that describes how a raw record is normalized.

    Most entries pair a path string with a callable that post-processes the
    value found at that path (presumably resolved by whatever consumes this
    schema -- confirm against the caller).

    :return: dict keyed by normalized field name.
    """
    return {
        # First element when the value is truthy, otherwise ''.
        'title': ('/title', lambda x: x[0] if x else ''),
        # First element of a non-empty list; otherwise the value itself, or ''.
        'description': (
            '/subtitle',
            lambda x: x[0] if (isinstance(x, list) and x) else x or ''
        ),
        # Date parts can be integers (e.g. [[2015, 3, 1]]) and str.join only
        # accepts strings, so every part is converted before joining.
        'providerUpdatedDateTime': (
            '/issued/date-parts',
            lambda x: datetime_formatter(' '.join(str(part) for part in x[0]))
        ),
        'uris': {
            'canonicalUri': '/URL'
        },
        # A missing given/family name is left out of the full name rather
        # than being rendered as the literal text 'None'.
        'contributors': (
            '/author',
            lambda x: [
                process_contributor(
                    ' '.join(
                        part
                        for part in (entry.get('given'), entry.get('family'))
                        if part
                    ),
                    entry.get('ORCID')
                )
                for entry in x
            ]
        ),
        'otherProperties': build_properties(
            ('referenceCount', '/reference-count'),
            ('updatePolicy', '/update-policy'),
            ('depositedTimestamp', '/deposited/timestamp'),
            ('Empty', '/trash/not-here'),
            ('Empty2', '/')
        )
    }
def schema(self):
    """Describe how each normalized field is derived from a raw record.

    Tuple entries pair a path string with a callable applied to the value at
    that path (presumably by the consumer of this schema -- confirm against
    the caller).

    :return: dict keyed by normalized field name.
    """
    def first_title(titles):
        # First element when the value is truthy, otherwise ''.
        return titles[0] if titles else ''

    def first_subtitle(subtitle):
        # First element of a non-empty list; otherwise the value itself, or ''.
        if isinstance(subtitle, list) and subtitle:
            return subtitle[0]
        return subtitle or ''

    def issued_date(date_parts):
        # Parts can be integers (e.g. [[2015, 3, 1]]); str.join rejects
        # non-strings, so convert each part first.
        return datetime_formatter(' '.join(str(part) for part in date_parts[0]))

    def full_name(entry):
        # Skip a missing given/family name instead of formatting it as 'None'.
        name_parts = (entry.get('given'), entry.get('family'))
        return ' '.join(part for part in name_parts if part)

    def contributors(authors):
        return [
            process_contributor(full_name(entry), entry.get('ORCID'))
            for entry in authors
        ]

    return {
        'title': ('/title', first_title),
        'description': ('/subtitle', first_subtitle),
        'providerUpdatedDateTime': ('/issued/date-parts', issued_date),
        'uris': {
            'canonicalUri': '/URL'
        },
        'contributors': ('/author', contributors),
        'otherProperties': build_properties(
            ('referenceCount', '/reference-count'),
            ('updatePolicy', '/update-policy'),
            ('depositedTimestamp', '/deposited/timestamp'),
            ('Empty', '/trash/not-here'),
            ('Empty2', '/')
        )
    }