'research_country': record.find(str(etree.QName(terms_url, 'publisherCountry'))).text or 'Not provided', 'identifier_info': { 'identifier': record.find(str(etree.QName(elements_url, 'identifier'))).text or "Not provided", 'identifier_report': record.find(str(etree.QName(elements_url, 'identifierReport'))).text or "Not provided", 'identifier_contract': record.find(str(etree.QName(terms_url, 'identifierDOEcontract'))) or "Not provided", 'identifier_citation': record.find(str(etree.QName(terms_url, 'identifier-citation'))) or "Not provided", 'identifier_other': record.find(str(etree.QName(elements_url, 'identifierOther'))) or "Not provided" }, 'relation': record.find(str(etree.QName(elements_url, 'relation'))).text or "Not provided", 'coverage': record.find(str(etree.QName(elements_url, 'coverage'))).text or "Not provided", 'format': record.find(str(etree.QName(elements_url, 'format'))).text or "Not provided", 'language': record.find(str(etree.QName(elements_url, 'language'))).text or "Not provided" }, 'meta': {}, 'id': { 'service_id': record.find(str(etree.QName(elements_url, 'ostiId'))).text, 'doi': record.find(str(etree.QName(elements_url, 'doi'))).text or 'Not provided', 'url': record.find(str(etree.QName(terms_url, 'identifier-purl'))).text or "Not provided", }, 'source': NAME, 'timestamp': str(timestamp), 'date_created': record.find(str(etree.QName(elements_url, 'date'))).text, 'description': record.find(str(etree.QName(elements_url, 'description'))).text or 'No description provided', 'tags': tags or [], } return NormalizedDocument(normalized_dict) if __name__ == '__main__': print(lint(consume, normalize))
'published-in': { 'journal-ids': journal_ids, 'journal-title': journal_title, 'issn': issn, 'volume': volume, 'issue': issue }, 'author-affiliations': affliations, 'publisher': publisher, 'permissions': { 'copyright-statement': statement, 'copyright-year': copyright_year, 'copyright-holder': copyright_holder, 'license': license } }, 'description': description, 'meta': {}, 'id': ids, 'tags': tags, 'source': NAME, 'date_created': date_created, 'timestamp': str(timestamp) } #print(json.dumps(normalized_dict, sort_keys=True, indent=4, separators=(',', ': '))) return NormalizedDocument(normalized_dict) if __name__ == '__main__': logger.error(lint(consume, normalize))
def normalize(raw_doc, timestamp):
    """Wrap the 'doc' payload of ``raw_doc`` in a NormalizedDocument.

    The payload is fetched with ``raw_doc.get('doc')`` and must support
    item lookup for the keys 'title', 'author', 'abstract' and 'id'.

    The resulting document carries a single contributor (the payload's
    'author', with an empty email), uses the abstract both as the
    'abstract' property and as the description, and fills the remaining
    fields with fixed placeholders: DOI 'Not provided', a URL built from
    the payload id, and the tags 'Test1' and 'Test2'.  ``timestamp`` is
    stringified and used for both 'timestamp' and 'date_created'.
    """
    payload = raw_doc.get('doc')

    # Pull the payload fields in the order the output dict consumes them.
    title = payload['title']
    author = payload['author']
    abstract = payload['abstract']
    service_id = payload['id']

    identifiers = {
        'service_id': service_id,
        'doi': 'Not provided',
        'url': 'fake.stuff.org/{}'.format(service_id),
    }
    stamp = str(timestamp)

    fields = {
        'title': title,
        'contributors': [
            {'full_name': author, 'email': ''},
        ],
        'properties': {'abstract': abstract},
        'meta': {},
        'id': identifiers,
        'source': NAME,
        'timestamp': stamp,
        # No separate creation date is read from the payload, so the
        # supplied timestamp is reused here.
        'date_created': stamp,
        'description': abstract,
        'tags': ['Test1', 'Test2'],
    }
    return NormalizedDocument(fields)


# Script entry point: print whatever lint() returns for this module's
# consume/normalize pair.
if __name__ == '__main__':
    print(lint(consume, normalize))