Ejemplo n.º 1
0
            'research_country': record.find(str(etree.QName(terms_url, 'publisherCountry'))).text or 'Not provided',
            'identifier_info': {
                'identifier': record.find(str(etree.QName(elements_url, 'identifier'))).text or "Not provided",
                'identifier_report': record.find(str(etree.QName(elements_url, 'identifierReport'))).text or "Not provided",
                'identifier_contract': record.find(str(etree.QName(terms_url, 'identifierDOEcontract'))) or "Not provided",
                'identifier_citation': record.find(str(etree.QName(terms_url, 'identifier-citation'))) or "Not provided",
                'identifier_other': record.find(str(etree.QName(elements_url, 'identifierOther'))) or "Not provided"
            },
            'relation': record.find(str(etree.QName(elements_url, 'relation'))).text or "Not provided",
            'coverage': record.find(str(etree.QName(elements_url, 'coverage'))).text or "Not provided",
            'format': record.find(str(etree.QName(elements_url, 'format'))).text or "Not provided",
            'language': record.find(str(etree.QName(elements_url, 'language'))).text or "Not provided"
        },
        'meta': {},
        'id': {
            'service_id': record.find(str(etree.QName(elements_url, 'ostiId'))).text,
            'doi': record.find(str(etree.QName(elements_url, 'doi'))).text or 'Not provided',
            'url': record.find(str(etree.QName(terms_url, 'identifier-purl'))).text or "Not provided",
        },
        'source': NAME,
        'timestamp': str(timestamp),
        'date_created': record.find(str(etree.QName(elements_url, 'date'))).text,
        'description': record.find(str(etree.QName(elements_url, 'description'))).text or 'No description provided',
        'tags': tags or [],
    }
    return NormalizedDocument(normalized_dict)


# Script entry point: only when this file is executed directly, call lint with
# the names consume and normalize and print whatever lint returns.
# NOTE(review): lint is not defined in the visible code; presumably it
# validates the harvester pair -- confirm against its definition.
if __name__ == '__main__':
    print(lint(consume, normalize))
Ejemplo n.º 2
0
                'published-in': {
                    'journal-ids': journal_ids,
                    'journal-title': journal_title,
                    'issn': issn,
                    'volume': volume,
                    'issue': issue
                },
                'author-affiliations': affliations,
                'publisher': publisher,
                'permissions': {
                    'copyright-statement': statement,
                    'copyright-year': copyright_year,
                    'copyright-holder': copyright_holder,
                    'license': license
                }
            },
            'description': description,
            'meta': {},
            'id': ids,
            'tags': tags,
            'source': NAME,
            'date_created': date_created,
            'timestamp': str(timestamp)
    }
    #print(json.dumps(normalized_dict, sort_keys=True, indent=4, separators=(',', ': ')))
    return NormalizedDocument(normalized_dict)
    

# Script entry point: only when this file is executed directly, call lint with
# the names consume and normalize and send its return value to logger.error.
# NOTE(review): the lint result is logged at error level regardless of its
# content -- confirm that this level is intended.
if __name__ == '__main__':
    logger.error(lint(consume, normalize))
Ejemplo n.º 3
0
def normalize(raw_doc, timestamp):
    """Convert a raw document into a NormalizedDocument.

    The record is read from ``raw_doc.get('doc')`` and must support item
    lookup for the keys 'title', 'author', 'abstract' and 'id'.

    Args:
        raw_doc: Object exposing ``get('doc')``.
        timestamp: Value stringified into both the 'timestamp' and the
            'date_created' fields.

    Returns:
        A NormalizedDocument wrapping the assembled dictionary.
    """
    record = raw_doc.get('doc')

    # Pull the source fields up front, in the order they appear in the output.
    title = record['title']
    author = record['author']
    abstract = record['abstract']
    service_id = record['id']
    stamp = str(timestamp)

    contributor = {'full_name': author, 'email': ''}
    identifiers = {
        'service_id': service_id,
        'doi': 'Not provided',
        'url': 'fake.stuff.org/{}'.format(service_id),
    }

    return NormalizedDocument({
        'title': title,
        'contributors': [contributor],
        'properties': {'abstract': abstract},
        'meta': {},
        'id': identifiers,
        'source': NAME,
        'timestamp': stamp,
        'date_created': stamp,
        # The abstract doubles as the description.
        'description': abstract,
        'tags': ['Test1', 'Test2'],
    })


# Script entry point: only when this file is executed directly, call lint with
# the names consume and normalize and print whatever lint returns.
# NOTE(review): lint and consume are not defined in the visible code; confirm
# their contracts against their definitions.
if __name__ == '__main__':
    print(lint(consume, normalize))
Ejemplo n.º 4
0
            'published-in': {
                'journal-ids': journal_ids,
                'journal-title': journal_title,
                'issn': issn,
                'volume': volume,
                'issue': issue
            },
            'author-affiliations': affliations,
            'publisher': publisher,
            'permissions': {
                'copyright-statement': statement,
                'copyright-year': copyright_year,
                'copyright-holder': copyright_holder,
                'license': license
            }
        },
        'description': description,
        'meta': {},
        'id': ids,
        'tags': tags,
        'source': NAME,
        'date_created': date_created,
        'timestamp': str(timestamp)
    }
    #print(json.dumps(normalized_dict, sort_keys=True, indent=4, separators=(',', ': ')))
    return NormalizedDocument(normalized_dict)


# Script entry point: only when this file is executed directly, call lint with
# the names consume and normalize and send its return value to logger.error.
# NOTE(review): the lint result is logged at error level regardless of its
# content -- confirm that this level is intended.
if __name__ == '__main__':
    logger.error(lint(consume, normalize))