def test_process_legal(self):
    """Process a raw document, then a normalized one, and check both are archived.

    The raw document is passed to ``process_docs.process_raw``; the string
    form of its return value must match the name of a directory under
    ``archive/TEST/<doc_id>`` that contains ``raw.json``. A normalized
    document is then passed to ``process_docs.process`` with that timestamp,
    and a ``normalized.json`` must exist somewhere under the same root.
    """
    raw_doc = RawDocument({
        'doc': json.dumps({'Hello': 'world'}),
        'source': 'TEST',
        'doc_id': 37,
        'filetype': 'json'
    })

    ts = str(process_docs.process_raw(raw_doc, 'test-version'))

    archive_root = 'archive/TEST/{0}'.format(raw_doc.get('doc_id'))

    # Gather *every* directory holding a raw.json rather than keeping only
    # the last one visited: other tests archive under the same source and
    # doc_id, and os.walk order would otherwise decide which name is compared.
    raw_timestamps = {
        os.path.basename(dirname)
        for dirname, _, _ in os.walk(archive_root)
        if os.path.isfile(os.path.join(dirname, 'raw.json'))
    }
    assert ts in raw_timestamps
    timestamp = ts

    doc = NormalizedDocument({
        'title': "TEST PROJECT",
        'contributors': [
            {
                'full_name': 'Me, Myself',
                'email': '*****@*****.**'
            },
            {
                'full_name': 'And I',
                'email': '*****@*****.**'
            }
        ],
        'properties': {
        },
        'meta': {},
        'id': {
            'service_id': raw_doc.get('doc_id'),
            'doi': 'Not available',
            'url': 'fake.stuff.org/{}'.format(raw_doc.get('doc_id')),
        },
        'source': raw_doc.get('source'),
        'timestamp': str(timestamp),
        'tags': ['1', '2', '3'],
        'date_created': str(timestamp),
        'description': 'science stuff',
    })

    assert process_docs.process(doc, timestamp)

    # NOTE(review): this accepts a normalized.json in any directory under the
    # archive root; presumably it lands beside raw.json — confirm before
    # tightening the check to the timestamp directory.
    found = any(
        os.path.isfile(os.path.join(dirname, 'normalized.json'))
        for dirname, _, _ in os.walk(archive_root)
    )
    assert found
def test_process_raw(self):
    """Check that processing a raw document leaves a raw.json in the archive.

    ``process_docs.process_raw`` must return a truthy value, and afterwards
    at least one directory under ``archive/TEST/<doc_id>`` must contain a
    ``raw.json`` file.
    """
    payload = json.dumps({'Hello': 'world'})
    raw_file = RawDocument({
        'doc': payload,
        'source': "TEST",
        'doc_id': 37,
        'filetype': "json"
    })

    assert process_docs.process_raw(raw_file, 'test-version')

    archive_dir = 'archive/TEST/{0}'.format(raw_file.get('doc_id'))

    # One matching directory anywhere below the archive root is enough.
    found = any(
        os.path.isfile(folder + '/raw.json')
        for folder, _subdirs, _files in os.walk(archive_dir)
    )
    assert found