Beispiel #1
0
    def test_process_legal(self):
        """Archive a raw document, then a normalized one, and check both files.

        The stringified return value of ``process_docs.process_raw`` must be
        the name of a directory under ``archive/TEST/<doc_id>`` that contains
        ``raw.json``.  After ``process_docs.process`` is called with that same
        value, a ``normalized.json`` must exist somewhere under the same
        archive directory.
        """
        raw_doc = RawDocument({
            'doc': json.dumps({'Hello': 'world'}),
            'source': 'TEST',
            'doc_id': 37,
            'filetype': 'json'
        })
        ts = str(process_docs.process_raw(raw_doc, 'test-version'))
        archive_dir = 'archive/TEST/{0}'.format(raw_doc.get('doc_id'))

        # Gather *every* directory holding a raw.json instead of keeping only
        # the last one walked: other tests (or earlier runs) archive the same
        # doc_id, and os.walk gives no ordering guarantee, so "last match"
        # may legitimately be a different timestamp than the one just written.
        raw_timestamps = {
            os.path.basename(dirname)
            for dirname, _, _ in os.walk(archive_dir)
            if os.path.isfile(os.path.join(dirname, 'raw.json'))
        }
        assert ts in raw_timestamps

        doc = NormalizedDocument({
            'title': "TEST PROJECT",
            'contributors': [
                {
                    'full_name': 'Me, Myself',
                    'email': '*****@*****.**'
                },
                {
                    'full_name': 'And I',
                    'email': '*****@*****.**'
                }
            ],
            'properties': {
            },
            'meta': {},
            'id': {
                'service_id': raw_doc.get('doc_id'),
                'doi': 'Not available',
                'url': 'fake.stuff.org/{}'.format(raw_doc.get('doc_id')),
            },
            'source': raw_doc.get('source'),
            'timestamp': ts,
            'tags': ['1', '2', '3'],
            'date_created': ts,
            'description': 'science stuff',
        })

        # process() is expected to return something truthy on success.
        assert process_docs.process(doc, ts)

        # The normalized output only has to exist somewhere under the
        # document's archive directory.
        assert any(
            os.path.isfile(os.path.join(dirname, 'normalized.json'))
            for dirname, _, _ in os.walk(archive_dir)
        )
Beispiel #2
0
    def test_process_raw(self):
        """Check that processing a raw document leaves a raw.json on disk.

        ``process_docs.process_raw`` must return a truthy value, and a
        ``raw.json`` file must then exist in some directory under
        ``archive/TEST/<doc_id>``.
        """
        payload = json.dumps({'Hello': 'world'})
        document = RawDocument({
            'doc': payload,
            'source': 'TEST',
            'doc_id': 37,
            'filetype': 'json',
        })

        outcome = process_docs.process_raw(document, 'test-version')
        assert outcome

        # Search the whole archive subtree for this document id.
        archive_root = 'archive/TEST/{0}'.format(document.get('doc_id'))
        assert any(
            os.path.isfile(folder + '/raw.json')
            for folder, _subdirs, _files in os.walk(archive_root)
        )