Beispiel #1
0
def test_blob_locations(client, fakedata, taskmanager):
    """A blob shared by two files is listed once per containing directory.

    Two directories are created under the root and each receives a file that
    points at the same blob. After the task manager has run, ``get_locations``
    must return both files, in the order they were created.
    """
    root = fakedata.init()
    first_dir = fakedata.directory(root, 'dir1')
    second_dir = fakedata.directory(root, 'dir2')
    shared_blob = fakedata.blob(b'hello world')
    fakedata.file(first_dir, 'foo', shared_blob)
    fakedata.file(second_dir, 'bar', shared_blob)

    taskmanager.run()

    response = CollectionApiClient(client).get_locations(shared_blob.pk)

    # The expected parent id is the directory's primary key behind a
    # "_directory_" prefix.
    expected = [
        {
            'filename': name,
            'parent_id': f'_directory_{directory.pk}',
            'parent_path': path,
        }
        for name, directory, path in [
            ('foo', first_dir, '/dir1'),
            ('bar', second_dir, '/dir2'),
        ]
    ]
    assert response['locations'] == expected
Beispiel #2
0
def test_gpg_digest(gpg_blob, configure_gpg, client, fakedata, taskmanager):
    """The digest of the ``gpg_blob`` fixture carries a truthy 'pgp' entry.

    The blob is attached as a file named 'email' to the root directory of a
    fresh collection before the task manager runs.

    ``configure_gpg`` is never referenced in the body; it is requested only so
    that the fixture is active for the duration of this test.
    """
    coll = fakedata.collection()
    fakedata.file(coll.root_directory, 'email', gpg_blob)

    taskmanager.run()

    response = CollectionApiClient(coll, client).get_digest(gpg_blob.pk)
    content = response['content']
    assert content['pgp']
Beispiel #3
0
def test_gpg_digest(gpg_blob, client, fakedata, taskmanager):
    """The digest of the ``gpg_blob`` fixture carries a truthy 'pgp' entry.

    The blob is attached as a file named 'email' to the directory returned by
    ``fakedata.init()`` before the task manager runs.
    """
    root_dir = fakedata.init()
    fakedata.file(root_dir, 'email', gpg_blob)

    taskmanager.run()

    response = CollectionApiClient(client).get_digest(gpg_blob.pk)
    content = response['content']
    assert content['pgp']
Beispiel #4
0
def test_digest_image_exif(client, fakedata, taskmanager):
    """The digest of the test image reports its creation date and location.

    The image at ``TESTDATA / PATH_IMAGE`` is stored as 'bikes.jpg' in a fresh
    collection. Going by the test name, the asserted values presumably come
    from the image's EXIF metadata.
    """
    coll = fakedata.collection()
    image_path = TESTDATA / PATH_IMAGE
    with image_path.open('rb') as image_file:
        image_bytes = image_file.read()
    image_blob = fakedata.blob(image_bytes)
    fakedata.file(coll.root_directory, 'bikes.jpg', image_blob)

    taskmanager.run()

    response = CollectionApiClient(coll, client).get_digest(image_blob.pk)
    content = response['content']

    assert content['date-created'] == '2006-02-11T11:06:37Z'
    assert content['location'] == '33.87546081542969, -116.3016196017795'
Beispiel #5
0
def test_tika_digested(fakedata, taskmanager, client):
    """The digest of an extension-less test document has its text and dates.

    The source file on disk has no extension ('no-extension/file_doc'); it is
    registered under the name 'file.doc'. Going by the test name, the text and
    dates are presumably extracted by Tika.
    """
    root_dir = fakedata.init()
    doc_path = TESTDATA / './no-extension/file_doc'
    with doc_path.open('rb') as doc_file:
        doc_bytes = doc_file.read()
    doc_blob = fakedata.blob(doc_bytes)
    fakedata.file(root_dir, 'file.doc', doc_blob)

    taskmanager.run()

    response = CollectionApiClient(client).get_digest(doc_blob.pk)
    content = response['content']

    assert "Colors and Lines to choose" in content['text']
    assert content['date'] == '2016-01-13T11:05:00Z'
    assert content['date-created'] == '2016-01-13T11:00:00Z'
Beispiel #6
0
def test_digest_with_broken_dependency(fakedata, taskmanager, client):
    """A digest is still produced for a PDF whose text could not be extracted.

    The blob built from 'disk-files/broken.pdf' must be detected as
    'application/pdf'. Its digest keeps the md5 and size, has no text, and
    lists 'tika_http_422' as the only entry under 'broken'.
    """
    root_dir = fakedata.init()
    broken_pdf_path = TESTDATA / 'disk-files/broken.pdf'
    with broken_pdf_path.open('rb') as pdf_file:
        pdf_bytes = pdf_file.read()
    pdf_blob = fakedata.blob(pdf_bytes)
    assert pdf_blob.mime_type == 'application/pdf'
    fakedata.file(root_dir, 'broken.pdf', pdf_blob)

    taskmanager.run()

    response = CollectionApiClient(client).get_digest(pdf_blob.pk)
    content = response['content']

    assert content['md5'] == 'f6e0d13c5c3aaab75b4febced3e72ae0'
    assert content['size'] == 1000
    assert content['text'] is None
    assert content['broken'] == ['tika_http_422']
Beispiel #7
0
def test_digest_msg(fakedata, taskmanager, client):
    """The digest of an Outlook .msg file reports the expected metadata.

    The file row is reloaded from the database after the task manager runs,
    and the digest is requested for whatever blob the row points at by then.
    """
    root_dir = fakedata.init()
    msg_path = TESTDATA / 'msg-5-outlook/DISEARĂ-Te-așteptăm-la-discuția-despre-finanțarea-culturii.msg'
    with msg_path.open('rb') as msg_stream:
        msg_bytes = msg_stream.read()
    msg_blob = fakedata.blob(msg_bytes)
    stored_file = fakedata.file(root_dir, 'the.msg', msg_blob)

    taskmanager.run()

    stored_file.refresh_from_db()
    response = CollectionApiClient(client).get_digest(stored_file.blob.pk)
    content = response['content']

    # Checked one field at a time, in this order.
    expected_fields = [
        ('content-type', 'application/vnd.ms-outlook'),
        ('filename', 'the.msg'),
        ('filetype', 'email'),
        ('md5', '38385c4487719fa9dd0fb695d3aad0ee'),
        ('sha1', '90548132e18bfc3088e81918bbcaf887a68c6acc'),
        ('size', 19968),
    ]
    for field, expected in expected_fields:
        assert content[field] == expected
Beispiel #8
0
def test_pdf_ocr(fakedata, taskmanager, client):
    """A PDF's digest text contains the OCR text, and the OCR PDF is served.

    An OCR source named 'ocr1' is registered from the 'ocr/one' directory next
    to ``TESTDATA``. After processing, the digest text must contain the
    expected phrase, and the OCR download URL must stream exactly the bytes of
    the matching PDF in that directory, with a PDF content type.
    """
    ocr_root = TESTDATA.parent / 'ocr/one'
    ocr.create_ocr_source('ocr1', ocr_root)

    coll = fakedata.collection()
    source_pdf_path = TESTDATA / 'disk-files/pdf-for-ocr/mof1_1992_233.pdf'
    with source_pdf_path.open('rb') as source_pdf:
        source_bytes = source_pdf.read()
    pdf_blob = fakedata.blob(source_bytes)
    fakedata.file(coll.root_directory, 'mof1_1992_233.pdf', pdf_blob)

    taskmanager.run()

    response = CollectionApiClient(coll, client).get_digest(pdf_blob.pk)
    content = response['content']
    assert "Hotărlre privind stabilirea cantităţii de gaze" in content['text']

    expected_pdf_path = ocr_root / 'foo/bar/f/d/fd41b8f1fe19c151517b3cda2a615fa8.pdf'
    with expected_pdf_path.open('rb') as expected_pdf:
        expected_bytes = expected_pdf.read()

    download_url = f'/collections/testdata/{pdf_blob.pk}/ocr/ocr1/'
    download = client.get(download_url)
    # The response is streamed, so the chunks are joined before comparing.
    streamed_bytes = b''.join(download.streaming_content)
    assert streamed_bytes == expected_bytes
    assert download['Content-Type'] == 'application/pdf'