Example #1
0
def test_upload_one(local_files, azure):
    """Upload a single file both as one chunk and as multiple chunks."""
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # Transfer client with a deterministic (non-unique) temporary directory.
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure, transfer=put_chunk,
                                   unique_temporary=False)

        # Upload small enough to fit in a single chunk.
        up = ADLUploader(azure, test_dir / 'littlefile', littlefile,
                         nthreads=1, overwrite=True)
        assert azure.info(test_dir / 'littlefile')['length'] == 10

        # Upload split across several chunks, still driven by one thread.
        size = 10000
        up = ADLUploader(azure, test_dir / 'bigfile', bigfile,
                         nthreads=1, chunksize=size // 5, client=client,
                         run=False, overwrite=True)
        up.run()

        assert azure.info(test_dir / 'bigfile')['length'] == size

        azure.rm(test_dir / 'bigfile')
Example #2
0
def test_set_expiry(azure):
    """Exercise set_expiry: absolute expiry, reverting, and invalid inputs."""
    with azure_teardown(azure):
        # Milliseconds since the epoch corresponding to 2030-01-31 12:00 (a
        # point safely in the future when this test was written).
        epoch = datetime.datetime.utcfromtimestamp(0)
        target = datetime.datetime(2030, 1, 31, 12)
        expiry_ms = (target - epoch).total_seconds() * 1000

        # create the file
        azure.touch(a)

        # A fresh file should report the "never expires" value.
        never_expiry = azure.info(a,
                                  invalidate_cache=True)['msExpirationTime']
        azure.set_expiry(a, 'Absolute', expiry_ms)
        observed = azure.info(a, invalidate_cache=True)['msExpirationTime']
        # The service applies the expiry on a best-effort basis, so accept a
        # +-100ms window rather than demanding millisecond-exact agreement.
        assert expiry_ms - 100 <= observed <= expiry_ms + 100
        assert observed != never_expiry

        # Reverting to NeverExpire should restore the original value.
        azure.set_expiry(a, 'NeverExpire')
        observed = azure.info(a)['msExpirationTime']
        assert observed == never_expiry

        # Failure case: an unrecognized expiry option is rejected.
        with pytest.raises(ValueError):
            azure.set_expiry(a, 'BadEnumValue')

        # Failure case: 'Absolute' without an expiry time is rejected.
        with pytest.raises(ValueError):
            azure.set_expiry(a, 'Absolute')
Example #3
0
def test_set_expiry(azure):
    """Exercise set_expiry: absolute expiry, reverting, and invalid inputs.

    Fix: the original asserted ``time_in_milliseconds == cur_expiry`` exactly,
    but the service only sets the expiry on a best-effort basis with no
    guarantee of millisecond precision, making the test flaky.  Compare within
    a +-100 ms window instead (matching the tolerance used elsewhere in this
    suite).
    """
    with azure_teardown(azure):
        azure.touch(a)

        # first get the existing expiry, which should be never
        initial_expiry = azure.info(a)['msExpirationTime']

        # this future time gives the milliseconds since the epoch that have
        # occurred as of 01/31/2030 at noon
        epoch_time = datetime.datetime.utcfromtimestamp(0)
        final_time = datetime.datetime(2030, 1, 31, 12)
        time_in_milliseconds = (final_time - epoch_time).total_seconds() * 1000
        azure.set_expiry(a, 'Absolute', time_in_milliseconds)

        cur_expiry = azure.info(a)['msExpirationTime']
        # +-100ms window: the service does a best effort to set the expiry
        # precisely, but does not guarantee the exact millisecond
        assert time_in_milliseconds - 100 <= cur_expiry <= time_in_milliseconds + 100
        assert initial_expiry != cur_expiry

        # now set it back to never expire and validate it is the same
        azure.set_expiry(a, 'NeverExpire')
        cur_expiry = azure.info(a)['msExpirationTime']
        assert initial_expiry == cur_expiry

        # now validate the fail cases
        # bad enum
        with pytest.raises(ValueError):
            azure.set_expiry(a, 'BadEnumValue')

        # missing time
        with pytest.raises(ValueError):
            azure.set_expiry(a, 'Absolute')
Example #4
0
def test_chmod(azure):
    """Verify chmod changes permissions and that they are actually enforced."""
    with azure_teardown(azure):
        azure.touch(a)

        # Newly created files default to 770.
        assert azure.info(a)['permission'] == '770'

        azure.chmod(a, '0555')
        assert azure.info(a)['permission'] == '555'

        # Appending to a read-only file must be refused by the service.
        with pytest.raises((OSError, IOError)):
            with azure.open(a, 'ab') as f:
                try:
                    f.write(b'data')
                except Exception as e:
                    print(e)
                    raise e

        azure.chmod(a, '0770')
        azure.rm(a)

        # A directory without execute permission cannot be listed.
        azure.mkdir(test_dir / 'deep')
        azure.touch(test_dir / 'deep' / 'file')
        azure.chmod(test_dir / 'deep', '660')

        with pytest.raises((OSError, IOError)):
            azure.ls(test_dir / 'deep')

        # Restore so teardown can clean up.
        azure.chmod(test_dir / 'deep', '770')
Example #5
0
def test_info(azure):
    """info() reports length/name/type for files and type for directories."""
    with azure_teardown(azure):
        with azure.open(a, 'wb') as f:
            f.write(b'a' * 5)

        file_info = azure.info(a, invalidate_cache=False)
        assert file_info['length'] == 5
        assert file_info['name'] == a
        assert file_info['type'] == 'FILE'

        dir_info = azure.info(test_dir, invalidate_cache=True)
        assert dir_info['type'] == 'DIRECTORY'
Example #6
0
def test_info(azure):
    """info() reports length/name/type for files and type for directories."""
    with azure_teardown(azure):
        with azure.open(a, 'wb') as f:
            f.write(b'a' * 5)

        file_info = azure.info(a)
        assert file_info['length'] == 5
        assert file_info['name'] == a
        assert file_info['type'] == 'FILE'

        assert azure.info(test_dir)['type'] == 'DIRECTORY'
Example #7
0
def test_info_invalidate_cache(azure, second_azure):
    """A second fs instance only sees a new file after its cache is invalidated."""
    with azure_teardown(azure):
        # Warm both caches and make sure the file does not already exist.
        assert not azure.exists(a, invalidate_cache=False)
        assert not second_azure.exists(a, invalidate_cache=False)

        with azure.open(a, 'wb') as f:
            f.write(b'a' * 5)

        # The writing fs sees the file; the other (stale cache) does not.
        info = azure.info(a, invalidate_cache=False)
        with pytest.raises(FileNotFoundError):
            second_azure.info(a, invalidate_cache=False)

        # After invalidating, the second fs agrees with the first.
        second_info = second_azure.info(a, invalidate_cache=True)
        assert info['length'] == 5
        assert info['name'] == a
        assert info['type'] == 'FILE'

        for key in ('length', 'name', 'type'):
            assert info[key] == second_info[key]

        # The info call above should have repopulated the second fs's cache.
        cached = second_azure.info(test_dir, invalidate_cache=False)
        assert cached['type'] == 'DIRECTORY'
Example #8
0
def test_file_creation_open(azure):
    """Opening a path for writing creates an (empty) file immediately."""
    with azure_teardown(azure):
        # Start from a clean slate.
        if azure.exists(a):
            azure.rm(a)
        assert not azure.exists(a)

        # The file should exist as soon as it is opened, before any write.
        f = azure.open(a, "wb")
        assert azure.exists(a)
        f.close()

        assert azure.info(a)['length'] == 0
Example #9
0
def test_upload_many(local_files, azure):
    """Upload a whole local directory tree with a single worker thread."""
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files
        local_root = os.path.dirname(bigfile)

        up = ADLUploader(azure, test_dir, local_root, nthreads=1,
                         overwrite=True)

        # Spot-check individual files, then totals across the tree.
        assert azure.info(test_dir / 'littlefile')['length'] == 10
        assert azure.cat(test_dir / 'nested1/nested2/a') == b'0123456789'
        assert len(azure.du(test_dir, deep=True)) == 6
        assert azure.du(test_dir, deep=True, total=True) == 10000 + 40
Example #10
0
def test_chown(azure):
    """chown must raise PermissionError and leave owner/group untouched."""
    with azure_teardown(azure):
        azure.touch(a)

        # Fabricated principal IDs the account has no rights over.
        user_id = '470c0ccf-c91a-4597-98cd-48507d2f1486'
        group_id = '6b190b7a-0acf-43c8-ab14-965f5aea6243'

        owner = azure.info(a)['owner']
        group = azure.info(a)['group']

        # Changing the owner alone is denied and has no effect.
        with pytest.raises(PermissionError):
            azure.chown(a, owner=user_id)
        assert azure.info(a)['owner'] == owner

        # Changing the group alone is denied and has no effect.
        with pytest.raises(PermissionError):
            azure.chown(a, group=group_id)
        assert azure.info(a)['group'] == group

        # Even a no-op owner+group change is denied; state stays untouched.
        with pytest.raises(PermissionError):
            azure.chown(a, owner=owner, group=group)
        assert azure.info(a)['owner'] == owner
        assert azure.info(a)['group'] == group
def test_upload_one_empty_file(local_files, azure):
    """An empty local file uploads successfully as a zero-length remote file."""
    with azure_teardown(azure):
        bigfile, littlefile, emptyfile, a, b, c = local_files

        # Transfer client with a deterministic (non-unique) temporary directory.
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        # A zero-byte file is the degenerate single-chunk case.
        up = ADLUploader(azure, test_dir / 'emptyfile', emptyfile,
                         nthreads=1, overwrite=True)
        assert azure.info(test_dir / 'emptyfile')['length'] == 0

        azure.rm(test_dir / 'emptyfile')
def test_upload_single_file_in_dir(tempdir, azure):
    """Uploading a directory containing one file recreates it remotely."""
    with azure_teardown(azure):
        local_dir = tempdir
        local_file = os.path.join(local_dir, 'singlefile')
        with open(local_file, 'wb') as f:
            f.write(b'0123456789')

        # Transfer client with a deterministic (non-unique) temporary directory.
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        up = ADLUploader(azure, test_dir / 'singlefiledir', local_dir,
                         nthreads=1, overwrite=True)
        remote = test_dir / 'singlefiledir' / 'singlefile'
        assert azure.info(remote)['length'] == 10
        azure.rm(remote)
Example #13
0
def test_upload_empty_folder(tempdir, azure):
    """Uploading a tree preserves an empty subdirectory on the remote side."""
    with azure_teardown(azure):
        local_dir = os.path.join(tempdir, "dir1")
        os.mkdir(local_dir)
        os.mkdir(os.path.join(local_dir, "b"))

        with open(os.path.join(local_dir, "file.txt"), 'wb') as f:
            f.write(b'0123456789')

        # Transfer client with a deterministic (non-unique) temporary directory.
        from azure.datalake.store.multithread import put_chunk
        client = ADLTransferClient(azure,
                                   transfer=put_chunk,
                                   unique_temporary=False)

        up = ADLUploader(azure, test_dir / "dir1", local_dir,
                         nthreads=1, overwrite=True)

        # The empty subdirectory "b" must exist remotely as a directory.
        assert azure.info(test_dir / "dir1" / "b")['type'] == 'DIRECTORY'
        azure.rm(test_dir / "dir1", recursive=True)