Exemple #1
0
def test_current(token_restore):
    """Check ``GCSFileSystem.current()``, session sharing and token type."""
    from google.oauth2.credentials import Credentials

    with gcs_maker() as gcs:
        # The instance handed out by gcs_maker should be the "current" one.
        current = GCSFileSystem.current()
        assert current is gcs

        # A second instance with the same project/token must reuse the session.
        other = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN)
        assert other.session is gcs.session

        # With secure_serialize switched off, the test expects ``token`` to be
        # a google Credentials object.
        other = GCSFileSystem(
            TEST_PROJECT, token=GOOGLE_TOKEN, secure_serialize=False
        )
        assert isinstance(other.token, Credentials)
Exemple #2
0
def test_array(token_restore):
    """Round-trip an ``array.array`` of unsigned bytes through a GCS file."""
    from array import array

    with gcs_maker() as gcs:
        # 1000 unsigned bytes, each with value 65 (ASCII "A").
        payload = array('B', [65] * 1000)

        with gcs.open(a, 'wb') as writer:
            writer.write(payload)

        with gcs.open(a, 'rb') as reader:
            content = reader.read()
            assert content == b'A' * 1000
Exemple #3
0
def test_rm(token_restore):
    """``rm`` deletes an existing object and raises for missing paths."""
    with gcs_maker() as gcs:
        assert not gcs.exists(a)

        # Create the object, confirm it is there, delete it, confirm it is gone.
        gcs.touch(a)
        assert gcs.exists(a)
        gcs.rm(a)
        assert not gcs.exists(a)

        # Removing something that does not exist must raise, both for a key
        # inside the test bucket and for a bare name.
        for missing in (TEST_BUCKET + '/nonexistent', 'nonexistent'):
            with pytest.raises((OSError, IOError)):
                gcs.rm(missing)
def test_bigger_than_block_read():
    """Read with 20-byte requests from a file opened with ``block_size=3``."""
    path = TEST_BUCKET + "/2014-01-01.csv"
    with gcs_maker(True) as gcs:
        with gcs.open(path, "rb", block_size=3) as f:
            # Two-argument iter(): keep calling read(20) until it yields b"".
            chunks = list(iter(lambda: f.read(20), b""))
        assert b"".join(chunks) == csv_files["2014-01-01.csv"]
Exemple #5
0
def test_read_keys_from_bucket():
    """``cat`` returns the stored bytes, with or without a ``gcs://`` prefix."""
    with gcs_maker(True) as gcs:
        # First pass: every key reads back as the expected content.
        for key, expected in files.items():
            path = "/".join([TEST_BUCKET, key])
            assert gcs.cat(path) == expected

        # Second pass: the plain path and the protocol-prefixed path agree.
        for key in files:
            path = "/".join([TEST_BUCKET, key])
            assert gcs.cat(path) == gcs.cat("gcs://" + path)
Exemple #6
0
def test_readline(token_restore):
    """The first ``readline()`` of every fixture file returns its first line."""
    with gcs_maker(True) as gcs:
        for fixture in (files, csv_files, text_files):
            for key, data in fixture.items():
                with gcs.open('/'.join([TEST_BUCKET, key]), 'rb') as f:
                    first_line = f.readline()
                # partition() yields the text before the first newline and the
                # newline itself (empty when the data contains no newline).
                head, newline, _ = data.partition(b'\n')
                assert first_line == head + newline
Exemple #7
0
def test_map_pickle():
    """A mapper survives a pickle round trip and still serves its keys."""
    import pickle

    with gcs_maker() as gcs:
        mapper = gcs.get_mapper(root)
        mapper["x"] = b"1"
        assert mapper["x"] == b"1"

        # Serialise and rebuild the mapper, then read the same key back.
        blob = pickle.dumps(mapper)
        restored = pickle.loads(blob)
        assert restored["x"] == b"1"
Exemple #8
0
def test_bigger_than_block_read(token_restore):
    """Request 20 bytes at a time from a file whose block size is 3."""
    with gcs_maker(True) as gcs:
        target = TEST_BUCKET + '/2014-01-01.csv'
        with gcs.open(target, 'rb', block_size=3) as stream:
            pieces = []
            chunk = stream.read(20)
            # An empty chunk signals end of file.
            while chunk:
                pieces.append(chunk)
                chunk = stream.read(20)
        assert b''.join(pieces) == csv_files['2014-01-01.csv']
Exemple #9
0
def test_rm():
    """Exercise ``rm`` on an existing object and on two missing paths."""
    expected_errors = (OSError, IOError)
    with gcs_maker() as gcs:
        # The object must not exist before the test creates it.
        assert not gcs.exists(a)
        gcs.touch(a)
        assert gcs.exists(a)

        gcs.rm(a)
        assert not gcs.exists(a)

        # A missing key inside the bucket cannot be removed ...
        missing_in_bucket = TEST_BUCKET + "/nonexistent"
        with pytest.raises(expected_errors):
            gcs.rm(missing_in_bucket)
        # ... and neither can a bare, bucket-less name.
        with pytest.raises(expected_errors):
            gcs.rm("nonexistent")
Exemple #10
0
def test_gcs_glob(token_restore):
    """Check which glob patterns do and do not match a nested file."""
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + '/nested/file1'

        # Top-level patterns must not reach into the nested directory.
        for pattern in ('/', '/*'):
            assert fn not in gcs.glob(TEST_BUCKET + pattern)

        # Patterns that do cover the nested file.
        for pattern in ('/nested/', '/nested/*', '/nested/file*',
                        '/*/*', '/**'):
            assert fn in gcs.glob(TEST_BUCKET + pattern)

        # Every file matched under nested/ is also reported by find().
        for path in gcs.glob(TEST_BUCKET + '/nested/*'):
            if gcs.isfile(path):
                assert path in gcs.find(TEST_BUCKET)
Exemple #11
0
def test_array():
    """Write an ``array.array`` buffer and read it back as bytes."""
    with gcs_maker() as gcs:
        from array import array

        byte_values = [65] * 1000  # 65 is ASCII "A"
        buffer = array("B", byte_values)

        with gcs.open(a, "wb") as sink:
            sink.write(buffer)

        with gcs.open(a, "rb") as source:
            assert source.read() == b"A" * 1000
Exemple #12
0
def test_readline():
    """``readline()`` returns the first line, newline included, of each file."""
    with gcs_maker(True) as gcs:
        entries = chain(files.items(), csv_files.items(), text_files.items())
        for key, data in entries:
            path = "/".join([TEST_BUCKET, key])
            with gcs.open(path, "rb") as f:
                result = f.readline()

            # The expected line keeps its terminator only when the data
            # actually contains a newline.
            expected = data.split(b"\n")[0]
            if b"\n" in data:
                expected += b"\n"
            assert result == expected
Exemple #13
0
def test_file_info():
    """``find``, ``exists`` and ``info`` agree about a freshly written file."""
    with gcs_maker() as gcs:
        path = TEST_BUCKET + "/nested/file1"
        missing = path + "another"
        payload = b"hello\n"

        with gcs.open(path, "wb") as out:
            out.write(payload)

        assert path in gcs.find(TEST_BUCKET)
        assert gcs.exists(path)
        assert not gcs.exists(missing)
        assert gcs.info(path)["size"] == len(payload)

        # Asking for info on a path that was never written must raise.
        with pytest.raises((OSError, IOError)):
            gcs.info(missing)
Exemple #14
0
def test_get_put(consistency):
    """Download an object to a local file, upload it again, and compare.

    ``consistency`` is assigned to ``gcs.consistency`` before any transfer.
    The test is skipped for ``"crc32c"`` when ``gcsfs.checkers.crcmod`` is
    ``None``.
    """
    if consistency == "crc32c" and gcsfs.checkers.crcmod is None:
        pytest.skip("No CRC")
    with gcs_maker(True) as gcs:
        gcs.consistency = consistency
        with tmpfile() as fn:
            gcs.get(TEST_BUCKET + "/test/accounts.1.json", fn)
            data = files["test/accounts.1.json"]
            # Read through a context manager so the local handle is closed
            # deterministically instead of being left to garbage collection
            # while the temporary file is still in use.
            with open(fn, "rb") as local:
                assert local.read() == data
            gcs.put(fn, TEST_BUCKET + "/temp")
            assert gcs.du(TEST_BUCKET + "/temp") == len(data)
            assert gcs.cat(TEST_BUCKET + "/temp") == data
Exemple #15
0
def test_ls_touch(token_restore):
    """``ls`` lists touched files, both as names and as detail dicts."""
    with gcs_maker() as gcs:
        folder = TEST_BUCKET + '/tmp/test'
        assert not gcs.exists(folder)

        for target in (a, b):
            gcs.touch(target)

        # Plain listing: a sequence of paths.
        names = gcs.ls(folder, False)
        assert set(names) == {a, b}

        # Detailed listing: dicts carrying a 'path' entry.
        details = gcs.ls(folder, True)
        assert {entry['path'] for entry in details} == {a, b}
Exemple #16
0
def test_map_pickle(token_restore):
    """A pickled mapper carries no stored data but restores to the same view."""
    import pickle

    value = b'1234567890'
    with gcs_maker() as gcs:
        mapper = gcs.get_mapper(root)
        mapper['x'] = value

        # The serialised form must not embed the stored value itself.
        serialized = pickle.dumps(mapper)
        assert value not in serialized

        restored = pickle.loads(serialized)
        assert dict(restored) == {'x': value}
Exemple #17
0
def test_file_info(token_restore):
    """``walk``, ``exists`` and ``info`` agree about a freshly written file."""
    with gcs_maker() as gcs:
        target = TEST_BUCKET + '/nested/file1'
        absent = target + 'another'
        content = b'hello\n'

        with gcs.open(target, 'wb') as handle:
            handle.write(content)

        assert target in gcs.walk(TEST_BUCKET)
        assert gcs.exists(target)
        assert not gcs.exists(absent)

        reported_size = gcs.info(target)['size']
        assert reported_size == len(content)

        # info() on a path that was never created must raise.
        with pytest.raises((OSError, IOError)):
            gcs.info(absent)
Exemple #18
0
def test_ls_touch():
    """Touched files show up in both the plain and the detailed listing."""
    with gcs_maker() as gcs:
        directory = TEST_BUCKET + "/tmp/test"
        expected = {a, b}

        assert not gcs.exists(directory)

        gcs.touch(a)
        gcs.touch(b)

        # Without detail, ls yields the paths themselves.
        listing = gcs.ls(directory, False)
        assert set(listing) == expected

        # With detail, each entry is a dict whose "name" is the path.
        detailed = gcs.ls(directory, True)
        assert {info["name"] for info in detailed} == expected
Exemple #19
0
def test_large_upload():
    """Upload data larger than a temporarily lowered ``GCS_MAX_BLOCK_SIZE``."""
    saved_limit = gcsfs.core.GCS_MAX_BLOCK_SIZE
    gcsfs.core.GCS_MAX_BLOCK_SIZE = 262144  # minimum block size
    try:
        with gcs_maker() as gcs:
            target = TEST_BUCKET + "/test"
            # 4 bytes * 262144 repetitions: four times the lowered limit.
            payload = b"7123" * 262144
            upload = gcs.open(
                target, "wb", content_type="application/octet-stream"
            )
            with upload as out:
                out.write(payload)
            assert gcs.cat(target) == payload
    finally:
        # Always restore the module-level constant for other tests.
        gcsfs.core.GCS_MAX_BLOCK_SIZE = saved_limit
Exemple #20
0
def test_gcs_glob():
    """Verify glob matching for a file nested one directory deep."""
    with gcs_maker(True) as gcs:

        def matches(suffix):
            """Glob the test bucket with the given pattern suffix."""
            return gcs.glob(TEST_BUCKET + suffix)

        fn = TEST_BUCKET + "/nested/file1"

        # Bucket-root patterns should not include the nested file.
        assert fn not in matches("/")
        assert fn not in matches("/*")

        # Patterns reaching into nested/ (or recursing) should include it.
        assert fn in matches("/nested/")
        assert fn in matches("/nested/*")
        assert fn in matches("/nested/file*")
        assert fn in matches("/*/*")
        assert fn in matches("/**")

        # Every globbed file is also present in find()'s output.
        for candidate in matches("/nested/*"):
            if not gcs.isfile(candidate):
                continue
            assert candidate in gcs.find(TEST_BUCKET)
Exemple #21
0
def test_map_pickle():
    """Pickling a mapper must not serialise its contents."""
    import pickle

    with gcs_maker() as gcs:
        original = gcs.get_mapper(root)
        original["x"] = b"1234567890"

        # The stored bytes should not appear anywhere in the pickle stream.
        dumped = pickle.dumps(original)
        assert b"1234567890" not in dumped

        # After loading, the mapper exposes the same single key/value pair.
        loaded = pickle.loads(dumped)
        contents = dict(loaded)
        assert contents == {"x": b"1234567890"}
def test_gcs_glob(token_restore):
    """Table-driven check of glob patterns against a nested file."""
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + '/nested/file1'

        # (pattern suffix, whether fn is expected among the matches)
        expectations = [
            ('/', False),
            ('/*', False),
            ('/nested', True),
            ('/nested/*', True),
            ('/nested/file*', True),
            ('/*/*', True),
        ]
        for suffix, should_match in expectations:
            found = fn in gcs.glob(TEST_BUCKET + suffix)
            assert found is should_match

        # Everything globbed under nested/ is also reported by walk().
        for path in gcs.glob(TEST_BUCKET + '/nested/*'):
            assert path in gcs.walk(TEST_BUCKET)

        # A bucket-less wildcard is rejected.
        with pytest.raises(ValueError):
            gcs.glob('*')
Exemple #23
0
def test_read_small(token_restore):
    """Read a file in 3-byte pieces with a 10-byte block size."""
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + '/2014-01-01.csv'
        with gcs.open(fn, 'rb', block_size=10) as f:
            # Keep pulling 3-byte reads until read() returns b''.
            pieces = list(iter(lambda: f.read(3), b''))
            assert gcs.cat(fn) == b''.join(pieces)
            # cache drop
            assert len(f.cache) < len(pieces)
Exemple #24
0
def test_pseudo_dir_find():
    """Pseudo-directories implied by a nested key show up in glob and find."""
    with gcs_maker(False) as fs:
        top_dir = f"{TEST_BUCKET}/a"
        sub_dir = f"{TEST_BUCKET}/a/b"
        leaf = f"{TEST_BUCKET}/a/b/file"

        fs.touch(leaf)

        # The intermediate directories are visible through glob ...
        under_a = set(fs.glob(f"{TEST_BUCKET}/a/*"))
        assert sub_dir in under_a
        under_bucket = set(fs.glob(f"{TEST_BUCKET}/*"))
        assert top_dir in under_bucket

        # ... find() lists only the file unless withdirs=True is given.
        assert fs.find(TEST_BUCKET) == [leaf]
        with_dirs = fs.find(top_dir, withdirs=True)
        assert with_dirs == [top_dir, sub_dir, leaf]
Exemple #25
0
def test_multi_upload(token_restore):
    """Multi-part uploads keep their content and content type.

    The same write sequence runs twice: once with an explicit
    ``content_type='text/plain'`` and once without, in which case the
    content type is expected to be ``application/octet-stream``.
    """
    # (extra keyword arguments for open(), content type expected afterwards)
    scenarios = [
        ({'content_type': 'text/plain'}, 'text/plain'),
        # if content-type is not provided then default should be
        # application/octet-stream
        ({}, 'application/octet-stream'),
    ]
    for open_kwargs, expected_type in scenarios:
        with gcs_maker() as gcs:
            fn = TEST_BUCKET + '/test'
            d = b'01234567' * 2**15

            # something to write on close
            with gcs.open(fn, 'wb', block_size=2**18, **open_kwargs) as f:
                for part in (d, b'xx'):
                    f.write(part)
            assert gcs.cat(fn) == d + b'xx'
            assert gcs.info(fn)['contentType'] == expected_type

            # empty buffer on close
            with gcs.open(fn, 'wb', block_size=2**19, **open_kwargs) as f:
                for part in (d, b'xx', d):
                    f.write(part)
            assert gcs.cat(fn) == d + b'xx' + d
            assert gcs.info(fn)['contentType'] == expected_type
Exemple #26
0
def test_multi_upload():
    """Upload in several writes and verify content and content type."""
    path = TEST_BUCKET + "/test"
    block = b"01234567" * 2**15
    two_parts = (block, b"xx")
    three_parts = (block, b"xx", block)

    with gcs_maker() as gcs:
        # something to write on close
        with gcs.open(path, "wb", content_type="text/plain",
                      block_size=2**18) as f:
            for part in two_parts:
                f.write(part)
        assert gcs.cat(path) == b"".join(two_parts)
        assert gcs.info(path)["contentType"] == "text/plain"

        # empty buffer on close
        with gcs.open(path, "wb", content_type="text/plain",
                      block_size=2**19) as f:
            for part in three_parts:
                f.write(part)
        assert gcs.cat(path) == b"".join(three_parts)
        assert gcs.info(path)["contentType"] == "text/plain"

    # if content-type is not provided then default should be
    # application/octet-stream
    default_type = "application/octet-stream"
    with gcs_maker() as gcs:
        # something to write on close
        with gcs.open(path, "wb", block_size=2**18) as f:
            for part in two_parts:
                f.write(part)
        assert gcs.cat(path) == b"".join(two_parts)
        assert gcs.info(path)["contentType"] == default_type

        # empty buffer on close
        with gcs.open(path, "wb", block_size=2**19) as f:
            for part in three_parts:
                f.write(part)
        assert gcs.cat(path) == b"".join(three_parts)
        assert gcs.info(path)["contentType"] == default_type
Exemple #27
0
def test_read_small():
    """Small sequential reads reproduce the file without growing the cache."""
    with gcs_maker(True) as gcs:
        fn = TEST_BUCKET + "/2014-01-01.csv"
        with gcs.open(fn, "rb", block_size=10) as f:
            pieces = []
            chunk = f.read(3)
            # read() returning b"" marks the end of the file.
            while chunk != b"":
                pieces.append(chunk)
                chunk = f.read(3)

            whole = b"".join(pieces)
            assert gcs.cat(fn) == whole
            # cache drop
            assert len(f.cache.cache) < len(pieces)
Exemple #28
0
def test_readline_partial(token_restore):
    """``readline`` with a size limit returns at most that many bytes."""
    content = b'aaaaa,bbbbb\n12345,6789\n'
    # (positional arguments for readline, bytes expected back), in call order.
    steps = [
        ((5,), b'aaaaa'),
        ((5,), b',bbbb'),
        ((5,), b'b\n'),            # stops at the newline before the limit
        ((), b'12345,6789\n'),     # no limit: the whole next line
    ]
    with gcs_maker() as gcs:
        with gcs.open(a, 'wb') as writer:
            writer.write(content)
        with gcs.open(a, 'rb') as reader:
            for args, expected in steps:
                assert reader.readline(*args) == expected
Exemple #29
0
def test_request_header():
    """Requests carry a ``python-gcsfs/<version>`` User-Agent header."""
    with gcs_maker():
        gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, requester_pays=True)
        # Invoke `call` directly so the returned object's headers can be
        # inspected.
        query = dict(
            delimiter="/",
            prefix="test",
            maxResults=100,
            info_out=True,
        )
        response = gcs.call(
            "GET", "b/{}/o/", TEST_REQUESTER_PAYS_BUCKET, **query
        )
        expected_agent = "python-gcsfs/" + version
        assert response.headers["User-Agent"] == expected_agent
Exemple #30
0
def test_zero_cache_timeout():
    """With ``cache_timeout=0`` a directory listing is not served from cache."""
    with gcs_maker(True, cache_timeout=0) as gcs:
        directory = f"{TEST_BUCKET}/a"
        file_url = f"gs://{directory}/file"
        dir_url = f"gs://{directory}/"

        gcs.touch(file_url)
        gcs.find(dir_url)
        gcs.info(file_url)
        gcs.ls(dir_url)

        # The _times entry and exception below should only be present after
        # https://github.com/intake/filesystem_spec/pull/513.
        recorded = gcs.dircache._times
        if directory not in recorded:
            pytest.skip("fsspec version too early")

        # Looking the directory up in the cache must raise KeyError.
        with pytest.raises(KeyError):
            gcs.dircache[directory]