Esempio n. 1
0
def exists(path, fs_options=None):
    """Check whether a path exists, on the local disk or on a filesystem resolved by ``parse``.

    :param path: Path or URL to check; it is handed to ``parse``, which returns the
        filesystem object (or None) and the path to use with it.
    :param dict fs_options: Options forwarded to ``parse``, e.g. ``{'anon': True}``.
        ``None`` (the default) is treated as an empty dict.
    :return: True when the path exists, False otherwise.
    :rtype: bool

    >>> vaex.file.exists('/you/do/not')
    False

    >>> vaex.file.exists('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})
    True
    """
    # Build a fresh dict per call: a ``{}`` default is a single object shared by
    # every call, so any mutation downstream would leak between calls.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options=fs_options)
    if fs is None:
        # No filesystem object was returned, so fall back to os.path.
        return os.path.exists(path)
    return fs.get_file_info(path).type != pa.fs.FileType.NotFound
Esempio n. 2
0
def size(path, fs_options=None, fs=None):
    """Give the file size in bytes.

    :param path: Path or URL of the file; it is handed to ``parse`` and the resulting
        path is converted with ``stringyfy`` before use.
    :param dict fs_options: Options forwarded to ``parse``, e.g. ``{'anon': True}``.
        ``None`` (the default) is treated as an empty dict.
    :param fs: Filesystem object forwarded to ``parse`` (default None).
    :return: The size reported by ``os.path.getsize`` when ``parse`` returns no
        filesystem, otherwise the ``size`` of the file info returned by the filesystem.

    >>> size(os.path.expanduser('~/.vaex/data/helmi-dezeeuw-2000-FeH-v2.hdf5'))  # doctest: +SKIP
    135323168

    >>> size('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})
    9820562
    """
    # Build a fresh dict per call: a ``{}`` default is a single object shared by
    # every call, so any mutation downstream would leak between calls.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options, fs=fs)
    path = stringyfy(path)
    if fs is None:
        # No filesystem object was returned, so fall back to os.path.
        return os.path.getsize(path)
    # get_file_info is given a one-element list, so the result is indexed.
    info = fs.get_file_info([path])[0]
    return info.size
Esempio n. 3
0
def tokenize(path, fs_options=None):
    """Deterministic token for a file, useful in combination with dask or detecting file changes.

    Based on mtime (modification time), file size, and the path. May lead to
    false negative if the path changes, but not the content.

    :param path: Path or URL of the file; it is handed to ``parse`` and the resulting
        path is converted with ``stringyfy`` before use.
    :param dict fs_options: Options forwarded to ``parse``, e.g. ``{'anon': True}``.
        ``None`` (the default) is treated as an empty dict.
    :return: The result of ``vaex.cache.tokenize`` applied to
        ``('file', (path, mtime, size))``.

    >>> tokenize('/data/taxi.parquet')  # doctest: +SKIP
    '0171ec50cb2cf71b8e4f813212063a19'

    >>> tokenize('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})  # doctest: +SKIP
    '7c962e2d8c21b6a3681afb682d3bf91b'
    """
    # Build a fresh dict per call: a ``{}`` default is a single object shared by
    # every call, so any mutation downstream would leak between calls.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options)
    path = stringyfy(path)
    # ``file_size`` rather than ``size`` so the module-level ``size`` function
    # is not shadowed inside this function.
    if fs is None:
        # No filesystem object was returned, so fall back to os.path.
        mtime = os.path.getmtime(path)
        file_size = os.path.getsize(path)
    else:
        info = fs.get_file_info(path)
        mtime = info.mtime
        file_size = info.size
    # NOTE(review): function-scope import, presumably to avoid an import cycle;
    # confirm before moving it to module level.
    import vaex.cache
    return vaex.cache.tokenize(('file', (path, mtime, file_size)))