Ejemplo n.º 1
0
def exists(path, fs_options=None):
    """Checks if file exists.

    >>> vaex.file.exists('/you/do/not')
    False

    >>> vaex.file.exists('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})
    True

    :param path: Path to check; it is handed to ``parse`` to decide whether a
        filesystem object is needed to access it.
    :param dict fs_options: Options forwarded to ``parse``. When omitted (or None)
        a new empty dict is used.
    :return: True when the path exists, False otherwise.
    :rtype: bool
    """
    # Create the default per call: a `{}` default in the signature is one shared
    # object, so any mutation further down would leak into later calls.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options=fs_options)
    if fs is None:
        # parse returned no filesystem object: use the os module directly.
        return os.path.exists(path)
    # A missing entry is detected by comparing the reported type to NotFound.
    return fs.get_file_info(path).type != pa.fs.FileType.NotFound
Ejemplo n.º 2
0
def size(path, fs_options=None, fs=None):
    """Gives the file size in bytes

    >>> size(os.path.expanduser('~/.vaex/data/helmi-dezeeuw-2000-FeH-v2.hdf5'))  # doctest: +SKIP
    135323168

    >>> size('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})
    9820562

    :param path: Path of the file; it is handed to ``parse`` together with
        ``fs_options`` and ``fs``.
    :param dict fs_options: Options forwarded to ``parse``. When omitted (or None)
        a new empty dict is used.
    :param fs: Optional filesystem object, forwarded to ``parse``.
    :return: The size reported by ``os.path.getsize`` when ``parse`` yields no
        filesystem, otherwise the ``size`` attribute of the filesystem's file info.
    """
    # Create the default per call: a `{}` default in the signature is one shared
    # object, so any mutation further down would leak into later calls.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options, fs=fs)
    path = stringyfy(path)
    if fs is None:
        return os.path.getsize(path)
    # get_file_info is called with a one-element list, so take the single result.
    # NOTE(review): for a missing remote file this presumably returns None instead
    # of raising like os.path.getsize does - confirm against the pyarrow docs.
    info = fs.get_file_info([path])[0]
    return info.size
Ejemplo n.º 3
0
def tokenize(path, fs_options=None):
    """Deterministic token for a file, useful in combination with dask or detecting file changes.

    Based on mtime (modification time), file size, and the path. May lead to
    false negative if the path changes, but not the content.

    >>> tokenize('/data/taxi.parquet')  # doctest: +SKIP
    '0171ec50cb2cf71b8e4f813212063a19'

    >>> tokenize('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})  # doctest: +SKIP
    '7c962e2d8c21b6a3681afb682d3bf91b'

    :param path: Path of the file; it is handed to ``parse`` together with ``fs_options``.
    :param dict fs_options: Options forwarded to ``parse``. When omitted (or None)
        a new empty dict is used.
    :return: The result of ``vaex.cache.tokenize`` applied to
        ``('file', (path, mtime, size))``.
    """
    # Create the default per call: a `{}` default in the signature is one shared
    # object, so any mutation further down would leak into later calls.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options)
    path = stringyfy(path)
    if fs is None:
        # One stat call so mtime and size describe the same state of the file
        # (os.path.getmtime/getsize would each stat the file separately).
        stat_result = os.stat(path)
        mtime = stat_result.st_mtime
        nbytes = stat_result.st_size
    else:
        info = fs.get_file_info(path)
        mtime = info.mtime
        nbytes = info.size
    # Function-level import, kept as in the original (presumably to avoid a
    # circular import between vaex.file and vaex.cache - TODO confirm).
    import vaex.cache
    return vaex.cache.tokenize(('file', (path, mtime, nbytes)))