def exists(path, fs_options=None, fs=None):
    """Checks if file exists.

    >>> vaex.file.exists('/you/do/not')
    False
    >>> vaex.file.exists('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})
    True

    :param path: Path to check; it is resolved through ``parse``.
    :param dict fs_options: Options forwarded to ``parse``. ``None`` (the default)
        is treated as an empty dict.
    :param fs: Optional filesystem object forwarded to ``parse`` as ``fs=``.
    :return: Result of ``os.path.exists`` when ``parse`` yields no filesystem,
        otherwise whether the filesystem reports a type other than ``NotFound``.
    :rtype: bool
    """
    # Build a fresh dict per call rather than sharing one mutable default
    # object between all invocations.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options=fs_options, fs=fs)
    if fs is None:
        # No filesystem object came back from parse: use the os module.
        return os.path.exists(path)
    return fs.get_file_info(path).type != pa.fs.FileType.NotFound
def size(path, fs_options=None, fs=None):
    """Gives the file size in bytes

    >>> size(os.path.expanduser('~/.vaex/data/helmi-dezeeuw-2000-FeH-v2.hdf5'))  # doctest: +SKIP
    135323168
    >>> size('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})
    9820562

    :param path: Path of the file; it is resolved through ``parse`` and then
        converted with ``stringyfy``.
    :param dict fs_options: Options forwarded to ``parse``. ``None`` (the default)
        is treated as an empty dict.
    :param fs: Optional filesystem object forwarded to ``parse`` as ``fs=``.
    :return: ``os.path.getsize(path)`` when ``parse`` yields no filesystem,
        otherwise the ``size`` attribute of the filesystem's file info.
    """
    # Build a fresh dict per call rather than sharing one mutable default
    # object between all invocations.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options, fs=fs)
    path = stringyfy(path)
    if fs is None:
        return os.path.getsize(path)
    # get_file_info is called with a one-element list, so unwrap the single entry.
    info = fs.get_file_info([path])[0]
    return info.size
def tokenize(path, fs_options=None, fs=None):
    """Deterministic token for a file, useful in combination with dask or detecting file changes.

    Based on mtime (modification time), file size, and the path. May lead to
    false negative if the path changes, but not the content.

    >>> tokenize('/data/taxi.parquet')  # doctest: +SKIP
    '0171ec50cb2cf71b8e4f813212063a19'
    >>> tokenize('s3://vaex/taxi/nyc_taxi_2015_mini.parquet', fs_options={'anon': True})  # doctest: +SKIP
    '7c962e2d8c21b6a3681afb682d3bf91b'

    :param path: Path of the file; it is resolved through ``parse`` and then
        converted with ``stringyfy``.
    :param dict fs_options: Options forwarded to ``parse``. ``None`` (the default)
        is treated as an empty dict.
    :param fs: Optional filesystem object forwarded to ``parse`` as ``fs=``.
    :return: The result of ``vaex.cache.tokenize`` applied to
        ``('file', (path, mtime, size))``.
    """
    # Build a fresh dict per call rather than sharing one mutable default
    # object between all invocations.
    if fs_options is None:
        fs_options = {}
    fs, path = parse(path, fs_options, fs=fs)
    path = stringyfy(path)
    if fs is None:
        mtime = os.path.getmtime(path)
        nbytes = os.path.getsize(path)
    else:
        info = fs.get_file_info(path)
        mtime = info.mtime
        nbytes = info.size
    # NOTE(review): imported inside the function rather than at module level,
    # presumably to avoid an import cycle -- confirm before hoisting.
    import vaex.cache
    # `nbytes` (not `size`) so the module-level size() function is not shadowed.
    return vaex.cache.tokenize(('file', (path, mtime, nbytes)))