def can_open(cls, path, *args, **kwargs):
    """Return True when ``path`` looks like an HDF5 file this class can read.

    The check is done in two steps:

    1. Read the first 4 bytes and compare them with the HDF5 signature
       (cheaper than letting h5py parse the file).
    2. Open the file with h5py and accept it when the root group contains a
       ``data``, ``columns`` or ``table`` entry, or at least one dataset
       directly under the root.

    Paths starting with ``gs://`` are opened through ``gcs.open``; every other
    path goes through ``s3.open``.
    NOTE(review): presumably ``s3.open`` also handles non-s3 (local) paths --
    confirm against the ``s3`` module.

    :param str path: location of the candidate file
    :param args: accepted for interface compatibility, unused
    :param kwargs: accepted for interface compatibility, unused
    :return: True if the file is a recognised HDF5 file, False otherwise
        (including when the signature or the HDF5 structure cannot be read)
    :rtype: bool
    """
    opener = gcs.open if path.startswith('gs://') else s3.open

    # before we try to open it with h5py, we check the signature (quicker)
    try:
        with opener(path, "rb") as f:
            signature = f.read(4)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        logger.exception("could not read 4 bytes from %r", path)
        return False
    if signature != b"\x89\x48\x44\x46":
        return False

    with opener(path, "rb") as f:
        try:
            h5file = h5py.File(f, "r")
        except Exception:
            logger.exception("could not open file as hdf5")
            return False
        with h5file:
            recognised = (
                ("data" in h5file)
                or ("columns" in h5file)
                or ("table" in h5file)
                or any(isinstance(item, h5py.Dataset) for item in h5file.values())
            )

    if not recognised:
        logger.debug("file %s has no data or columns group", path)
    return recognised
def __init__(self, filename, write=False):
    """Open an HDF5 file given either a path or an already-open file object.

    :param filename: a string path, or an object exposing a ``name`` attribute;
        objects that also have a ``read`` attribute are handed to h5py as-is
    :param bool write: when True the HDF5 file is opened with ``"r+"``
        instead of ``"r"``
    """
    if not isinstance(filename, six.string_types):
        # Non-string input: the base class gets its ``name`` and mmapping is off.
        super(Hdf5MemoryMapped, self).__init__(filename.name, write=write, nommap=True)
    else:
        # s3 and gs paths are never memory mapped.
        remote = s3.is_s3_path(filename) or gcs.is_gs_path(filename)
        super(Hdf5MemoryMapped, self).__init__(filename, write=write, nommap=remote)

    # Decide what h5py should open, and whether that object has to be
    # recorded in ``self.file_map``.
    track_handle = True
    if hasattr(filename, 'read'):
        # support file handle for testing
        h5_source = filename
    elif s3.is_s3_path(filename):
        h5_source = s3.open(self.filename)
    elif gcs.is_gs_path(filename):
        h5_source = gcs.open(filename)
    elif self.nommap:
        h5_source = open(self.filename, 'rb+' if write else 'rb')
    else:
        # this is the only path that will have regular mmapping:
        # h5py receives the plain filename and nothing is registered
        h5_source = self.filename
        track_handle = False

    if track_handle:
        self.file_map[self.filename] = h5_source

    h5_mode = "r+" if write else "r"
    self.h5file = h5py.File(h5_source, h5_mode)
    self.h5table_root_name = None
    self._version = 1
    self._load()
def _open(self, path):
    """Open ``path`` with h5py and store the result in ``self.h5file``.

    Any file object that gets opened (or passed in) is recorded in
    ``self.file_map`` under ``self.path``; only the plain-filename case
    leaves ``self.file_map`` untouched.

    :param path: a path, or an object with a ``read`` attribute
    """
    track_handle = True
    if hasattr(path, 'read'):
        # support file handle for testing
        h5_source = path
    else:
        binary_mode = 'rb+' if self.write else 'rb'
        if s3.is_s3_path(path):
            h5_source = s3.open(self.path)
        elif gcs.is_gs_path(path):
            h5_source = gcs.open(self.path)
        elif self.nommap:
            h5_source = open(self.path, binary_mode)
        else:
            # this is the only path that will have regular mmapping
            h5_source = self.path
            track_handle = False

    if track_handle:
        self.file_map[self.path] = h5_source

    h5_mode = "r+" if self.write else "r"
    self.h5file = h5py.File(h5_source, h5_mode)