Beispiel #1
0
 def can_open(cls, path, *args, **kwargs):
     """Return whether ``path`` looks like an HDF5 file this class can read.

     The file is opened through ``s3.open``. It is accepted when it starts
     with the HDF5 signature and its root either contains one of the members
     ``"data"``, ``"columns"`` or ``"table"``, or holds at least one
     ``h5py.Dataset`` directly.

     :param path: location handed to ``s3.open``.
     :param args: accepted for interface compatibility; unused here.
     :param kwargs: accepted for interface compatibility; unused here.
     :return: ``True`` if the file is recognised, ``False`` otherwise
         (including when the file cannot be read or parsed).
     """
     # Before we try to open it with h5py, we check the signature (quicker).
     try:
         with s3.open(path, "rb") as f:
             signature = f.read(4)
     except Exception:
         # Best effort: an unreadable file is simply "cannot open", but do not
         # swallow KeyboardInterrupt/SystemExit like a bare ``except`` would.
         logger.exception("could not read 4 bytes from %r", path)
         return False
     # First four bytes of the HDF5 format signature ("\x89HDF").
     if signature != b"\x89\x48\x44\x46":
         return False
     with s3.open(path, "rb") as f:
         try:
             h5file = h5py.File(f, "r")
         except Exception:
             logger.exception("could not open file as hdf5")
             return False
         with h5file:
             if any(key in h5file for key in ("data", "columns", "table")):
                 return True
             # Fall back to accepting any dataset stored directly in the root.
             if any(isinstance(member, h5py.Dataset)
                    for member in h5file.values()):
                 return True
     logger.debug("file %s has no data or columns group", path)
     return False
Beispiel #2
0
 def __init__(self, filename, write=False):
     """Open an HDF5 file from a path or from an already opened handle.

     :param filename: either a string path or an object exposing ``name``
         (used as the file name); objects with a ``read`` attribute are
         passed to h5py directly as the file handle.
     :param write: open the file read-write (``"r+"``) instead of
         read-only (``"r"``).
     """
     # Strings starting with 's3://' and non-string inputs are both
     # flagged ``nommap`` for the parent constructor.
     if isinstance(filename, six.string_types):
         name = filename
         skip_mmap = filename.startswith('s3://')
     else:
         name = filename.name
         skip_mmap = True
     super(Hdf5MemoryMapped, self).__init__(name,
                                            write=write,
                                            nommap=skip_mmap)

     # Pick what h5py should open; every real file object is also recorded
     # in ``self.file_map`` under the file name.
     if hasattr(filename, 'read'):
         # support file handle for testing
         source = filename
         self.file_map[self.filename] = source
     elif s3.is_s3_path(filename):
         source = s3.open(self.filename)
         self.file_map[self.filename] = source
     elif self.nommap:
         source = open(self.filename, 'rb+' if write else 'rb')
         self.file_map[self.filename] = source
     else:
         # this is the only path that will have regular mmapping
         source = self.filename

     h5_mode = "r+" if write else "r"
     self.h5file = h5py.File(source, h5_mode)
     self.h5table_root_name = None
     self._version = 1
     self._load()
Beispiel #3
0
 def _open(self, path):
     """Open ``path`` with h5py and store the result in ``self.h5file``.

     :param path: an object with a ``read`` attribute (used as-is), or a
         location checked against ``s3.is_s3_path`` / ``gcs.is_gs_path``.
         Note that the S3, GCS and local branches open ``self.path``
         rather than the ``path`` argument.
     """
     # ``tracked`` says whether a real file object was obtained and must be
     # recorded in ``self.file_map``.
     tracked = True
     if hasattr(path, 'read'):
         # support file handle for testing
         handle = path
     else:
         binary_mode = 'rb+' if self.write else 'rb'
         if s3.is_s3_path(path):
             handle = s3.open(self.path)
         elif gcs.is_gs_path(path):
             handle = gcs.open(self.path)
         elif self.nommap:
             handle = open(self.path, binary_mode)
         else:
             # this is the only path that will have regular mmapping
             handle = self.path
             tracked = False
     if tracked:
         self.file_map[self.path] = handle
     h5_mode = "r+" if self.write else "r"
     self.h5file = h5py.File(handle, h5_mode)