Example 1
0
 def can_open(cls, path, *args, **kwargs):
     """Return True if *path* points to a readable HDF5 file.

     The check is two-staged: first the 4-byte HDF5 signature is read
     (cheap), and only then is the file actually opened with h5py to
     verify it contains usable content (a "data", "columns" or "table"
     group, or at least one dataset at the root).

     :param path: remote path; ``gs://`` paths go through ``gcs``,
         everything else through ``s3``.
     :return: bool — False on any read/open failure (failures are logged,
         never raised).
     """
     # Pick the opener once; the original duplicated the entire body
     # for the gs:// and s3 cases.
     opener = gcs.open if path.startswith('gs://') else s3.open
     # Cheap pre-check: HDF5 files start with b"\x89HDF".
     try:
         with opener(path, "rb") as f:
             is_hdf5 = f.read(4) == b"\x89\x48\x44\x46"
     except Exception:
         # Narrowed from a bare except: so SystemExit/KeyboardInterrupt
         # propagate; return False (not None) so the predicate is
         # consistently boolean.
         logger.exception("could not read 4 bytes from %r", path)
         return False
     if not is_hdf5:
         return False
     with opener(path, "rb") as f:
         try:
             h5file = h5py.File(f, "r")
         except Exception:
             logger.exception("could not open file as hdf5")
             return False
         # h5py.File either raises or returns a file object, so no
         # None-check is needed here (the original had an unreachable
         # else branch).
         with h5file:
             root_datasets = [
                 dataset for name, dataset in h5file.items()
                 if isinstance(dataset, h5py.Dataset)
             ]
             return ("data" in h5file) or ("columns" in h5file) or \
                 ("table" in h5file) or len(root_datasets) > 0
Example 2
0
 def __init__(self, filename, write=False):
     """Open an HDF5 dataset from a path or an open file object.

     :param filename: a path string (local, s3, or gs) or a file-like
         object with a ``read`` method (used in tests).
     :param write: open read-write when True, read-only otherwise.
     """
     if isinstance(filename, six.string_types):
         # Remote (s3/gs) files cannot be memory-mapped.
         remote = s3.is_s3_path(filename) or gcs.is_gs_path(filename)
         super(Hdf5MemoryMapped, self).__init__(filename,
                                                write=write,
                                                nommap=remote)
     else:
         # File-like object: take its name, and mmapping is impossible.
         super(Hdf5MemoryMapped, self).__init__(filename.name,
                                                write=write,
                                                nommap=True)
     if hasattr(filename, 'read'):
         # Support passing an already-open file handle (testing).
         handle = filename
         self.file_map[self.filename] = handle
     else:
         open_mode = 'rb+' if write else 'rb'
         if s3.is_s3_path(filename):
             handle = s3.open(self.filename)
             self.file_map[self.filename] = handle
         elif gcs.is_gs_path(filename):
             # NOTE(review): opens `filename` here but `self.filename`
             # in the s3 branch — confirm the asymmetry is intended.
             handle = gcs.open(filename)
             self.file_map[self.filename] = handle
         elif self.nommap:
             handle = open(self.filename, open_mode)
             self.file_map[self.filename] = handle
         else:
             # Only this branch keeps regular mmapping: hand h5py the
             # path itself instead of a file object.
             handle = self.filename
     self.h5file = h5py.File(handle, "r+" if write else "r")
     self.h5table_root_name = None
     self._version = 1
     self._load()
Example 3
0
 def _open(self, path):
     """Open *path* and bind the resulting h5py.File to ``self.h5file``.

     :param path: a path string (local, s3, or gs) or a file-like
         object with a ``read`` method (used in tests).
     """
     if hasattr(path, 'read'):
         # An already-open file handle was passed in (testing).
         handle = path
         self.file_map[self.path] = handle
     else:
         open_mode = 'rb+' if self.write else 'rb'
         if s3.is_s3_path(path):
             handle = s3.open(self.path)
             self.file_map[self.path] = handle
         elif gcs.is_gs_path(path):
             handle = gcs.open(self.path)
             self.file_map[self.path] = handle
         elif self.nommap:
             handle = open(self.path, open_mode)
             self.file_map[self.path] = handle
         else:
             # Only this branch keeps regular mmapping: hand h5py the
             # path itself instead of a file object.
             handle = self.path
     self.h5file = h5py.File(handle, "r+" if self.write else "r")