def selection(self, ref):
    """ Get the shape of the target dataspace selection referred to by *ref* """
    with phil:
        from . import selections
        sid = h5r.get_region(ref, self.id)
        return selections.guess_shape(sid)
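# --- Illustrative usage sketch (assumption: not part of the original source) ---
# Shows how the selection() proxy method above is reached through the public
# h5py API.  The file and dataset names here are hypothetical.
import h5py
import numpy as np

with h5py.File("example.h5", "w") as f:
    dset = f.create_dataset("data", data=np.arange(100).reshape(10, 10))
    ref = dset.regionref[2:5, 3:7]            # create a region reference
    print(dset.regionref.selection(ref))      # shape of the selected region: (3, 4)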
def __getitem__(self, args):
    """ Read a slice from the HDF5 dataset.

    Takes slices and recarray-style field names (more than one is
    allowed!) in any order.  Obeys basic NumPy rules, including
    broadcasting.

    Also supports:

    * Boolean "mask" array indexing
    """
    args = args if isinstance(args, tuple) else (args,)

    # Sort field indices from the rest of the args.
    names = tuple(x for x in args if isinstance(x, str))
    args = tuple(x for x in args if not isinstance(x, str))

    def strip_fields(basetype):
        """ Strip extra dtype information from special types """
        if basetype.kind == 'O':
            return numpy.dtype('O')
        if basetype.fields is not None:
            if basetype.kind in ('i', 'u'):
                return basetype.fields['enum'][0]
            fields = []
            for name in basetype.names:
                fff = basetype.fields[name]
                if len(fff) == 3:
                    (subtype, offset, meta) = fff
                else:
                    subtype, meta = fff
                    offset = 0
                subtype = strip_fields(subtype)
                fields.append((name, subtype))
            return numpy.dtype(fields)
        return basetype

    def readtime_dtype(basetype, names):
        """ Make a NumPy dtype appropriate for reading """
        basetype = strip_fields(basetype)

        if len(names) == 0:  # Not compound, or we want all fields
            return basetype

        if basetype.names is None:  # Names provided, but not compound
            raise ValueError("Field names only allowed for compound types")

        for name in names:  # Check all names are legal
            if name not in basetype.names:
                raise ValueError("Field %s does not appear in this type." % name)

        return numpy.dtype([(name, basetype.fields[name][0]) for name in names])

    if self._local.astype is not None:
        new_dtype = readtime_dtype(self._local.astype, names)
    else:
        # This is necessary because in the case of array types, NumPy
        # discards the array information at the top level.
        new_dtype = readtime_dtype(self.id.dtype, names)
    mtype = h5t.py_create(new_dtype)

    # === Special-case region references ====
    if len(args) == 1 and isinstance(args[0], h5r.RegionReference):

        obj = h5r.dereference(args[0], self.id)
        if obj != self.id:
            raise ValueError("Region reference must point to this dataset")

        sid = h5r.get_region(args[0], self.id)
        mshape = sel.guess_shape(sid)
        if mshape is None:
            return numpy.array((0,), dtype=new_dtype)
        if numpy.product(mshape) == 0:
            return numpy.array(mshape, dtype=new_dtype)
        out = numpy.empty(mshape, dtype=new_dtype)
        sid_out = h5s.create_simple(mshape)
        sid_out.select_all()
        self.id.read(sid_out, sid, out, mtype)
        return out

    # === Check for zero-sized datasets =====
    if numpy.product(self.shape) == 0:
        # These are the only access methods NumPy allows for such objects
        if args == (Ellipsis,) or args == tuple():
            return numpy.empty(self.shape, dtype=new_dtype)

    # === Scalar dataspaces =================
    if self.shape == ():
        fspace = self.id.get_space()
        selection = sel2.select_read(fspace, args)
        arr = numpy.ndarray(selection.mshape, dtype=new_dtype)
        for mspace, fspace in selection:
            self.id.read(mspace, fspace, arr, mtype)
        if len(names) == 1:
            arr = arr[names[0]]
        if selection.mshape is None:
            return arr[()]
        return arr

    # === Everything else ===================

    # Perform the dataspace selection.
    selection = sel.select(self.shape, args, dsid=self.id)

    if selection.nselect == 0:
        return numpy.ndarray(selection.mshape, dtype=new_dtype)

    # Up-converting to (1,) so that numpy.ndarray correctly creates
    # np.void rows in case of multi-field dtype.  (issue 135)
    single_element = selection.mshape == ()
    mshape = (1,) if single_element else selection.mshape
    arr = numpy.ndarray(mshape, new_dtype, order='C')

    # HDF5 has a bug where if the memory shape has a different rank
    # than the dataset, the read is very slow
    if len(mshape) < len(self.shape):
        # pad with ones
        mshape = (1,) * (len(self.shape) - len(mshape)) + mshape

    # Perform the actual read
    mspace = h5s.create_simple(mshape)
    fspace = selection._id
    self.id.read(mspace, fspace, arr, mtype)

    # Patch up the output for NumPy
    if len(names) == 1:
        arr = arr[names[0]]  # Single-field recarray convention
    if arr.shape == ():
        arr = numpy.asscalar(arr)
    if single_element:
        arr = arr[0]
    return arr
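# --- Illustrative usage sketch (assumption: not part of the original source) ---
# Exercises the __getitem__ features documented above: plain slicing, field-name
# selection on a compound dtype, and boolean "mask" array indexing.  The file
# and dataset names are hypothetical.
import h5py
import numpy as np

compound = np.dtype([('x', 'f8'), ('y', 'i4')])
records = np.zeros(5, dtype=compound)
records['x'] = np.linspace(0.0, 1.0, 5)

with h5py.File("example.h5", "w") as f:
    rec = f.create_dataset("records", data=records)
    print(rec[1:4])              # slice of compound rows
    print(rec[1:4, 'x'])         # same slice restricted to one field

    vals = f.create_dataset("values", data=np.arange(10))
    mask = vals[...] % 2 == 0    # boolean mask, same shape as the dataset
    print(vals[mask])            # boolean "mask" array indexing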
def select(shape, args, dsid):
    """ High-level routine to generate a selection from arbitrary arguments
    to __getitem__.  The arguments should be the following:

    shape
        Shape of the "source" dataspace.

    args
        Either a single argument or a tuple of arguments.  See below for
        supported classes of argument.

    dsid
        A h5py.h5d.DatasetID instance representing the source dataset.

    Argument classes:

    Single Selection instance
        Returns the argument.

    numpy.ndarray
        Must be a boolean mask.  Returns a PointSelection instance.

    RegionReference
        Returns a Selection instance.

    Indices, slices, ellipses only
        Returns a SimpleSelection instance

    Indices, slices, ellipses, lists or boolean index arrays
        Returns a FancySelection instance.
    """
    if not isinstance(args, tuple):
        args = (args,)

    # "Special" indexing objects
    if len(args) == 1:

        arg = args[0]

        if isinstance(arg, Selection):
            if arg.shape != shape:
                raise TypeError("Mismatched selection shape")
            return arg

        elif isinstance(arg, np.ndarray):
            sel = PointSelection(shape)
            sel[arg]
            return sel

        elif isinstance(arg, h5r.RegionReference):
            sid = h5r.get_region(arg, dsid)
            if shape != sid.shape:
                raise TypeError("Reference shape does not match dataset shape")
            return Selection(shape, spaceid=sid)

    for a in args:
        if not isinstance(a, slice) and a is not Ellipsis:
            try:
                int(a)
            except Exception:
                sel = FancySelection(shape)
                sel[args]
                return sel

    sel = SimpleSelection(shape)
    sel[args]
    return sel
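# --- Illustrative sketch (assumption: not part of the original source) ---
# Maps example __getitem__ arguments onto the selection classes that select()
# above chooses for them.  The private import path and the select(shape, args,
# dsid) call signature follow the snippet above and may differ in other h5py
# releases; the file and dataset names are hypothetical.
import h5py
import numpy as np
from h5py._hl import selections as sel

with h5py.File("example.h5", "w") as f:
    dset = f.create_dataset("data", data=np.arange(24).reshape(4, 6))

    s1 = sel.select(dset.shape, (slice(0, 2), slice(1, 4)), dset.id)
    print(type(s1).__name__)   # SimpleSelection: indices/slices/ellipses only

    s2 = sel.select(dset.shape, ([0, 2, 3], slice(None)), dset.id)
    print(type(s2).__name__)   # FancySelection: a list index is present

    mask = np.zeros(dset.shape, dtype=bool)
    mask[1, 2] = True
    s3 = sel.select(dset.shape, (mask,), dset.id)
    print(type(s3).__name__)   # PointSelection: single boolean mask argument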
def __getitem__(self, args, new_dtype=None):
    """ Read a slice from the HDF5 dataset.

    Takes slices and recarray-style field names (more than one is
    allowed!) in any order.  Obeys basic NumPy rules, including
    broadcasting.
    """
    # This boilerplate code is based on h5py.Dataset.__getitem__
    args = args if isinstance(args, tuple) else (args,)

    if new_dtype is None:
        new_dtype = getattr(self._local, 'astype', None)

    # Sort field names from the rest of the args.
    names = tuple(x for x in args if isinstance(x, str))

    if names:
        # Read a subset of the fields in this structured dtype
        if len(names) == 1:
            names = names[0]  # Read with simpler dtype of this field
        args = tuple(x for x in args if not isinstance(x, str))
        return self.fields(names, _prior_dtype=new_dtype)[args]

    if new_dtype is None:
        new_dtype = self.dtype
    mtype = h5t.py_create(new_dtype)

    # === Special-case region references ====
    if len(args) == 1 and isinstance(args[0], h5r.RegionReference):

        obj = h5r.dereference(args[0], self.id)
        if obj != self.id:
            raise ValueError("Region reference must point to this dataset")

        sid = h5r.get_region(args[0], self.id)
        mshape = guess_shape(sid)
        if mshape is None:
            # 0D with no data (NULL or deselected SCALAR)
            return Empty(new_dtype)
        out = np.empty(mshape, dtype=new_dtype)
        if out.size == 0:
            return out

        sid_out = h5s.create_simple(mshape)
        sid_out.select_all()
        self.id.read(sid_out, sid, out, mtype)
        return out

    # === END CODE FROM h5py.Dataset.__getitem__ ===

    idx = ndindex(args).reduce(self.shape)

    arr = np.ndarray(idx.newshape(self.shape), new_dtype, order='C')

    for c, index in as_subchunks(idx, self.shape, self.chunks):
        if isinstance(self.id.data_dict[c], (slice, Slice, tuple, Tuple)):
            raw_idx = Tuple(self.id.data_dict[c],
                            *[slice(0, len(i)) for i in c.args[1:]]).raw
            a = self.id._read_chunk(raw_idx)
            self.id.data_dict[c] = a

        if self.id.data_dict[c].size != 0:
            arr_idx = c.as_subindex(idx)
            arr[arr_idx.raw] = self.id.data_dict[c][index.raw]

    return arr
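# --- Illustrative sketch (assumption: not part of the original source) ---
# Demonstrates the ndindex operations the chunked read loop above relies on:
# reduce(), newshape(), as_subindex() and .raw.  The snippet above calls an
# as_subchunks helper with its own signature; ndindex's ChunkSize.as_subchunks
# is used here instead, purely for demonstration, with in-memory NumPy data
# standing in for stored chunks.
import numpy as np
from ndindex import ndindex, ChunkSize

shape = (8, 8)
chunks = ChunkSize((4, 4))
data = np.arange(64).reshape(shape)

idx = ndindex((slice(1, 6), slice(2, 7))).reduce(shape)
out = np.empty(idx.newshape(shape), dtype=data.dtype)

for c in chunks.as_subchunks(idx, shape):
    chunk_data = data[c.raw]                                    # data for this chunk
    out[c.as_subindex(idx).raw] = chunk_data[idx.as_subindex(c).raw]

assert (out == data[1:6, 2:7]).all()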
def shape(self, ref):
    """ Get the shape of the target dataspace referred to by *ref*. """
    with phil:
        sid = h5r.get_region(ref, self.id)
        return sid.shape
def shape(self, ref):
    """ Get the shape of the target dataspace referred to by *ref*. """
    sid = h5r.get_region(ref, self.id)
    return sid.shape
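# --- Illustrative usage sketch (assumption: not part of the original source) ---
# Contrasts shape() above with selection(): shape() reports the extent of the
# referenced dataset's full dataspace, not the extent of the selection inside
# it.  The file and dataset names are hypothetical.
import h5py
import numpy as np

with h5py.File("example.h5", "w") as f:
    dset = f.create_dataset("data", data=np.zeros((10, 10)))
    ref = dset.regionref[2:5, 3:7]
    print(dset.regionref.shape(ref))        # dataspace shape: (10, 10)
    print(dset.regionref.selection(ref))    # selection shape: (3, 4)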