def __getitem__(self, slice_):
    """| Gets a slice or slices from dataset
    | Usage:

    >>> return ds["image", 5, 0:1920, 0:1080, 0:3].compute() # returns numpy array
    >>> images = ds["image"]
    >>> return images[5].compute() # returns numpy array
    >>> images = ds["image"]
    >>> image = images[5]
    >>> return image[0:1920, 0:1080, 0:3].compute()

    | The argument is split by ``slice_split`` into a key (``subpath``) and a
    | list of indices/slices, and the result depends on which parts are present:

    * slices only: a ``DatasetView`` built from the sample count and offset
      reported by ``slice_extract_info`` (exactly one slice is accepted)
    * key only: a ``TensorView`` over the whole first dimension when the key is
      a tensor, otherwise the nested dictionary from ``_get_dictionary``
    * key and slices: a ``TensorView`` for a tensor key, otherwise the nested
      dictionary sliced with the single given slice

    | Views are returned as they are when ``self.lazy`` is set, otherwise their
    | ``compute()`` result is returned.

    Raises
    ------
    ValueError
        If several slices are given without a key, or several slices are
        applied to a key that is not a tensor.
    """
    # A lone key/index/slice is wrapped into a list; a string is iterable but
    # still counts as one key.
    is_single = isinstance(slice_, str) or not isinstance(slice_, abc.Iterable)
    items = [slice_] if is_single else list(slice_)
    subpath, slice_list = slice_split(items)

    if not subpath:
        # Slicing the dataset itself.
        if len(slice_list) > 1:
            raise ValueError(
                "Can't slice a dataset with multiple slices without subpath"
            )
        first = slice_list[0]
        num, ofs = slice_extract_info(first, self.shape[0])
        return DatasetView(
            dataset=self,
            num_samples=num,
            offset=ofs,
            squeeze_dim=isinstance(first, int),
            lazy=self.lazy,
        )

    if not slice_list:
        # Key only: whole tensor, or a dictionary of everything under the key.
        if subpath not in self._tensors.keys():
            return self._get_dictionary(subpath)
        whole = TensorView(
            dataset=self,
            subpath=subpath,
            slice_=slice(0, self.shape[0]),
            lazy=self.lazy,
        )
        return whole if self.lazy else whole.compute()

    # Key and slices. The pair returned here is not used in this branch; the
    # call is kept because the helper may reject an invalid first index
    # (its body is not visible here - TODO confirm).
    _num, _ofs = slice_extract_info(slice_list[0], self.shape[0])
    if subpath in self._tensors.keys():
        view = TensorView(
            dataset=self, subpath=subpath, slice_=slice_list, lazy=self.lazy
        )
        return view if self.lazy else view.compute()
    if len(slice_list) > 1:
        raise ValueError("You can't slice a dictionary of Tensors")
    return self._get_dictionary(subpath, slice_list[0])
def _get_dictionary(self, subpath, slice_=None):
    """Gets dictionary from dataset given incomplete subpath

    Every key of the dataset that starts with ``subpath`` is collected into a
    nested dictionary whose levels follow the remaining "/"-separated parts of
    the key.

    Parameters
    ----------
    subpath: str
        Key prefix to look up; a trailing "/" is appended when it is missing.
    slice_: optional
        Index or slice handed to every ``TensorView``. When it is ``None`` the
        whole first dimension, ``slice(0, self._shape[0])``, is used.

    Returns
    -------
    dict
        Nested dictionary holding ``TensorView`` objects when ``self.lazy`` is
        set, otherwise the results of their ``compute()``.

    Raises
    ------
    KeyError
        If no key of the dataset starts with ``subpath``.
    """
    prefix = subpath if subpath.endswith("/") else subpath + "/"
    # Only a missing slice falls back to the full range. A truthiness test
    # (``slice_ or ...``) would also discard the valid index 0 and silently
    # return every sample instead of the first one.
    if slice_ is None:
        slice_ = slice(0, self._shape[0])

    tensor_dict = {}
    for key in self.keys:
        if not key.startswith(prefix):
            continue
        # Everything but the last path component names a nested dictionary.
        *parents, leaf = key[len(prefix):].split("/")
        cur = tensor_dict
        for part in parents:
            cur = cur.setdefault(part, {})
        tensorview = TensorView(
            dataset=self, subpath=key, slice_=slice_, lazy=self.lazy
        )
        cur[leaf] = tensorview if self.lazy else tensorview.compute()

    if not tensor_dict:
        raise KeyError(f"Key {prefix} was not found in dataset")
    return tensor_dict
def _get_dictionary(self, subpath, slice_):
    """Gets dictionary from dataset given incomplete subpath

    Keys of this view that start with ``subpath`` (a trailing "/" is appended
    when missing) are arranged in a nested dictionary following the remaining
    "/"-separated parts of each key. Every leaf is a ``TensorView`` on
    ``self.dataset`` created with ``slice_``; it is stored as is when
    ``self.lazy`` is set and as its ``compute()`` result otherwise.

    Raises
    ------
    KeyError
        If no key starts with ``subpath``.
    """
    prefix = subpath if subpath.endswith("/") else subpath + "/"
    nested = {}
    for key in self.keys:
        if not key.startswith(prefix):
            continue
        # The last component is the leaf name, the ones before it are levels.
        *levels, leaf = key[len(prefix):].split("/")
        node = nested
        for level in levels:
            node = node.setdefault(level, {})
        view = TensorView(
            dataset=self.dataset,
            subpath=key,
            slice_=slice_,
            lazy=self.lazy,
        )
        node[leaf] = view if self.lazy else view.compute()
    if nested:
        return nested
    raise KeyError(f"Key {prefix} was not found in dataset")
def __getitem__(self, slice_):
    """| Gets a slice or slices from DatasetView
    | Usage:

    >>> ds_view = ds[5:15]
    >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image

    | The argument is split by ``slice_split`` into a key (``subpath``) and a
    | list of indices/slices:

    * slices only: a new ``DatasetView`` over ``self.indexes[<slice>]``
    * key only: a ``TensorView`` for an exact key, an ``ObjectView`` when the
      key extends an existing key, otherwise the nested dictionary from
      ``_get_dictionary``; all of them cover the samples of this view
    * key and slices: the same choice, with the first slice translated into
      indexes of the underlying dataset

    | Views are returned as they are when ``self.lazy`` is set, otherwise their
    | ``compute()`` result is returned.

    Raises
    ------
    ValueError
        If several slices are given without a key, or several slices are
        applied to a key that resolves to a dictionary of tensors.
    """

    def _resolve(view):
        # Lazy views are handed back untouched, eager ones are computed now.
        return view if self.lazy else view.compute()

    # A lone key/index/slice is wrapped into a list; a string is iterable but
    # still counts as one key.
    if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
        slice_ = [slice_]
    subpath, slice_list = slice_split(list(slice_))
    if isinstance(self.indexes, int):
        # Placeholder for the sample position; in the key-and-slices branch it
        # is overwritten with the view's own index.
        slice_list = [0] + slice_list

    if not subpath:
        if len(slice_list) > 1:
            raise ValueError(
                "Can't slice dataset with multiple slices without key")
        picked = self.indexes[slice_list[0]]
        return DatasetView(dataset=self.dataset, lazy=self.lazy, indexes=picked)

    if not slice_list:
        # Key only: address every sample of this view, as one range when the
        # view is contiguous and as the explicit index list otherwise.
        if self.is_contiguous:
            slice_ = [slice(self.indexes[0], self.indexes[-1] + 1)]
        else:
            slice_ = [self.indexes]
        if subpath in self.keys:
            return _resolve(
                TensorView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=slice_,
                    lazy=self.lazy,
                )
            )
        if any(subpath.startswith(key) for key in self.keys):
            return _resolve(
                ObjectView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=slice_,
                    lazy=self.lazy,
                )
            )
        return self._get_dictionary(subpath, slice_)

    # Key and slices: translate the first slice into dataset indexes.
    if isinstance(self.indexes, list):
        indexes = self.indexes[slice_list[0]]
        if self.is_contiguous and isinstance(indexes, list) and indexes:
            indexes = slice(indexes[0], indexes[-1] + 1)
    else:
        indexes = self.indexes
    slice_list[0] = indexes

    schema_obj = self.dataset.schema.dict_[subpath.split("/")[1]]
    # A Sequence entry sliced beyond the sample dimension is not served by a
    # TensorView; it falls through to the ObjectView lookup below.
    if subpath in self.keys and not (
        isinstance(schema_obj, Sequence) and len(slice_list) > 1
    ):
        return _resolve(
            TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
        )
    if any(subpath.startswith(key) for key in self.keys):
        return _resolve(
            ObjectView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
        )
    if len(slice_list) > 1:
        raise ValueError("You can't slice a dictionary of Tensors")
    return self._get_dictionary(subpath, slice_list[0])
def __getitem__(self, slice_):
    """| Gets a slice or slices from dataset
    | Usage:

    >>> return ds["image", 5, 0:1920, 0:1080, 0:3].compute() # returns numpy array
    >>> images = ds["image"]
    >>> return images[5].compute() # returns numpy array
    >>> images = ds["image"]
    >>> image = images[5]
    >>> return image[0:1920, 0:1080, 0:3].compute()

    | The argument is split by ``slice_split`` into a key (``subpath``) and a
    | list of indices/slices:

    * slices only: a ``DatasetView`` over ``self.indexes[<slice>]``
    * key only: a ``TensorView`` for an exact key, an ``ObjectView`` when the
      key extends an existing key, otherwise the nested dictionary from
      ``_get_dictionary``; all of them span the whole first dimension
    * key and slices: the same choice, restricted to the given slices

    | Views are returned as they are when ``self.lazy`` is set, otherwise their
    | ``compute()`` result is returned.

    Raises
    ------
    ValueError
        If several slices are given without a key, or several slices are
        applied to a key that resolves to a dictionary of tensors.
    """

    def _resolve(view):
        # Lazy views are handed back untouched, eager ones are computed now.
        return view if self.lazy else view.compute()

    # A lone key/index/slice is wrapped into a list; a string is iterable but
    # still counts as one key.
    if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
        slice_ = [slice_]
    subpath, slice_list = slice_split(list(slice_))

    if not subpath:
        if len(slice_list) > 1:
            raise ValueError(
                "Can't slice a dataset with multiple slices without key")
        picked = self.indexes[slice_list[0]]
        return DatasetView(
            dataset=self,
            indexes=picked,
            lazy=self.lazy,
        )

    if not slice_list:
        # Key only: everything along the first dimension.
        if subpath in self.keys:
            return _resolve(
                TensorView(
                    dataset=self,
                    subpath=subpath,
                    slice_=slice(0, self._shape[0]),
                    lazy=self.lazy,
                )
            )
        if any(subpath.startswith(key) for key in self.keys):
            return _resolve(
                ObjectView(
                    dataset=self,
                    subpath=subpath,
                    lazy=self.lazy,
                    slice_=[slice(0, self._shape[0])],
                )
            )
        return self._get_dictionary(subpath)

    schema_obj = self.schema.dict_[subpath.split("/")[1]]
    # A Sequence entry sliced beyond the sample dimension is not served by a
    # TensorView; it falls through to the ObjectView lookup below.
    if subpath in self.keys and not (
        isinstance(schema_obj, Sequence) and len(slice_list) > 1
    ):
        return _resolve(
            TensorView(
                dataset=self, subpath=subpath, slice_=slice_list, lazy=self.lazy
            )
        )
    if any(subpath.startswith(key) for key in self.keys):
        return _resolve(
            ObjectView(
                dataset=self,
                subpath=subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
        )
    if len(slice_list) > 1:
        raise ValueError("You can't slice a dictionary of Tensors")
    return self._get_dictionary(subpath, slice_list[0])
def __getitem__(self, slice_):
    """| Gets a slice or slices from DatasetView
    | Usage:

    >>> ds_view = ds[5:15]
    >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image

    | The argument is split by ``slice_split`` into a key (``subpath``) and a
    | list of indices/slices:

    * slices only: a new ``DatasetView`` whose offset is this view's offset
      plus the offset reported by ``slice_extract_info``
    * key only: a ``TensorView`` for a tensor key, an ``ObjectView`` when the
      key extends a tensor key, otherwise the nested dictionary from
      ``_get_dictionary``; all of them cover the samples of this view
    * key and slices: the same choice, with the first slice shifted by this
      view's offset

    | Views are returned as they are when ``self.lazy`` is set, otherwise their
    | ``compute()`` result is returned.

    Raises
    ------
    ValueError
        If several slices are given without a key, or several slices are
        applied to a key that resolves to a dictionary of tensors.
    """
    # A lone key/index/slice is wrapped into a list; a string is iterable but
    # still counts as one key.
    if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
        slice_ = [slice_]
    subpath, slice_list = slice_split(list(slice_))
    if self.squeeze_dim:
        # Placeholder for the sample position of a single-sample view.
        slice_list = [0] + slice_list

    if not subpath:
        if len(slice_list) > 1:
            raise ValueError(
                "Can't slice a dataset with multiple slices without subpath"
            )
        first = slice_list[0]
        num, ofs = slice_extract_info(first, self.num_samples)
        return DatasetView(
            dataset=self.dataset,
            num_samples=num,
            offset=ofs + self.offset,
            squeeze_dim=isinstance(first, int),
            lazy=self.lazy,
        )

    if not slice_list:
        # Key only. With squeeze_dim set slice_list always holds the
        # placeholder, so this branch is only reached for non-squeezed views;
        # the conditional is kept as a safeguard.
        slice_ = (
            slice(self.offset, self.offset + self.num_samples)
            if not self.squeeze_dim
            else self.offset
        )
        if subpath in self.dataset._tensors.keys():
            tensorview = TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_,
                lazy=self.lazy,
            )
            return tensorview if self.lazy else tensorview.compute()
        if any(subpath.startswith(key) for key in self.dataset._tensors.keys()):
            objectview = objv.ObjectView(
                dataset=self.dataset,
                subpath=subpath,
                slice_list=[slice_],
                lazy=self.lazy,
            )
            return objectview if self.lazy else objectview.compute()
        # Same positional call as at the end of this method. The previous
        # ``self._get_dictionary(self.dataset, subpath, slice=slice_)`` passed
        # an extra positional argument and a keyword named ``slice``, which
        # does not match how the method is called elsewhere.
        return self._get_dictionary(subpath, slice_)

    # Key and slices: shift the first index/slice by this view's offset.
    head = slice_list[0]
    num, ofs = slice_extract_info(head, self.num_samples)
    start = ofs + self.offset
    slice_list[0] = start if isinstance(head, int) else slice(start, start + num)

    schema_obj = self.dataset.schema.dict_[subpath.split("/")[1]]
    # A Sequence entry sliced beyond the sample dimension is not served by a
    # TensorView; it falls through to the ObjectView lookup below.
    if subpath in self.dataset._tensors.keys() and (
        not isinstance(schema_obj, objv.Sequence) or len(slice_list) <= 1
    ):
        tensorview = TensorView(
            dataset=self.dataset,
            subpath=subpath,
            slice_=slice_list,
            lazy=self.lazy,
        )
        return tensorview if self.lazy else tensorview.compute()
    if any(subpath.startswith(key) for key in self.dataset._tensors.keys()):
        objectview = objv.ObjectView(
            dataset=self.dataset,
            subpath=subpath,
            slice_list=slice_list,
            lazy=self.lazy,
        )
        return objectview if self.lazy else objectview.compute()
    if len(slice_list) > 1:
        raise ValueError("You can't slice a dictionary of Tensors")
    return self._get_dictionary(subpath, slice_list[0])