def __getitem__(self, slice_):
    """| Gets a slice or slices from DatasetView
    | Usage:

    >>> ds_view = ds[5:15]
    >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image

    Parameters
    ----------
    slice_: str, int, slice, or an iterable of those
        A single item is wrapped in a list. ``slice_split`` separates the items
        into a subpath and a list of index entries. The first index entry is
        relative to this view and is shifted by ``self.offset`` before use.

    Returns
    -------
    DatasetView
        When no subpath is given.
    TensorView
        When the subpath is a key of ``self.dataset._tensors``.
    object
        Otherwise, whatever ``self._get_dictionary`` returns.

    Raises
    ------
    ValueError
        If several indices are given without a subpath, or several indices are
        given for a subpath that is not a tensor key.
    """
    if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
        slice_ = [slice_]
    slice_ = list(slice_)
    subpath, slice_list = slice_split(slice_)
    # A squeezed view stands for a single sample, so index 0 is implied.
    slice_list = [0] + slice_list if self.squeeze_dim else slice_list

    if not subpath:
        if len(slice_list) > 1:
            raise ValueError(
                "Can't slice a dataset with multiple slices without subpath"
            )
        num, ofs = slice_extract_info(slice_list[0], self.num_samples)
        return DatasetView(
            dataset=self.dataset,
            num_samples=num,
            offset=ofs + self.offset,
            squeeze_dim=isinstance(slice_list[0], int),
        )

    if not slice_list:
        # No index given: address every sample covered by this view.
        slice_ = slice(self.offset, self.offset + self.num_samples)
        if subpath in self.dataset._tensors.keys():
            return TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_,
                squeeze_dims=[True] if self.squeeze_dim else [],
            )
        # NOTE(review): this call passes (dataset, subpath, slice=...) while the
        # call at the end of this method passes (subpath, slice). The signature
        # of _get_dictionary is not visible here - confirm which form is right.
        return self._get_dictionary(self.dataset, subpath, slice=slice_)

    first = slice_list[0]
    num, ofs = slice_extract_info(first, self.num_samples)
    start = ofs + self.offset
    # Only an integer index drops the sample axis. A slice that happens to
    # select a single sample must stay a slice, matching the
    # ``squeeze_dim=isinstance(..., int)`` rule used in the no-subpath branch.
    slice_list[0] = start if isinstance(first, int) else slice(start, start + num)
    if subpath in self.dataset._tensors.keys():
        return TensorView(
            dataset=self.dataset,
            subpath=subpath,
            slice_=slice_list,
            squeeze_dims=[True] if self.squeeze_dim else [],
        )
    if len(slice_list) > 1:
        raise ValueError("You can't slice a dictionary of Tensors")
    return self._get_dictionary(subpath, slice_list[0])
def __getitem__(self, slice_):
    """| Gets a slice or slices from dataset
    | Usage:

    >>> return ds["image", 5, 0:1920, 0:1080, 0:3].compute() # returns numpy array
    >>> images = ds["image"]
    >>> return images[5].compute() # returns numpy array
    >>> images = ds["image"]
    >>> image = images[5]
    >>> return image[0:1920, 0:1080, 0:3].compute()

    A request without a subpath yields a DatasetView. A subpath that is a key
    of ``self._tensors`` yields a TensorView (computed immediately when the
    dataset is not lazy). Any other subpath is delegated to
    ``self._get_dictionary``.
    """
    if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
        slice_ = [slice_]
    subpath, slice_list = slice_split(list(slice_))

    if not subpath:
        if len(slice_list) > 1:
            raise ValueError(
                "Can't slice a dataset with multiple slices without subpath"
            )
        first = slice_list[0]
        num, ofs = slice_extract_info(first, self.shape[0])
        return DatasetView(
            dataset=self,
            num_samples=num,
            offset=ofs,
            squeeze_dim=isinstance(first, int),
            lazy=self.lazy,
        )

    if not slice_list:
        # Key only: expose the full range of samples.
        if subpath not in self._tensors.keys():
            return self._get_dictionary(subpath)
        tensorview = TensorView(
            dataset=self,
            subpath=subpath,
            slice_=slice(0, self.shape[0]),
            lazy=self.lazy,
        )
        return tensorview if self.lazy else tensorview.compute()

    # The result is not used; the call is kept so that whatever it raises for
    # the first index still surfaces here.
    slice_extract_info(slice_list[0], self.shape[0])
    if subpath in self._tensors.keys():
        tensorview = TensorView(
            dataset=self, subpath=subpath, slice_=slice_list, lazy=self.lazy
        )
        return tensorview if self.lazy else tensorview.compute()
    if len(slice_list) > 1:
        raise ValueError("You can't slice a dictionary of Tensors")
    return self._get_dictionary(subpath, slice_list[0])
def __getitem__(self, slice_):
    """| Get an item to be computed without iterating on the whole dataset.
    | Creates a dataset view, then a temporary dataset to apply the transform.

    Parameters:
    ----------
    slice_: slice
        Gets a slice or slices from dataset

    Returns
    -------
    The result of indexing the temporary dataset produced by ``self.store``
    with the adjusted request.
    """
    if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
        slice_ = [slice_]
    slice_ = list(slice_)
    subpath, slice_list = slice_split(slice_)
    if len(slice_list) == 0:
        # No index given: operate on the whole first dimension.
        slice_list = [slice(None, None, None)]

    num, ofs = slice_extract_info(slice_list[0], self.shape[0])
    ds_view = DatasetView(
        dataset=self._ds,
        num_samples=num,
        offset=ofs,
        squeeze_dim=isinstance(slice_list[0], int),
    )

    path = posixpath.expanduser("~/.activeloop/tmparray")
    new_ds = self.store(path, length=num, ds=ds_view, progressbar=False)

    # The temporary dataset only holds the requested samples, so the first
    # index of the request has to be rebased: everything for a slice, element
    # 0 for an integer. The entry to rebase is the first non-string item.
    # Picking it by a fixed position would overwrite the key itself for
    # requests such as ``ds["image"]``.
    rebased = 0 if isinstance(slice_list[0], int) else slice(None, None, None)
    for position, item in enumerate(slice_):
        if not isinstance(item, str):
            slice_[position] = rebased
            break
    return new_ds[slice_]
def __setitem__(self, slice_, value):
    """| Sets a slice or slices with a value
    | Usage:

    >>> ds_view = ds[5:15]
    >>> ds_view["image", 3, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets the 8th image

    Parameters
    ----------
    slice_: str, int, slice, or an iterable of those
        Must contain a subpath. The first index entry is relative to this view
        and is shifted by ``self.offset`` before the write.
    value:
        The value to store, passed through ``str_to_int`` together with the
        dataset tokenizer first.

    Raises
    ------
    ValueError
        If no subpath is given.
    IndexError
        Presumably raised by ``slice_extract_info`` when the first index is
        outside the view (as the sibling ``__getitem__`` relies on).
    """
    # handling strings and bytes
    assign_value = str_to_int(value, self.dataset.tokenizer)

    if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
        slice_ = [slice_]
    slice_ = list(slice_)
    subpath, slice_list = slice_split(slice_)
    # A squeezed view stands for a single sample, so index 0 is implied.
    slice_list = [0] + slice_list if self.squeeze_dim else slice_list

    if not subpath:
        raise ValueError("Can't assign to dataset sliced without subpath")

    if not slice_list:
        slice_ = (
            self.offset
            if self.num_samples == 1
            else slice(self.offset, self.offset + self.num_samples)
        )
        self.dataset._tensors[subpath][slice_] = assign_value  # Add path check
        return

    first = slice_list[0]
    # Integer indices go through slice_extract_info as well, so they are
    # validated against the view the same way __getitem__ does. Adding a raw
    # negative or too-large integer to the offset would write to a sample
    # outside this view.
    num, ofs = slice_extract_info(first, self.num_samples)
    start = ofs + self.offset
    if isinstance(first, int) or num <= 1:
        slice_list[0] = start
    else:
        slice_list[0] = slice(start, start + num)
    self.dataset._tensors[subpath][slice_list] = assign_value
def test_dataset_utils():
    """Check the error cases and two accepted inputs of the slicing helpers."""
    # An entry that is neither a key nor an index is rejected.
    with pytest.raises(TypeError):
        slice_split([5.3])

    # (expected exception, index, length) triples for slice_extract_info.
    rejected = (
        (IndexError, 5, 3),
        (ValueError, slice(2, 10, -2), 3),
        (IndexError, slice(20, 100), 3),
        (IndexError, slice(1, 20), 3),
        (IndexError, slice(4, 1), 10),
    )
    for expected_error, index, length in rejected:
        with pytest.raises(expected_error):
            slice_extract_info(index, length)

    # Open-ended slices must be accepted without raising.
    for index, length in ((slice(None, 10), 20), (slice(20, None), 50)):
        slice_extract_info(index, length)
def __getitem__(self, slice_):
    """| Gets a slice from an objectview

    Parameters
    ----------
    slice_: str, int, slice, or an iterable of those
        ``slice_split`` separates the items into an optional subpath, which is
        appended to ``self.subpath``, and a list of index entries.

    Returns
    -------
    ObjectView
        A new view when ``self.lazy`` is true; otherwise the result of calling
        ``compute()`` on that view.

    Raises
    ------
    IndexError
        If more index entries are given than there are unsqueezed dimensions.
    """
    if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
        slice_ = [slice_]
    slice_ = list(slice_)
    subpath, slice_list = slice_split(slice_)

    dataset = self.dataset
    # Work on copies so this view's own bookkeeping is never modified.
    nums = self.nums.copy()
    offsets = self.offsets.copy()
    squeeze_dims = self.squeeze_dims.copy()
    inner_schema_obj = self.inner_schema_obj
    if subpath:
        inner_schema_obj, nums, offsets, squeeze_dims = self.process_path(
            subpath, inner_schema_obj, nums, offsets, squeeze_dims
        )
    subpath = self.subpath + subpath

    if slice_list:
        # Slice first dim
        if not isinstance(self.dataset, DatasetView):
            num, ofs = slice_extract_info(slice_list[0], self.dataset.shape[0])
            dataset = DatasetView(
                self.dataset, num, ofs, isinstance(slice_list[0], int)
            )
            slice_list = slice_list[1:]
        elif not self.dataset.squeeze_dim:
            dataset = self.dataset[slice_list[0]]
            slice_list = slice_list[1:]
        # A squeezed DatasetView already fixes the first dimension, so every
        # index entry applies to the remaining dimensions.

    # Expand slice list for rest of dims
    if slice_list:
        exp_slice_list = []
        for squeeze in squeeze_dims:
            if squeeze or not slice_list:
                # Dimension already squeezed, or no index left for it.
                exp_slice_list.append(None)
            else:
                exp_slice_list.append(slice_list.pop(0))
        if slice_list:
            # slice list longer than max
            raise IndexError("Too many indices")
        for i, it in enumerate(exp_slice_list):
            if it is None:
                continue
            num, ofs = slice_extract_info(it, nums[i])
            nums[i] = num
            offsets[i] += ofs
            # Only an integer index removes a dimension. A slice of length 1
            # keeps it, the same rule applied to the first dimension above.
            squeeze_dims[i] = isinstance(it, int)

    objectview = ObjectView(
        dataset=dataset,
        subpath=subpath,
        slice_list=None,
        nums=nums,
        offsets=offsets,
        squeeze_dims=squeeze_dims,
        inner_schema_obj=inner_schema_obj,
        lazy=self.lazy,
        new=False,
    )
    return objectview if self.lazy else objectview.compute()
def __init__(
    self,
    dataset,
    subpath=None,
    slice_list=None,
    nums=None,
    offsets=None,
    squeeze_dims=None,
    inner_schema_obj=None,
    lazy=True,
    new=True,
):
    """Creates an ObjectView object for dataset from a Dataset, DatasetView or TensorView
    object, or creates a different ObjectView from an existing one

    Parameters
    ----------
    These parameters are used to create a new ObjectView.
    dataset: hub.api.dataset.Dataset object
        The dataset whose ObjectView is being created, or its DatasetView
    subpath: str (optional)
        A potentially incomplete path to any element in the Dataset
    slice_list: optional
        The `slice_` of this Tensor that needs to be accessed
    lazy: bool, optional
        Setting this to False will stop lazy computation and will allow items to be accessed without .compute()

    These parameters are also needed to create an ObjectView from an existing one.
    nums: List[int]
        Number of elements in each dimension of the ObjectView to be created
        (defaults to an empty list)
    offsets: List[int]
        Starting element in each dimension of the ObjectView to be created
        (defaults to an empty list)
    squeeze_dims: List[bool]
        Whether each dimension can be squeezed or not (defaults to an empty list)
    inner_schema_obj: Child of hub.schema.Tensor or hub.schema.SchemaDict
        The deepest element in the schema upto which the previous ObjectView had been processed

    new: bool
        Whether to create a new ObjectView object from a Dataset, DatasetView or TensorView
        or create a different ObjectView from an existing one

    Raises
    ------
    IndexError
        If ``slice_list`` has more entries after the first than there are
        dimensions in ``nums``.
    """
    self.dataset = dataset
    self.schema = (
        dataset.schema.dict_
        if not isinstance(dataset, DatasetView)
        else dataset.dataset.schema.dict_
    )
    self.subpath = subpath
    # Each instance owns private lists. A mutable default would be shared by
    # every instance, and the loop below writes into these lists in place, so
    # caller-provided lists are copied as well.
    self.nums = [] if nums is None else list(nums)
    self.offsets = [] if offsets is None else list(offsets)
    self.squeeze_dims = [] if squeeze_dims is None else list(squeeze_dims)
    self.inner_schema_obj = inner_schema_obj
    self.lazy = lazy

    if not new:
        return

    # Creating new obj
    if self.subpath:
        (
            self.inner_schema_obj,
            self.nums,
            self.offsets,
            self.squeeze_dims,
        ) = self.process_path(
            self.subpath,
            self.inner_schema_obj,
            self.nums,
            self.offsets,
            self.squeeze_dims,
        )

    if not slice_list:
        return

    # Check if dataset view needs to be made
    num, ofs = slice_extract_info(slice_list[0], dataset.shape[0])
    self.dataset = DatasetView(dataset, num, ofs, isinstance(slice_list[0], int))

    remaining = slice_list[1:]
    if len(remaining) > len(self.nums):
        raise IndexError("Too many indices")
    for i, it in enumerate(remaining):
        num, ofs = slice_extract_info(it, self.nums[i])
        self.nums[i] = num
        self.offsets[i] += ofs
        # Only an integer index removes a dimension. A slice of length 1 keeps
        # it, the same rule used for the DatasetView built above.
        self.squeeze_dims[i] = isinstance(it, int)
def __getitem__(self, slice_):
    """| Gets a slice or slices from DatasetView
    | Usage:

    >>> ds_view = ds[5:15]
    >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image

    Without a subpath a narrower DatasetView is returned. With a subpath the
    result is a TensorView or an ObjectView (computed immediately when the
    view is not lazy), or whatever ``self._get_dictionary`` returns when the
    subpath matches no tensor key.
    """
    if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
        slice_ = [slice_]
    slice_ = list(slice_)
    subpath, slice_list = slice_split(slice_)
    if self.squeeze_dim:
        # A squeezed view stands for a single sample, so index 0 is implied.
        slice_list = [0] + slice_list

    if not subpath:
        if len(slice_list) > 1:
            raise ValueError(
                "Can't slice a dataset with multiple slices without subpath"
            )
        first = slice_list[0]
        num, ofs = slice_extract_info(first, self.num_samples)
        return DatasetView(
            dataset=self.dataset,
            num_samples=num,
            offset=ofs + self.offset,
            squeeze_dim=isinstance(first, int),
            lazy=self.lazy,
        )

    if not slice_list:
        if self.squeeze_dim:
            slice_ = self.offset
        else:
            slice_ = slice(self.offset, self.offset + self.num_samples)
        if subpath in self.dataset._tensors.keys():
            tensorview = TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_,
                lazy=self.lazy,
            )
            return tensorview if self.lazy else tensorview.compute()
        # A subpath that begins with a tensor key is handed to ObjectView.
        if any(subpath.startswith(key) for key in self.dataset._tensors.keys()):
            objectview = ObjectView(
                dataset=self.dataset,
                subpath=subpath,
                slice_list=[slice_],
                lazy=self.lazy,
            )
            return objectview if self.lazy else objectview.compute()
        # NOTE(review): this call passes (dataset, subpath, slice=...) while the
        # call at the end of this method passes (subpath, slice). The signature
        # of _get_dictionary is not visible here - confirm which form is right.
        return self._get_dictionary(self.dataset, subpath, slice=slice_)

    first = slice_list[0]
    num, ofs = slice_extract_info(first, self.num_samples)
    start = ofs + self.offset
    # Rebase the first index from view coordinates to dataset coordinates.
    if isinstance(first, int):
        slice_list[0] = start
    else:
        slice_list[0] = slice(start, start + num)

    schema_obj = self.dataset.schema.dict_[subpath.split("/")[1]]
    sliced_inside_sequence = isinstance(schema_obj, Sequence) and len(slice_list) > 1
    if subpath in self.dataset._tensors.keys() and not sliced_inside_sequence:
        tensorview = TensorView(
            dataset=self.dataset,
            subpath=subpath,
            slice_=slice_list,
            lazy=self.lazy,
        )
        return tensorview if self.lazy else tensorview.compute()
    if any(subpath.startswith(key) for key in self.dataset._tensors.keys()):
        objectview = ObjectView(
            dataset=self.dataset,
            subpath=subpath,
            slice_list=slice_list,
            lazy=self.lazy,
        )
        return objectview if self.lazy else objectview.compute()
    if len(slice_list) > 1:
        raise ValueError("You can't slice a dictionary of Tensors")
    return self._get_dictionary(subpath, slice_list[0])
def __getitem__(self, slice_):
    """| Gets a slice from an objectview

    Parameters
    ----------
    slice_: str, int, slice, or an iterable of those
        ``slice_split`` separates the items into an optional subpath, which is
        appended to ``self.subpath``, and a list of index entries. When
        ``self.indexes`` is a list or a slice, the first index entry selects
        from it; the remaining entries address the unsqueezed dimensions.

    Returns
    -------
    ObjectView
        A new view when ``self.lazy`` is true; otherwise the result of calling
        ``compute()`` on that view.

    Raises
    ------
    IndexError
        If more index entries are given than there are unsqueezed dimensions.
    """
    if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
        slice_ = [slice_]
    slice_ = list(slice_)
    subpath, slice_list = slice_split(slice_)

    # Work on copies so this view's own bookkeeping is never modified.
    nums = self.nums.copy()
    offsets = self.offsets.copy()
    squeeze_dims = self.squeeze_dims.copy()
    inner_schema_obj = self.inner_schema_obj
    if subpath:
        inner_schema_obj, nums, offsets, squeeze_dims = self.process_path(
            subpath, inner_schema_obj, nums, offsets, squeeze_dims
        )
    subpath = self.subpath + subpath

    new_indexes = self.indexes
    if slice_list:
        first = slice_list[0]
        if isinstance(self.indexes, list):
            new_indexes = self.indexes[first]
            # An integer index yields a single element rather than a list.
            # Only a non-empty list can be compacted into a slice; subscripting
            # the element would raise TypeError.
            # NOTE(review): compaction assumes the selection is still
            # contiguous, i.e. that `first` has no step - confirm.
            if (
                self.is_contiguous
                and isinstance(new_indexes, list)
                and new_indexes
            ):
                new_indexes = slice(new_indexes[0], new_indexes[-1] + 1)
            slice_list = slice_list[1:]
        elif isinstance(self.indexes, slice):
            ofs = self.indexes.start or 0
            # An open-ended slice has no known length.
            num = self.indexes.stop - ofs if self.indexes.stop else None
            num, ofs_temp = slice_extract_info(first, num)
            start = ofs + ofs_temp
            new_indexes = (
                start if isinstance(first, int) else slice(start, start + num)
            )
            slice_list = slice_list[1:]

    if slice_list:
        # Expand slice list
        exp_slice_list = []
        for squeeze in squeeze_dims:
            if squeeze or not slice_list:
                # Dimension already squeezed, or no index left for it.
                exp_slice_list.append(None)
            else:
                exp_slice_list.append(slice_list.pop(0))
        if slice_list:
            # slice list longer than max
            raise IndexError("Too many indices")
        for i, it in enumerate(exp_slice_list):
            if it is None:
                continue
            num, ofs = slice_extract_info(it, nums[i])
            nums[i] = num
            offsets[i] += ofs
            squeeze_dims[i] = isinstance(it, int)

    objectview = ObjectView(
        dataset=self.dataset,
        subpath=subpath,
        slice_=None,
        indexes=new_indexes,
        nums=nums,
        offsets=offsets,
        squeeze_dims=squeeze_dims,
        inner_schema_obj=inner_schema_obj,
        lazy=self.lazy,
        check_bounds=False,
    )
    return objectview if self.lazy else objectview.compute()