Esempio n. 1
0
def test_dataset_utils():
    """Exercise the argument checks of ``slice_split`` and ``slice_extract_info``."""
    # A float entry is expected to be rejected with a TypeError.
    with pytest.raises(TypeError):
        slice_split([5.3])

    # (expected exception, index or slice, number of samples)
    rejected = (
        (IndexError, 5, 3),
        (ValueError, slice(2, 10, -2), 3),
        (IndexError, slice(20, 100), 3),
        (IndexError, slice(1, 20), 3),
        (IndexError, slice(4, 1), 10),
    )
    for expected_error, index, size in rejected:
        with pytest.raises(expected_error):
            slice_extract_info(index, size)

    # Open-ended slices are expected to pass without raising.
    for index, size in ((slice(None, 10), 20), (slice(20, None), 50)):
        slice_extract_info(index, size)
Esempio n. 2
0
 def __setitem__(self, slice_, value) -> None:
     """Assign ``value`` to the samples addressed by ``slice_``.

     ``slice_`` is split by ``slice_split`` into an optional subpath and a
     list of indices/slices. An integer first entry is routed to a single
     dataset through ``self.slicing``. A slice first entry is walked over
     every dataset in ``self.datasets`` that it overlaps, and each of them
     receives the matching portion of ``value``.

     The step of the first slice is not read. A missing stop means "up to
     ``self.num_samples``"; an explicit stop of 0 selects nothing, and a
     negative stop counts from the end.
     """
     if not isinstance(slice_, Iterable) or isinstance(slice_, str):
         slice_ = [slice_]
     slice_ = list(slice_)
     subpath, slice_list = slice_split(slice_)
     slice_list = slice_list or [slice(0, self.num_samples)]
     if isinstance(slice_list[0], int):
         # if integer it assigns the data to the corresponding dataset
         slice_list, shard_id = self.slicing(slice_list)
         slice_ = slice_list + [subpath] if subpath else slice_list
         self.datasets[shard_id][slice_] = value
         return

     # if slice it finds all the corresponding datasets and assigns slices
     # of the value one by one
     total = self.num_samples
     cur_index = slice_list[0].start or 0
     if cur_index < 0:
         cur_index += total
     cur_index = max(cur_index, 0)
     start_index = cur_index

     # Only a missing stop means "to the end"; ``stop or total`` would also
     # turn an explicit stop of 0 into the full length.
     stop_index = slice_list[0].stop
     if stop_index is None:
         stop_index = total
     elif stop_index < 0:
         # negative stops count from the end, like the start above
         stop_index += total
     stop_index = min(stop_index, total)

     while cur_index < stop_index:
         # offset is subtracted to get the position inside the shard
         shard_id, offset = self.identify_shard(cur_index)
         end_index = min(offset + len(self.datasets[shard_id]), stop_index)
         cur_slice_list = [
             slice(cur_index - offset, end_index - offset)
         ] + slice_list[1:]
         current_slice = (
             cur_slice_list + [subpath] if subpath else cur_slice_list
         )
         # value is indexed relative to the first sample of the request
         self.datasets[shard_id][current_slice] = value[
             cur_index - start_index : end_index - start_index
         ]
         cur_index = end_index
Esempio n. 3
0
    def __setitem__(self, slice_, value):
        """| Writes a value into the samples covered by this dataset view
        | Usage:

        >>> ds_view = ds[5:15]
        >>> ds_view["image", 3, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets the 8th image
        """
        # handling strings and bytes
        converted = str_to_int(value, self.dataset.tokenizer)

        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        subpath, slice_list = slice_split(list(slice_))
        if self.squeeze_dim:
            slice_list = [0] + slice_list

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without subpath")

        if not slice_list:
            # no sample index given: address everything the view covers
            if self.num_samples == 1:
                sample_key = self.offset
            else:
                sample_key = slice(self.offset,
                                   self.offset + self.num_samples)
            self.dataset._tensors[subpath][sample_key] = converted  # Add path check
            return

        # shift the first index/slice by the offset of the view
        first = slice_list[0]
        if isinstance(first, slice):
            num, ofs = slice_extract_info(first, self.num_samples)
        else:
            num, ofs = 1, first
        if num > 1:
            slice_list[0] = slice(ofs + self.offset, ofs + self.offset + num)
        else:
            slice_list[0] = ofs + self.offset
        self.dataset._tensors[subpath][slice_list] = converted
Esempio n. 4
0
    def __getitem__(self, slice_):
        """| Returns a new tensorview for a slice or slices of this tensorview
        | Usage:

        >>> images_tensorview = ds["image"]
        >>> return images_tensorview[7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 7th image
        """
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        slice_ = self.slice_fill(list(slice_))
        subpath, slice_list = slice_split(slice_)
        if subpath:
            raise ValueError("Can't slice a Tensor with string")

        nums = self.nums.copy()
        offsets = self.offsets.copy()
        requested = len(slice_list)
        if len(nums) < requested:
            # pad so every requested entry has a (num, offset) pair
            nums.extend([None] * (requested - len(nums)))
            offsets.extend([0] * (requested - len(offsets)))

        # merge each requested entry with what the view already holds
        for i, item in enumerate(slice_list):
            slice_list[i] = self._combine(item, nums[i], offsets[i])

        # entries the request did not reach are rebuilt from nums/offsets
        for i in range(requested, len(nums)):
            if nums[i] > 1:
                slice_list.append(slice(offsets[i], offsets[i] + nums[i]))
            else:
                slice_list.append(offsets[i])

        return TensorView(
            dataset=self.dataset,
            subpath=self.subpath,
            slice_=slice_list,
        )
Esempio n. 5
0
 def __getitem__(self, slice_):
     """Fetch the samples addressed by ``slice_``.

     ``slice_`` is split by ``slice_split`` into an optional subpath and a
     list of indices/slices. An integer first entry is routed to a single
     dataset through ``self.slicing`` and that dataset's result is returned
     directly. A slice first entry is walked over every dataset in
     ``self.datasets`` that it overlaps; the per-dataset results are
     returned wrapped in a ``ComputeList``.

     The step of the first slice is not read. A missing stop means "up to
     ``self.num_samples``"; an explicit stop of 0 selects nothing, and a
     negative stop counts from the end.
     """
     if not isinstance(slice_, Iterable) or isinstance(slice_, str):
         slice_ = [slice_]
     slice_ = list(slice_)
     subpath, slice_list = slice_split(slice_)
     slice_list = slice_list or [slice(0, self.num_samples)]
     if isinstance(slice_list[0], int):
         # if integer it fetches the data from the corresponding dataset
         slice_list, shard_id = self.slicing(slice_list)
         slice_ = slice_list + [subpath] if subpath else slice_list
         return self.datasets[shard_id][slice_]

     # if slice it finds all the corresponding datasets included in the
     # slice and collects what each of them returns for its portion
     total = self.num_samples
     cur_index = slice_list[0].start or 0
     if cur_index < 0:
         cur_index += total
     cur_index = max(cur_index, 0)

     # Only a missing stop means "to the end"; ``stop or total`` would also
     # turn an explicit stop of 0 into the full length.
     stop_index = slice_list[0].stop
     if stop_index is None:
         stop_index = total
     elif stop_index < 0:
         # negative stops count from the end, like the start above
         stop_index += total
     stop_index = min(stop_index, total)

     results = []
     while cur_index < stop_index:
         # offset is subtracted to get the position inside the shard
         shard_id, offset = self.identify_shard(cur_index)
         end_index = min(offset + len(self.datasets[shard_id]), stop_index)
         cur_slice_list = [
             slice(cur_index - offset, end_index - offset)
         ] + slice_list[1:]
         current_slice = (
             cur_slice_list + [subpath] if subpath else cur_slice_list
         )
         results.append(self.datasets[shard_id][current_slice])
         cur_index = end_index
     return ComputeList(results)
Esempio n. 6
0
    def __setitem__(self, slice_, value):
        """| Writes a value into a tensor of the dataset
        | Usage:
        >>> ds["image", 5, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        >>> images = ds["image"]
        >>> image = images[5]
        >>> image[0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        """
        # handling strings and bytes
        assign_value = str_to_int(get_value(value), self.tokenizer)

        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        subpath, slice_list = slice_split(list(slice_))

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without subpath")
        if subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in the dataset")

        # without explicit slices the whole tensor is assigned
        tensor_key = slice_list if slice_list else slice(None)
        self._tensors[subpath][tensor_key] = assign_value
Esempio n. 7
0
    def __getitem__(self, slice_):
        """| Compute only the requested samples instead of the whole dataset.
        | Builds a dataset view for the first slice, stores it into a temporary
        | dataset through ``self.store`` and indexes the stored result.
        Parameters:
        ----------
        slice_: slice
            Gets a slice or slices from dataset
        """
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]

        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)

        if not slice_list:
            slice_list = [slice(None, None, None)]

        head = slice_list[0]
        takes_single_sample = isinstance(head, int)
        num, ofs = slice_extract_info(head, self.shape[0])

        ds_view = DatasetView(
            dataset=self._ds,
            num_samples=num,
            offset=ofs,
            squeeze_dim=takes_single_sample,
        )

        path = posixpath.expanduser("~/.activeloop/tmparray")
        new_ds = self.store(path, length=num, ds=ds_view, progressbar=False)

        # Get all shape dimension since we already sliced
        position = 1 if len(slice_) > 1 else 0
        if takes_single_sample:
            slice_[position] = 0
        else:
            slice_[position] = slice(None, None, None)
        return new_ds[slice_]
Esempio n. 8
0
    def __setitem__(self, slice_, value):
        """| Sets a slice or slices with a value
        | Usage:

        >>> images_tensorview = ds["image"]
        >>> images_tensorview[7, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets 7th image

        ``value`` is first passed through ``str_to_int`` with the dataset
        tokenizer, and the converted value is what gets written. The
        requested slices are combined with the slices this view already
        holds before the write.

        Raises:
            ValueError: if ``slice_split`` extracts a subpath from ``slice_``.
        """
        # handling strings and bytes
        assign_value = str_to_int(value, self.dataset.tokenizer)

        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        slice_ = self.slice_fill(slice_)
        subpath, slice_list = slice_split(slice_)

        if subpath:
            raise ValueError(
                "Can't slice a Tensor with multiple slices without subpath")

        new_nums = self.nums.copy()
        new_offsets = self.offsets.copy()
        if len(new_nums) < len(slice_list):
            # pad so every requested entry has a (num, offset) pair
            new_nums.extend([None] * (len(slice_list) - len(new_nums)))
            new_offsets.extend([0] * (len(slice_list) - len(new_offsets)))
        for i in range(len(slice_list)):
            # combine the split-out entries, as the matching getter does
            slice_list[i] = self._combine(slice_list[i], new_nums[i],
                                          new_offsets[i])
        # entries the request did not reach are rebuilt from nums/offsets
        for i in range(len(slice_list), len(new_nums)):
            cur_slice = (slice(new_offsets[i], new_offsets[i] +
                               new_nums[i])
                         if new_nums[i] > 1 else new_offsets[i])
            slice_list.append(cur_slice)
        # Write the converted value: assigning the raw ``value`` here would
        # silently skip the string/bytes handling done at the top.
        self.dataset._tensors[self.subpath][slice_list] = assign_value
Esempio n. 9
0
    def __getitem__(self, slice_):
        """| Gets a slice or slices from tensorview
        | Usage:

        >>> images_tensorview = ds["image"]
        >>> return images_tensorview[7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 7th image

        The requested slices are combined with the indexes, nums and offsets
        this view already holds. An ObjectView is built when a subpath was
        given, or when more slices than ``self.nums`` were requested and the
        dtype is an ``objv.Sequence``; otherwise a TensorView is built. The
        view is returned as is when ``self.lazy`` is set, and computed first
        otherwise.
        """
        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        slice_ = self.slice_fill(slice_)
        subpath, slice_list = slice_split(slice_)
        new_nums = self.nums.copy()
        new_offsets = self.offsets.copy()
        if isinstance(self.indexes, list):
            new_indexes = self.indexes[slice_list[0]]
            # An integer key picks a single element out of the list, which
            # cannot be subscripted; only a non-empty list is collapsed into
            # a contiguous slice.
            if (self.is_contiguous and isinstance(new_indexes, list)
                    and new_indexes):
                new_indexes = slice(new_indexes[0], new_indexes[-1] + 1)
        elif isinstance(self.indexes, int):
            new_indexes = self.indexes
        else:
            # self.indexes is used as a slice here (start/stop are read)
            ofs = self.indexes.start or 0
            num = self.indexes.stop - ofs if self.indexes.stop else None
            new_indexes = self._combine(slice_list[0], num, ofs)
        slice_list[0] = new_indexes
        if len(new_nums) < len(slice_list):
            # pad so every requested entry has a (num, offset) pair
            new_nums.extend([None] * (len(slice_list) - len(new_nums)))
            new_offsets.extend([0] * (len(slice_list) - len(new_offsets)))
        # the first entry was handled through self.indexes above
        for i in range(1, len(slice_list)):
            slice_list[i] = self._combine(slice_list[i], new_nums[i], new_offsets[i])
        # entries the request did not reach are rebuilt from nums/offsets
        for i in range(len(slice_list), len(new_nums)):
            cur_slice = (
                slice(new_offsets[i], new_offsets[i] + new_nums[i])
                if not self.squeeze_dims[i]
                else new_offsets[i]
            )
            slice_list.append(cur_slice)
        if subpath or (
            len(slice_list) > len(self.nums) and isinstance(self.dtype, objv.Sequence)
        ):
            objectview = objv.ObjectView(
                dataset=self.dataset,
                subpath=self.subpath + subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
            return objectview if self.lazy else objectview.compute()
        else:
            tensorview = TensorView(
                dataset=self.dataset,
                subpath=self.subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
            return tensorview if self.lazy else tensorview.compute()
Esempio n. 10
0
    def __getitem__(self, slice_):
        """| Returns a view for a slice or slices of this DatasetView
        | Usage:

        >>> ds_view = ds[5:15]
        >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image
        """
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]

        subpath, slice_list = slice_split(list(slice_))
        if self.squeeze_dim:
            slice_list = [0] + slice_list

        if not subpath:
            # no key given: narrow the dataset view itself
            if len(slice_list) > 1:
                raise ValueError(
                    "Can't slice a dataset with multiple slices without subpath"
                )
            num, ofs = slice_extract_info(slice_list[0], self.num_samples)
            return DatasetView(
                dataset=self.dataset,
                num_samples=num,
                offset=ofs + self.offset,
                squeeze_dim=isinstance(slice_list[0], int),
            )

        if not slice_list:
            # key only: address every sample the view covers
            whole_view = slice(self.offset, self.offset + self.num_samples)
            if subpath not in self.dataset._tensors.keys():
                # NOTE(review): this call passes the dataset and a ``slice``
                # keyword, the call at the end of the method passes
                # (subpath, slice) positionally - confirm which one matches
                # _get_dictionary.
                return self._get_dictionary(self.dataset, subpath, slice=whole_view)
            return TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=whole_view,
                squeeze_dims=[True] if self.squeeze_dim else [],
            )

        # key plus slices: shift the first entry by the offset of the view
        num, ofs = slice_extract_info(slice_list[0], self.num_samples)
        if num == 1:
            slice_list[0] = ofs + self.offset
        else:
            slice_list[0] = slice(ofs + self.offset, ofs + self.offset + num)

        if subpath in self.dataset._tensors.keys():
            return TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_list,
                squeeze_dims=[True] if self.squeeze_dim else [],
            )
        if len(slice_list) > 1:
            raise ValueError("You can't slice a dictionary of Tensors")
        return self._get_dictionary(subpath, slice_list[0])
Esempio n. 11
0
 def __getitem__(self, slice_):
     """| Returns a view (or computed data) for a slice or slices of the dataset
     | Usage:
     >>> return ds["image", 5, 0:1920, 0:1080, 0:3].compute() # returns numpy array
     >>> images = ds["image"]
     >>> return images[5].compute() # returns numpy array
     >>> images = ds["image"]
     >>> image = images[5]
     >>> return image[0:1920, 0:1080, 0:3].compute()
     """
     if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
         slice_ = [slice_]
     subpath, slice_list = slice_split(list(slice_))

     if not subpath:
         # no key given: the result is a view over samples of the dataset
         if len(slice_list) > 1:
             raise ValueError(
                 "Can't slice a dataset with multiple slices without subpath"
             )
         num, ofs = slice_extract_info(slice_list[0], self.shape[0])
         return DatasetView(
             dataset=self,
             num_samples=num,
             offset=ofs,
             squeeze_dim=isinstance(slice_list[0], int),
             lazy=self.lazy,
         )

     if slice_list:
         # The returned values are not used; the call is kept so the first
         # entry still goes through slice_extract_info.
         slice_extract_info(slice_list[0], self.shape[0])
         if subpath not in self._tensors.keys():
             if len(slice_list) > 1:
                 raise ValueError("You can't slice a dictionary of Tensors")
             return self._get_dictionary(subpath, slice_list[0])
         tensor_slice = slice_list
     else:
         if subpath not in self._tensors.keys():
             return self._get_dictionary(subpath)
         # key only: cover every sample of the tensor
         tensor_slice = slice(0, self.shape[0])

     tensorview = TensorView(
         dataset=self,
         subpath=subpath,
         slice_=tensor_slice,
         lazy=self.lazy,
     )
     return tensorview if self.lazy else tensorview.compute()
Esempio n. 12
0
    def __setitem__(self, slice_, value):
        """| Writes a value into the samples covered by this dataset view
        | Usage:

        >>> ds_view = ds[5:15]
        >>> ds_view["image", 3, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets the 8th image
        """
        self.dataset._auto_checkout()
        # handling strings and bytes
        assign_value = str_to_int(get_value(value), self.dataset.tokenizer)

        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        subpath, slice_list = slice_split(list(slice_))
        if isinstance(self.indexes, int):
            slice_list = [0] + slice_list

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without key")
        if subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in dataset")

        if not slice_list:
            # key only: address every sample the view holds
            if self.is_contiguous:
                sample_key = slice(self.indexes[0], self.indexes[-1] + 1)
            else:
                sample_key = self.indexes
            if isinstance(sample_key, list):
                # a list of indexes is written one sample at a time
                for position, sample in enumerate(sample_key):
                    self.dataset._tensors[subpath][sample] = assign_value[position]
            else:
                self.dataset._tensors[subpath][sample_key] = assign_value
            return

        # translate the first entry through the indexes of the view
        if not isinstance(self.indexes, list):
            slice_list[0] = self.indexes
        else:
            picked = self.indexes[slice_list[0]]
            if self.is_contiguous and isinstance(picked, list) and picked:
                slice_list[0] = slice(picked[0], picked[-1] + 1)
            else:
                slice_list[0] = picked

        if isinstance(slice_list[0], list):
            # a list of indexes is written one sample at a time
            for position, sample in enumerate(slice_list[0]):
                current_slice = [sample] + slice_list[1:]
                self.dataset._tensors[subpath][current_slice] = assign_value[position]
        else:
            self.dataset._tensors[subpath][slice_list] = assign_value
0
    def __setitem__(self, slice_, value):
        """| Sets a slice or slices with a value
        | Usage:

        >>> images_tensorview = ds["image"]
        >>> images_tensorview[7, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets 7th image

        ``value`` goes through ``get_value`` and ``str_to_int`` before it is
        written. The requested slices are combined with the indexes, nums and
        offsets this view already holds. When the resulting first entry is
        neither an int nor a slice, it is iterated and ``assign_value[i]`` is
        written for each of its elements.

        Raises:
            ValueError: if ``slice_split`` extracts a subpath from ``slice_``.
        """
        assign_value = get_value(value)
        # handling strings and bytes
        assign_value = str_to_int(assign_value, self.dataset.tokenizer)

        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        slice_ = self.slice_fill(slice_)
        subpath, slice_list = slice_split(slice_)
        if subpath:
            raise ValueError("Can't setitem of TensorView with subpath")
        new_nums = self.nums.copy()
        new_offsets = self.offsets.copy()
        if isinstance(self.indexes, list):
            new_indexes = self.indexes[slice_list[0]]
            # An integer key picks a single element out of the list, which
            # cannot be subscripted; only a non-empty list is collapsed into
            # a contiguous slice.
            if (self.is_contiguous and isinstance(new_indexes, list)
                    and new_indexes):
                new_indexes = slice(new_indexes[0], new_indexes[-1] + 1)
        elif isinstance(self.indexes, int):
            new_indexes = self.indexes
        else:
            # self.indexes is used as a slice here (start/stop are read)
            ofs = self.indexes.start or 0
            num = self.indexes.stop - ofs if self.indexes.stop else None
            new_indexes = self._combine(slice_list[0], num, ofs)
        slice_list[0] = new_indexes
        if len(new_nums) < len(slice_list):
            # pad so every requested entry has a (num, offset) pair
            new_nums.extend([None] * (len(slice_list) - len(new_nums)))
            new_offsets.extend([0] * (len(slice_list) - len(new_offsets)))
        # the first entry was handled through self.indexes above
        for i in range(1, len(slice_list)):
            slice_list[i] = self._combine(slice_list[i], new_nums[i], new_offsets[i])
        # entries the request did not reach are rebuilt from nums/offsets
        for i in range(len(slice_list), len(new_nums)):
            cur_slice = (
                slice(new_offsets[i], new_offsets[i] + new_nums[i])
                if not self.squeeze_dims[i]
                else new_offsets[i]
            )
            slice_list.append(cur_slice)

        if isinstance(slice_list[0], (int, slice)):
            self.dataset._tensors[self.subpath][slice_list] = assign_value
        else:
            for i, index in enumerate(slice_list[0]):
                current_slice = [index] + slice_list[1:]
                # The local ``subpath`` is always empty here (a non-empty one
                # raised above), so the tensor has to be looked up through
                # self.subpath, as in the branch above.
                self.dataset._tensors[self.subpath][current_slice] = assign_value[i]
Esempio n. 14
0
    def __getitem__(self, slice_):
        """| Returns a view (or computed data) for a slice or slices of this tensorview
        | Usage:

        >>> images_tensorview = ds["image"]
        >>> return images_tensorview[7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 7th image
        """
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        slice_ = self.slice_fill(list(slice_))
        subpath, slice_list = slice_split(slice_)

        nums = self.nums.copy()
        offsets = self.offsets.copy()
        requested = len(slice_list)
        if len(nums) < requested:
            # pad so every requested entry has a (num, offset) pair
            nums.extend([None] * (requested - len(nums)))
            offsets.extend([0] * (requested - len(offsets)))

        # merge each requested entry with what the view already holds
        for i, item in enumerate(slice_list):
            slice_list[i] = self._combine(item, nums[i], offsets[i])

        # entries the request did not reach are rebuilt from nums/offsets
        for i in range(requested, len(nums)):
            if nums[i] > 1:
                slice_list.append(slice(offsets[i], offsets[i] + nums[i]))
            else:
                slice_list.append(offsets[i])

        if not subpath and not (
            len(slice_list) > len(self.nums) and isinstance(self.dtype, Sequence)
        ):
            view = TensorView(
                dataset=self.dataset,
                subpath=self.subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
        else:
            view = ObjectView(
                dataset=self.dataset,
                subpath=self.subpath + subpath,
                slice_list=slice_list,
                lazy=self.lazy,
            )
        return view if self.lazy else view.compute()
Esempio n. 15
0
    def __setitem__(self, slice_, value):
        """| Writes a value into a tensor of the dataset
        | Usage:
        >>> ds["image", 5, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        >>> images = ds["image"]
        >>> image = images[5]
        >>> image[0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        """
        if "r" in self._mode:
            raise ReadModeException("__setitem__")
        self._auto_checkout()

        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        subpath, slice_list = slice_split(list(slice_))

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without subpath")
        if subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in the dataset")

        assign_value = get_value(value)

        # look up the schema entry for the key (first character skipped)
        schema_dict = self.schema
        relative_path = subpath[1:]
        if relative_path in schema_dict.dict_.keys():
            schema_key = schema_dict.dict_.get(relative_path, None)
        else:
            # descend one path component at a time
            for component in relative_path.split("/"):
                schema_key = component
                schema_dict = schema_dict.dict_.get(component, None)
                if not isinstance(schema_dict, SchemaDict):
                    schema_key = schema_dict

        if isinstance(schema_key, ClassLabel):
            assign_value = check_class_label(assign_value, schema_key)

        needs_tokenizing = isinstance(schema_key, (Text, bytes)) or (
            isinstance(assign_value, Iterable)
            and any(isinstance(val, str) for val in assign_value)
        )
        if needs_tokenizing:
            # handling strings and bytes
            assign_value = str_to_int(assign_value, self.tokenizer)

        # without explicit slices the whole tensor is assigned
        tensor_key = slice_list if slice_list else slice(None)
        self._tensors[subpath][tensor_key] = assign_value
Esempio n. 16
0
    def __setitem__(self, slice_, value):
        """| Writes a value into a slice or slices of this tensorview
        | Usage:

        >>> images_tensorview = ds["image"]
        >>> images_tensorview[7, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets 7th image
        """
        # handling strings and bytes
        assign_value = str_to_int(get_value(value), self.dataset.tokenizer)

        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        slice_ = self.slice_fill(list(slice_))
        subpath, slice_list = slice_split(slice_)

        nums = self.nums.copy()
        offsets = self.offsets.copy()
        requested = len(slice_list)
        if len(nums) < requested:
            # pad so every requested entry has a (num, offset) pair
            nums.extend([None] * (requested - len(nums)))
            offsets.extend([0] * (requested - len(offsets)))

        # merge each requested entry with what the view already holds
        for i, item in enumerate(slice_list):
            slice_list[i] = self._combine(item, nums[i], offsets[i])

        # entries the request did not reach are rebuilt from nums/offsets
        for i in range(requested, len(nums)):
            if nums[i] > 1:
                slice_list.append(slice(offsets[i], offsets[i] + nums[i]))
            else:
                slice_list.append(offsets[i])

        if not subpath and not (
            len(slice_list) > len(self.nums) and isinstance(self.dtype, Sequence)
        ):
            self.dataset._tensors[self.subpath][slice_list] = assign_value
            return

        # a subpath or extra slices on a Sequence go through an ObjectView
        objectview = ObjectView(
            dataset=self.dataset,
            subpath=self.subpath + subpath,
            slice_list=slice_list,
        )
        objectview[:] = assign_value
Esempio n. 17
0
    def __getitem__(self, slice_):
        """| Gets a slice from an objectview

        ``slice_`` is split by ``slice_split`` into a subpath and a list of
        indices/slices. A subpath is resolved with ``self.process_path`` and
        appended to ``self.subpath``. When ``self.indexes`` is a list or a
        slice, the first entry of the list narrows it; the remaining entries
        are applied to the dimensions that are not squeezed. A new ObjectView
        is built from the result and returned as is when ``self.lazy`` is
        set, and computed first otherwise.

        Raises:
            IndexError: if more entries are given than there are dimensions
                that are not squeezed.
        """
        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)

        # work on copies so the state of this view is left untouched
        nums, offsets, squeeze_dims, inner_schema_obj = (
            self.nums.copy(),
            self.offsets.copy(),
            self.squeeze_dims.copy(),
            self.inner_schema_obj,
        )

        if subpath:
            inner_schema_obj, nums, offsets, squeeze_dims = self.process_path(
                subpath, inner_schema_obj, nums, offsets, squeeze_dims)
        subpath = self.subpath + subpath

        new_indexes = self.indexes
        if len(slice_list) >= 1:
            if isinstance(self.indexes, list):
                new_indexes = self.indexes[slice_list[0]]
                # An integer key picks a single element out of the list,
                # which cannot be subscripted; only a non-empty list is
                # collapsed into a contiguous slice.
                if (self.is_contiguous and isinstance(new_indexes, list)
                        and new_indexes):
                    new_indexes = slice(new_indexes[0], new_indexes[-1] + 1)
                slice_list = slice_list[1:]
            elif isinstance(self.indexes, slice):
                ofs = self.indexes.start or 0
                num = self.indexes.stop - ofs if self.indexes.stop else None
                num, ofs_temp = slice_extract_info(slice_list[0], num)
                new_indexes = (ofs + ofs_temp if isinstance(
                    slice_list[0], int) else slice(ofs + ofs_temp, ofs +
                                                   ofs_temp + num))
                slice_list = slice_list[1:]
            # for any other kind of self.indexes the first entry is not
            # consumed here and is handled with the dimensions below

        if len(slice_list) >= 1:
            # Expand slice list
            exp_slice_list = []
            for squeeze in squeeze_dims:
                if squeeze:
                    # squeezed dimensions do not take an entry
                    exp_slice_list += [None]
                else:
                    if len(slice_list) > 0:
                        exp_slice_list += [slice_list.pop(0)]
                    else:
                        # slice list smaller than max
                        exp_slice_list += [None]
            if len(slice_list) > 0:
                # slice list longer than max
                raise IndexError("Too many indices")
            for i, it in enumerate(exp_slice_list):
                if it is not None:
                    num, ofs = slice_extract_info(it, nums[i])
                    nums[i] = num
                    offsets[i] += ofs
                    # an integer entry marks the dimension as squeezed
                    squeeze_dims[i] = isinstance(it, int)

        objectview = ObjectView(
            dataset=self.dataset,
            subpath=subpath,
            slice_=None,
            indexes=new_indexes,
            nums=nums,
            offsets=offsets,
            squeeze_dims=squeeze_dims,
            inner_schema_obj=inner_schema_obj,
            lazy=self.lazy,
            check_bounds=False,
        )
        return objectview if self.lazy else objectview.compute()
Esempio n. 18
0
    def __getitem__(self, slice_):
        """| Returns a view (or computed data) for a slice or slices of this DatasetView
        | Usage:

        >>> ds_view = ds[5:15]
        >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image
        """
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            slice_ = [slice_]
        subpath, slice_list = slice_split(list(slice_))
        if isinstance(self.indexes, int):
            slice_list = [0] + slice_list

        if not subpath:
            # no key given: narrow the dataset view itself
            if len(slice_list) > 1:
                raise ValueError(
                    "Can't slice dataset with multiple slices without key")
            narrowed = self.indexes[slice_list[0]]
            return DatasetView(dataset=self.dataset,
                               lazy=self.lazy,
                               indexes=narrowed)

        if not slice_list:
            # key only: address every sample the view holds
            if self.is_contiguous:
                view_slice = [slice(self.indexes[0], self.indexes[-1] + 1)]
            else:
                view_slice = [self.indexes]
            if subpath in self.keys:
                tensorview = TensorView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=view_slice,
                    lazy=self.lazy,
                )
                return tensorview if self.lazy else tensorview.compute()
            # a key that is a prefix of the subpath leads to an object view
            if any(subpath.startswith(key) for key in self.keys):
                objectview = ObjectView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=view_slice,
                    lazy=self.lazy,
                )
                return objectview if self.lazy else objectview.compute()
            return self._get_dictionary(subpath, view_slice)

        # key plus slices: translate the first entry through the view indexes
        if isinstance(self.indexes, list):
            picked = self.indexes[slice_list[0]]
            if self.is_contiguous and isinstance(picked, list) and picked:
                picked = slice(picked[0], picked[-1] + 1)
        else:
            picked = self.indexes
        slice_list[0] = picked
        schema_obj = self.dataset.schema.dict_[subpath.split("/")[1]]

        if subpath in self.keys and (not isinstance(schema_obj, Sequence)
                                     or len(slice_list) <= 1):
            tensorview = TensorView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
            return tensorview if self.lazy else tensorview.compute()
        # a key that is a prefix of the subpath leads to an object view
        if any(subpath.startswith(key) for key in self.keys):
            objectview = ObjectView(
                dataset=self.dataset,
                subpath=subpath,
                slice_=slice_list,
                lazy=self.lazy,
            )
            return objectview if self.lazy else objectview.compute()
        if len(slice_list) > 1:
            raise ValueError("You can't slice a dictionary of Tensors")
        return self._get_dictionary(subpath, slice_list[0])
Esempio n. 19
0
    def __setitem__(self, slice_, value):
        """| Sets a slice or slices with a value
        | Usage:

        >>> ds_view = ds[5:15]
        >>> ds_view["image", 3, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8") # sets the 8th image

        Args:
            slice_: a key (subpath string) optionally followed by ints/slices.
            value: the data to store; it is passed through ``get_value`` and,
                depending on the schema entry, through ``check_class_label``
                and/or ``str_to_int`` before being written.

        Raises:
            ValueError: if the key carries no subpath.
            KeyError: if the subpath is not one of this view's keys.
        """
        self.dataset._auto_checkout()

        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)
        slice_list = [0] + slice_list if isinstance(self.indexes,
                                                    int) else slice_list

        # Validate the key before touching the schema: walking the schema with
        # an unknown nested key would dereference ``.dict_`` on the ``None``
        # returned by ``.get`` and surface as AttributeError instead of the
        # KeyError promised below.
        if not subpath:
            raise ValueError("Can't assign to dataset sliced without key")
        if subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in dataset")

        assign_value = get_value(value)

        # Resolve the schema entry for the key: either a direct top-level hit
        # or a walk through nested schema dictionaries, one path segment at a
        # time.
        schema_dict = self.dataset.schema
        if subpath[1:] in schema_dict.dict_.keys():
            schema_key = schema_dict.dict_.get(subpath[1:], None)
        else:
            for schema_key in subpath[1:].split("/"):
                schema_dict = schema_dict.dict_.get(schema_key, None)
                if not isinstance(schema_dict, SchemaDict):
                    schema_key = schema_dict
        if isinstance(schema_key, ClassLabel):
            assign_value = check_class_label(assign_value, schema_key)
        if isinstance(
                schema_key,
            (Text, bytes)) or (isinstance(assign_value, Iterable) and any(
                isinstance(val, str) for val in assign_value)):
            # handling strings and bytes
            assign_value = str_to_int(assign_value, self.dataset.tokenizer)

        if not slice_list:
            # No explicit sample index: write to every sample of the view.
            target = (slice(self.indexes[0], self.indexes[-1] +
                            1) if self.is_contiguous else self.indexes)
            if not isinstance(target, list):
                self.dataset._tensors[subpath][target] = assign_value
            else:
                # Non-contiguous view: assign sample by sample.
                for i, index in enumerate(target):
                    self.dataset._tensors[subpath][index] = assign_value[i]
        else:
            # Translate the view-relative sample index into dataset indexes.
            if isinstance(self.indexes, list):
                indexes = self.indexes[slice_list[0]]
                if self.is_contiguous and isinstance(indexes,
                                                     list) and indexes:
                    slice_list[0] = slice(indexes[0], indexes[-1] + 1)
                else:
                    slice_list[0] = indexes
            else:
                slice_list[0] = self.indexes

            if not isinstance(slice_list[0], list):
                self.dataset._tensors[subpath][slice_list] = assign_value
            else:
                # Non-contiguous selection: assign sample by sample, keeping
                # the remaining per-sample slices unchanged.
                for i, index in enumerate(slice_list[0]):
                    current_slice = [index] + slice_list[1:]
                    self.dataset._tensors[subpath][
                        current_slice] = assign_value[i]
Esempio n. 20
0
    def __getitem__(self, slice_):
        """| Gets a slice or slices from DatasetView
        | Usage:

        >>> ds_view = ds[5:15]
        >>> return ds_view["image", 7, 0:1920, 0:1080, 0:3].compute() # returns numpy array of 12th image

        | A key without a subpath yields a new DatasetView whose offset is
        | shifted by this view's offset. A key with a subpath yields a
        | TensorView or ObjectView (computed right away unless ``self.lazy``
        | is set), or falls back to ``self._get_dictionary``.

        Raises:
            ValueError: if several slices are given without a subpath, or if
                a dictionary of tensors is sliced beyond the sample axis.
        """
        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]

        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)

        # A squeezed view holds a single sample: address it as position 0.
        slice_list = [0] + slice_list if self.squeeze_dim else slice_list

        if not subpath:
            if len(slice_list) > 1:
                raise ValueError(
                    "Can't slice a dataset with multiple slices without subpath"
                )
            num, ofs = slice_extract_info(slice_list[0], self.num_samples)
            return DatasetView(
                dataset=self.dataset,
                num_samples=num,
                offset=ofs + self.offset,
                squeeze_dim=isinstance(slice_list[0], int),
                lazy=self.lazy,
            )
        elif not slice_list:
            # Subpath only: cover every sample of this view.
            slice_ = (
                slice(self.offset, self.offset + self.num_samples)
                if not self.squeeze_dim
                else self.offset
            )
            if subpath in self.dataset._tensors.keys():
                tensorview = TensorView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=slice_,
                    lazy=self.lazy,
                )
                return tensorview if self.lazy else tensorview.compute()
            for key in self.dataset._tensors.keys():
                if subpath.startswith(key):
                    objectview = ObjectView(
                        dataset=self.dataset,
                        subpath=subpath,
                        slice_list=[slice_],
                        lazy=self.lazy,
                    )
                    return objectview if self.lazy else objectview.compute()
            # Same positional (subpath, slice) call shape as the other
            # _get_dictionary call at the end of this method; the previous
            # (self.dataset, subpath, slice=...) form did not match it.
            return self._get_dictionary(subpath, slice_)
        else:
            # Translate the view-relative sample index into a dataset index.
            num, ofs = slice_extract_info(slice_list[0], self.num_samples)
            slice_list[0] = (
                ofs + self.offset
                if isinstance(slice_list[0], int)
                else slice(ofs + self.offset, ofs + self.offset + num)
            )
            schema_obj = self.dataset.schema.dict_[subpath.split("/")[1]]

            if subpath in self.dataset._tensors.keys() and (
                not isinstance(schema_obj, Sequence) or len(slice_list) <= 1
            ):
                tensorview = TensorView(
                    dataset=self.dataset,
                    subpath=subpath,
                    slice_=slice_list,
                    lazy=self.lazy,
                )
                return tensorview if self.lazy else tensorview.compute()
            for key in self.dataset._tensors.keys():
                if subpath.startswith(key):
                    objectview = ObjectView(
                        dataset=self.dataset,
                        subpath=subpath,
                        slice_list=slice_list,
                        lazy=self.lazy,
                    )
                    return objectview if self.lazy else objectview.compute()
            if len(slice_list) > 1:
                raise ValueError("You can't slice a dictionary of Tensors")
            return self._get_dictionary(subpath, slice_list[0])
Esempio n. 21
0
 def __getitem__(self, slice_):
     """| Gets a slice or slices from dataset
     | Usage:
     >>> return ds["image", 5, 0:1920, 0:1080, 0:3].compute() # returns numpy array
     >>> images = ds["image"]
     >>> return images[5].compute() # returns numpy array
     >>> images = ds["image"]
     >>> image = images[5]
     >>> return image[0:1920, 0:1080, 0:3].compute()

     | A key without a subpath yields a DatasetView over the selected
     | indexes. A key with a subpath yields a TensorView or ObjectView
     | (computed right away unless ``self.lazy`` is set), or falls back to
     | ``self._get_dictionary`` when no tensor key matches.
     """

     def finish(view):
         # Lazy callers get the view itself, eager callers get its data.
         return view if self.lazy else view.compute()

     # A bare string is a single key, not an iterable of characters.
     if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
         slice_ = [slice_]
     subpath, slice_list = slice_split(list(slice_))

     # --- no subpath: select samples of the whole dataset ----------------
     if not subpath:
         if len(slice_list) > 1:
             raise ValueError(
                 "Can't slice a dataset with multiple slices without key")
         selected = self.indexes[slice_list[0]]
         return DatasetView(
             dataset=self,
             indexes=selected,
             lazy=self.lazy,
         )

     # --- subpath only: cover the full first dimension -------------------
     if not slice_list:
         if subpath in self.keys:
             return finish(
                 TensorView(
                     dataset=self,
                     subpath=subpath,
                     slice_=slice(0, self._shape[0]),
                     lazy=self.lazy,
                 ))
         if any(subpath.startswith(key) for key in self.keys):
             return finish(
                 ObjectView(
                     dataset=self,
                     subpath=subpath,
                     lazy=self.lazy,
                     slice_=[slice(0, self._shape[0])],
                 ))
         return self._get_dictionary(subpath)

     # --- subpath and slices ----------------------------------------------
     schema_obj = self.schema.dict_[subpath.split("/")[1]]
     if subpath in self.keys and not (isinstance(schema_obj, Sequence)
                                      and len(slice_list) > 1):
         return finish(
             TensorView(dataset=self,
                        subpath=subpath,
                        slice_=slice_list,
                        lazy=self.lazy))
     if any(subpath.startswith(key) for key in self.keys):
         return finish(
             ObjectView(
                 dataset=self,
                 subpath=subpath,
                 slice_=slice_list,
                 lazy=self.lazy,
             ))
     if len(slice_list) > 1:
         raise ValueError("You can't slice a dictionary of Tensors")
     return self._get_dictionary(subpath, slice_list[0])
Esempio n. 22
0
    def __getitem__(self, slice_):
        """| Gets a slice from an objectview

        | The key may hold a subpath and/or ints and slices. The first
        | int/slice narrows the underlying dataset (unless that dataset is a
        | DatasetView that is already squeezed); the remaining ones are
        | applied, in order, to the dimensions that are not squeezed yet.
        | Returns a new ObjectView, computed right away unless ``self.lazy``
        | is set.

        Raises:
            IndexError: if more indices are supplied than there are
                unsqueezed dimensions left.
        """
        if not isinstance(slice_, abc.Iterable) or isinstance(slice_, str):
            slice_ = [slice_]
        slice_ = list(slice_)
        subpath, slice_list = slice_split(slice_)

        dataset = self.dataset
        # Work on copies so the view being indexed stays untouched.
        nums = self.nums.copy()
        offsets = self.offsets.copy()
        squeeze_dims = self.squeeze_dims.copy()
        inner_schema_obj = self.inner_schema_obj

        if subpath:
            inner_schema_obj, nums, offsets, squeeze_dims = self.process_path(
                subpath, inner_schema_obj, nums, offsets, squeeze_dims)
        subpath = self.subpath + subpath
        if slice_list:
            # Slice first dim
            if isinstance(self.dataset,
                          DatasetView) and not self.dataset.squeeze_dim:
                dataset = self.dataset[slice_list[0]]
                slice_list = slice_list[1:]
            elif not isinstance(self.dataset, DatasetView):
                num, ofs = slice_extract_info(slice_list[0],
                                              self.dataset.shape[0])
                dataset = DatasetView(self.dataset, num, ofs,
                                      isinstance(slice_list[0], int))
                slice_list = slice_list[1:]

            # Expand slice list for rest of dims
            if slice_list:
                exp_slice_list = []
                for squeeze in squeeze_dims:
                    if squeeze or not slice_list:
                        # Either the dimension is already squeezed, or the
                        # slice list is shorter than the number of dims.
                        exp_slice_list.append(None)
                    else:
                        exp_slice_list.append(slice_list.pop(0))
                if slice_list:
                    # slice list longer than max
                    raise IndexError("Too many indices")
                for i, it in enumerate(exp_slice_list):
                    if it is None:
                        continue
                    num, ofs = slice_extract_info(it, nums[i])
                    nums[i] = num
                    offsets[i] += ofs
                    # Only an integer index drops its dimension. A slice that
                    # happens to select one element (e.g. 0:1) keeps it,
                    # matching how the first dimension is handled above.
                    squeeze_dims[i] = isinstance(it, int)
        objectview = ObjectView(
            dataset=dataset,
            subpath=subpath,
            slice_list=None,
            nums=nums,
            offsets=offsets,
            squeeze_dims=squeeze_dims,
            inner_schema_obj=inner_schema_obj,
            lazy=self.lazy,
            new=False,
        )
        return objectview if self.lazy else objectview.compute()