Beispiel #1
0
    def __setitem__(self, slice_, value):
        """| Writes ``value`` into the tensor addressed by ``slice_``
        | Usage:
        >>> ds["image", 5, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        >>> images = ds["image"]
        >>> image = images[5]
        >>> image[0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")

        Parameters
        ----------
        slice_:
            A single key/index or an iterable of them. ``slice_split`` separates it
            into the tensor key (subpath) and the remaining index list.
        value:
            The data to store. It is passed through ``get_value`` and then
            ``str_to_int`` (with the dataset tokenizer) before being assigned.

        Raises
        ------
        ReadModeException
            If the dataset mode contains "r".
        ValueError
            If ``slice_`` carries no subpath.
        KeyError
            If the subpath is not one of the dataset keys.
        """
        if "r" in self._mode:
            raise ReadModeException("__setitem__")
        self._auto_checkout()

        # The value is converted before the slice is validated, so a conversion
        # error surfaces ahead of a bad-key error.
        payload = get_value(value)
        # handling strings and bytes
        payload = str_to_int(payload, self.tokenizer)

        # A lone key/index (including a plain string, which is itself iterable)
        # is wrapped so that slice_split always receives a list.
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            parts = [slice_]
        else:
            parts = list(slice_)
        subpath, slice_list = slice_split(parts)

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without subpath")
        if subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in the dataset")

        target = self._tensors[subpath]
        if slice_list:
            target[slice_list] = payload
        else:
            # No index given: overwrite the whole tensor.
            target[:] = payload
Beispiel #2
0
    def commit(self, message: str = "") -> str:
        """| Saves the current state of the dataset as a commit and starts a new working commit.
        | ``self._auto_checkout()`` is invoked before the commit is recorded.

        For a dataset whose commit id is ``None`` (created before version control),
        a warning is issued, the dataset is flushed and nothing else happens.

        Parameters
        ----------
        message: str, optional
            The commit message to store along with the commit

        Returns
        -------
        str
            The commit id the dataset had before a new id was generated, i.e. the id
            of the state that was just saved. ``None`` for datasets without version
            control.

        Raises
        ------
        ReadModeException
            If the dataset supports version control but its mode contains "r".
        """
        if self._commit_id is None:
            warnings.warn(
                "This dataset was created before version control, "
                "it does not support it. commit will behave same as flush"
            )
            self.flush()
            return None
        if "r" in self._mode:
            raise ReadModeException("commit")

        self._auto_checkout()
        saved_id = self._commit_id

        # Open a fresh commit on the same branch and link it after the current node.
        fresh_id = generate_hash()
        self._commit_id = fresh_id
        head = VersionNode(fresh_id, self._branch)
        self._version_node.insert(head, message)
        self._version_node = head

        # Register the new node as both the branch entry and a known commit.
        self._branch_node_map[self._branch] = head
        self._commit_node_map[fresh_id] = head
        self.flush()
        return saved_id
Beispiel #3
0
    def checkout(self, address: str, create: bool = False) -> str:
        """| Switches the dataset to the branch or commit named by ``address``.
        | Branch names are looked up first, then commit ids; if neither matches and ``create`` is True, a new branch called ``address`` is started from the current commit.

        The dataset is flushed before any switch is attempted.

        When a new branch is created from a node that has no children, the chunks of
        every key are copied from the current commit id to the new one and the new
        node is inserted under the current node's parent (when there is one).
        Otherwise the new node is inserted under the current node.

        Parameters
        ----------
        address: str
            The branch name or commit id to checkout to
        create: bool, optional
            When True, an unknown ``address`` creates a new branch instead of failing

        Returns
        -------
        str
            The commit id the dataset points to after the switch.

        Raises
        ------
        VersioningNotSupportedException
            If the dataset has no commit id (no version control support).
        ReadModeException
            If a new branch would have to be created while the mode contains "r".
        AddressNotFound
            If ``address`` is unknown and ``create`` is False.
        """
        if self._commit_id is None:
            raise VersioningNotSupportedException("checkout")
        self.flush()

        # Existing branch: move to the node registered for that branch.
        if address in self._branch_node_map:
            branch_node = self._branch_node_map[address]
            self._branch = address
            self._version_node = branch_node
            self._commit_id = branch_node.commit_id
            return self._commit_id

        # Existing commit: the branch is taken from the node itself.
        if address in self._commit_node_map:
            commit_node = self._commit_node_map[address]
            self._version_node = commit_node
            self._branch = commit_node.branch
            self._commit_id = commit_node.commit_id
            return self._commit_id

        if not create:
            raise AddressNotFound(address)

        # Unknown address with create=True: start a new branch.
        if "r" in self._mode:
            raise ReadModeException("checkout to create new branch")
        self._branch = address
        fresh_id = generate_hash()
        fresh_node = VersionNode(fresh_id, self._branch)
        note = f"switched to new branch {address}"

        origin = self._version_node
        if origin.children:
            origin.insert(fresh_node, note)
        else:
            # The current node has no children: duplicate its chunks under the
            # new commit id and attach the new node to the parent, if any.
            for key in self.keys:
                self._tensors[key].fs_map.copy_all_chunks(self._commit_id, fresh_id)
            if origin.parent is not None:
                origin.parent.insert(fresh_node, note)

        self._version_node = fresh_node
        self._commit_id = fresh_id
        self._branch_node_map[self._branch] = fresh_node
        self._commit_node_map[self._commit_id] = fresh_node
        self.flush()
        return self._commit_id
Beispiel #4
0
    def __setitem__(self, slice_, value):
        """| Writes ``value`` into the tensor addressed by ``slice_``
        | Usage:
        >>> ds["image", 5, 0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")
        >>> images = ds["image"]
        >>> image = images[5]
        >>> image[0:1920, 0:1080, 0:3] = np.zeros((1920, 1080, 3), "uint8")

        The schema entry for the addressed key decides how the value is prepared:
        ``ClassLabel`` entries go through ``check_class_label``; ``Text`` entries,
        and any iterable value containing a ``str`` element, go through
        ``str_to_int`` with the dataset tokenizer.

        Parameters
        ----------
        slice_:
            A single key/index or an iterable of them. ``slice_split`` separates it
            into the tensor key (subpath) and the remaining index list.
        value:
            The data to store; first unwrapped with ``get_value``.

        Raises
        ------
        ReadModeException
            If the dataset mode contains "r".
        ValueError
            If ``slice_`` carries no subpath.
        KeyError
            If the subpath is not one of the dataset keys.
        """
        if "r" in self._mode:
            raise ReadModeException("__setitem__")
        self._auto_checkout()

        # A lone key/index (including a plain string, which is itself iterable)
        # is wrapped so that slice_split always receives a list.
        if isinstance(slice_, str) or not isinstance(slice_, abc.Iterable):
            parts = [slice_]
        else:
            parts = list(slice_)
        subpath, slice_list = slice_split(parts)

        if not subpath:
            raise ValueError("Can't assign to dataset sliced without subpath")
        if subpath not in self.keys:
            raise KeyError(f"Key {subpath} not found in the dataset")

        assign_value = get_value(value)

        # Locate the schema entry for this key. The first character of the
        # subpath is dropped (presumably a leading "/" -- confirm against slice_split).
        schema_path = subpath[1:]
        node = self.schema
        if schema_path in node.dict_:
            # The whole path is a top-level schema key.
            leaf = node.dict_.get(schema_path, None)
        else:
            # Otherwise descend one "/"-separated component at a time. After the
            # loop, `leaf` is whatever the last component resolved to (or the
            # component name itself while still inside a SchemaDict).
            for part in schema_path.split("/"):
                node = node.dict_.get(part, None)
                leaf = part if isinstance(node, SchemaDict) else node

        if isinstance(leaf, ClassLabel):
            assign_value = check_class_label(assign_value, leaf)

        # Text is detected from the schema first; only if that fails is the
        # value itself scanned for string elements.
        needs_encoding = isinstance(leaf, (Text, bytes))
        if not needs_encoding and isinstance(assign_value, Iterable):
            needs_encoding = any(isinstance(item, str) for item in assign_value)
        if needs_encoding:
            # handling strings and bytes
            assign_value = str_to_int(assign_value, self.tokenizer)

        target = self._tensors[subpath]
        if slice_list:
            target[slice_list] = assign_value
        else:
            # No index given: overwrite the whole tensor.
            target[:] = assign_value