Exemple #1
0
    def __call__(self, ids):
        """
        Lazily transform the samples selected by ``ids``.

        This is a generator: for every index in ``ids`` the sample is read from
        ``self._ds``, passed to ``self._func`` together with ``self.kwargs``,
        and each element that ``Transform._unwrap`` produces from the result is
        yielded after going through ``Transform._flatten_dict`` with
        ``self.schema``.
        """
        for sample_index in ids:
            sample = self._ds[sample_index]
            transformed = self._func(sample, **self.kwargs)

            # One input sample may expand into several output elements.
            yield from (
                Transform._flatten_dict(element, schema=self.schema)
                for element in Transform._unwrap(transformed)
            )
Exemple #2
0
    def _func_argd(_func, index, _ds, schema, **kwargs):
        """
        Apply the user defined function to a single sample of ``_ds``.

        The sample at ``index`` is passed to ``_func`` along with ``kwargs``;
        the result is flattened with ``Transform._flatten_dict`` against
        ``schema`` and the values of the flattened mapping are returned as a
        list.
        """
        # Dataset-like inputs get ``squeeze_dim`` switched off before indexing.
        if isinstance(_ds, (Dataset, DatasetView)):
            _ds.squeeze_dim = False

        sample = _ds[index]
        result = _func(sample, **kwargs)
        flat = Transform._flatten_dict(result, schema=schema)
        return list(flat.values())
Exemple #3
0
    def __call__(self, ids):
        """
        Lazily transform the samples selected by ``ids``.

        This is a generator: each sample is read from ``self._ds`` (and
        materialised with ``compute()`` when it is a ``Dataset`` or
        ``DatasetView``), handed to ``self._func``, and every resulting item is
        yielded after going through ``Transform._flatten_dict`` with
        ``self.schema``.
        """
        for sample_index in ids:
            sample = self._ds[sample_index]
            if isinstance(sample, (DatasetView, Dataset)):
                sample = sample.compute()

            # NOTE(review): the literal 0 is the first positional argument of
            # self._func; its meaning is defined by that callable -- confirm.
            outputs = self._func(0, sample)
            # A non-list result is treated as a single output item.
            outputs = outputs if isinstance(outputs, list) else [outputs]

            for output in outputs:
                yield Transform._flatten_dict(output, schema=self.schema)
Exemple #4
0
    def _func_argd(_func, index, _ds, schema, kwargs):
        """
        Apply the user defined function to a single sample of ``_ds``.

        The sample at ``index`` is materialised with ``compute()`` when it is
        a ``Dataset`` or ``DatasetView``, passed to ``_func``, flattened with
        ``Transform._flatten_dict`` against ``schema``, and the values of the
        flattened mapping are returned as a list.

        ``kwargs`` is accepted but not used by this function.
        """
        # A bare integer ``indexes`` is wrapped in a list before indexing.
        if isinstance(_ds, (Dataset, DatasetView)):
            if isinstance(_ds.indexes, int):
                _ds.indexes = [_ds.indexes]

        sample = _ds[index]
        if isinstance(sample, (DatasetView, Dataset)):
            sample = sample.compute()

        # NOTE(review): the literal 0 is the first positional argument of
        # _func; its meaning is defined by that callable -- confirm.
        result = _func(0, sample)
        flat = Transform._flatten_dict(result, schema=schema)

        return list(flat.values())
Exemple #5
0
    def upload_chunk(i_batch, key, ds):
        """
        Write one batch of samples into the tensor ``key`` of ``ds``.

        When the batch is not a dict and its first element is a
        ``ray.ObjectRef``, the batch is first resolved with ``ray.get``.

        Parameters
        ----------
        i_batch: Tuple
            Tuple composed of (index, batch)
        key: str
            Key of the tensor
        ds:
            Dataset that receives the batch
        Returns
        ----------
        (key, [slice_], shape) where ``slice_`` is the range that was written
        and ``shape`` is the value returned by ``get_shape_from_value`` for a
        dynamic tensor, or None otherwise

        """
        batch_index, samples = i_batch
        if not isinstance(samples, dict) and isinstance(samples[0], ray.ObjectRef):
            samples = ray.get(samples)
            # FIXME an ugly hack to unwrap elements with a schema that has one tensor
            flat_schema = Transform._flatten_dict(ds.schema.dict_,
                                                  schema=ds.schema.dict_)
            if len(flat_schema.keys()) == 1:
                unwrapped = []
                for group in samples:
                    unwrapped.extend(group)
                samples = unwrapped

        count = len(samples)
        # NOTE(review): the offset is derived from this batch's own length, so
        # it presumes all batches are equally long -- confirm for the last one.
        target = slice(batch_index * count, (batch_index + 1) * count)

        # Sometimes ds._tensor slice_ gets out of the shape value
        dynamic_shape = (
            ds._tensors[f"/{key}"].get_shape_from_value([target], samples)
            if ds[key].is_dynamic
            else None
        )
        ds[key, target] = samples

        return (key, [target], dynamic_shape)