def __call__(self, ids):
    """Yield flattened, transformed samples for each index in this shard.

    Every sample is fetched from the backing dataset, run through the
    user-supplied transform function, unwrapped into individual items,
    and flattened against the schema so the results can be stored in
    ray shared memory.
    """
    for i in ids:
        transformed = self._func(self._ds[i], **self.kwargs)
        for sample in Transform._unwrap(transformed):
            yield Transform._flatten_dict(sample, schema=self.schema)
def _func_argd(_func, index, _ds, schema, **kwargs):
    """Remote wrapper around a user-defined transform function.

    Applies ``_func`` to the sample at ``index`` of ``_ds``, flattens the
    result against ``schema``, and returns the flattened values as a list
    (ray remote calls need a plain list of return values).

    Parameters
    ----------
    _func: callable
        User-defined transform applied to the sample
    index: int
        Index of the sample inside ``_ds``
    _ds: Dataset or DatasetView
        Source the sample is read from
    schema: dict
        Schema the transformed item is flattened against
    kwargs: dict
        Extra keyword arguments forwarded to ``_func``

    Returns
    -------
    list
        Values of the flattened, transformed item
    """
    # Disable dimension squeezing so single-sample indexing keeps its shape.
    if isinstance(_ds, (Dataset, DatasetView)):
        _ds.squeeze_dim = False
    item = _ds[index]
    item = _func(item, **kwargs)
    item = Transform._flatten_dict(item, schema=schema)
    return list(item.values())
def __call__(self, ids):
    """Yield flattened, transformed samples for each index in this shard.

    Samples that come back as dataset views are materialized with
    ``compute()`` first. The user function may return either a single
    item or a list of items; each one is flattened against the schema
    before being yielded for storage in ray shared memory.
    """
    for i in ids:
        sample = self._ds[i]
        if isinstance(sample, (DatasetView, Dataset)):
            sample = sample.compute()
        results = self._func(0, sample)
        if not isinstance(results, list):
            results = [results]
        for result in results:
            yield Transform._flatten_dict(result, schema=self.schema)
def _func_argd(_func, index, _ds, schema, kwargs):
    """Remote wrapper around a user-defined transform function.

    Reads the sample at ``index`` from ``_ds`` (materializing dataset
    views via ``compute()``), applies ``_func``, flattens the result
    against ``schema`` and returns the flattened values as a list.

    NOTE(review): ``kwargs`` is accepted positionally but not forwarded
    to ``_func`` here — presumably kept for remote-call signature
    compatibility; confirm against the caller.
    """
    # Normalize a scalar index selection into a one-element list so that
    # downstream indexing behaves uniformly.
    if isinstance(_ds, (Dataset, DatasetView)) and isinstance(_ds.indexes, int):
        _ds.indexes = [_ds.indexes]
    sample = _ds[index]
    if isinstance(sample, (DatasetView, Dataset)):
        sample = sample.compute()
    transformed = _func(0, sample)
    flattened = Transform._flatten_dict(transformed, schema=schema)
    return list(flattened.values())
def upload_chunk(i_batch, key, ds):
    """Remote function that uploads one chunk of a tensor.

    Returns the shape of a dynamic tensor so all shapes can be set in
    one pass after every upload has completed.

    Parameters
    ----------
    i_batch: Tuple
        Tuple composed of (index, batch)
    key: str
        Key of the tensor
    ds: Dataset
        Dataset the chunk is written into

    Returns
    ----------
    (key, slice_, shape) to set the shape later
    """
    index, batch = i_batch
    # A batch of ray object refs must be materialized before writing.
    if not isinstance(batch, dict) and isinstance(batch[0], ray.ObjectRef):
        batch = ray.get(batch)
    # FIXME an ugly hack to unwrap elements with a schema that has one tensor
    flat_schema = Transform._flatten_dict(ds.schema.dict_, schema=ds.schema.dict_)
    if len(flat_schema) == 1:
        batch = [element for group in batch for element in group]
    size = len(batch)
    slice_ = slice(index * size, (index + 1) * size)
    shape = None
    if ds[key].is_dynamic:
        # Sometimes ds._tensor slice_ gets out of the shape value
        shape = ds._tensors[f"/{key}"].get_shape_from_value([slice_], batch)
    ds[key, slice_] = batch
    return (key, [slice_], shape)