def range_table(n: int, *, parallelism: int = -1) -> Dataset[ArrowRow]:
    """Build a tabular dataset over the integers in [0..n).

    Behaves like range(), except that the integers are stored in Arrow
    tables as Arrow records. Each element has the form {"value": N}.

    Examples:
        >>> import ray
        >>> ds = ray.data.range_table(1000)  # doctest: +SKIP
        >>> ds  # doctest: +SKIP
        Dataset(num_blocks=200, num_rows=1000, schema={value: int64})
        >>> ds.map(lambda r: {"v2": r["value"] * 2}).take(2)  # doctest: +SKIP
        [ArrowRow({'v2': 0}), ArrowRow({'v2': 2})]

    Args:
        n: Exclusive upper bound of the range of integer records.
        parallelism: Requested amount of parallelism for the dataset; it
            may be limited by the number of items.

    Returns:
        Dataset holding the integers as Arrow records.
    """
    # The range datasource generates the integers; "arrow" selects the
    # Arrow-table block representation.
    datasource = RangeDatasource()
    return read_datasource(
        datasource,
        parallelism=parallelism,
        n=n,
        block_format="arrow",
    )
def range_tensor(
    n: int, *, shape: Tuple = (1, ), parallelism: int = 200
) -> Dataset[ArrowRow]:
    """Build a tensor dataset over the integers in [0..n).

    Behaves like range_arrow(), but the records use the ArrowTensorArray
    extension type. Each element has the form
    {"value": array(N, shape=shape)}.

    Examples:
        >>> ds = ray.data.range_tensor(1000, shape=(3, 10))
        >>> ds.map_batches(lambda arr: arr * 2, batch_format="pandas").show()

    Args:
        n: Exclusive upper bound of the range of integer records.
        shape: Shape of each individual record.
        parallelism: Requested amount of parallelism for the dataset; it
            may be limited by the number of items.

    Returns:
        Dataset holding the integers as Arrow tensor records.
    """
    datasource = RangeDatasource()
    # Any sequence is accepted for ``shape``; the datasource is always
    # handed a tuple.
    record_shape = tuple(shape)
    return read_datasource(
        datasource,
        parallelism=parallelism,
        n=n,
        block_format="tensor",
        tensor_shape=record_shape,
    )
def range_tensor(
    n: int, *, shape: Tuple = (1, ), parallelism: int = 200
) -> Dataset[np.ndarray]:
    """Build a tensor dataset over the integers in [0..n).

    Behaves like range(), but the integers are held in tensor form as
    np.ndarrays. The overall shape of the dataset is ``(n,) + shape``.

    Examples:
        >>> ds = ray.data.range_tensor(1000, shape=(3, 10))
        >>> ds.map_batches(lambda arr: arr ** 2).show()

    Args:
        n: Exclusive upper bound of the range of integer records.
        shape: Shape of each individual record.
        parallelism: Requested amount of parallelism for the dataset.

    Returns:
        Dataset holding the integers as tensors.
    """
    datasource = RangeDatasource()
    # Any sequence is accepted for ``shape``; the datasource is always
    # handed a tuple.
    record_shape = tuple(shape)
    return read_datasource(
        datasource,
        parallelism=parallelism,
        n=n,
        block_format="tensor",
        tensor_shape=record_shape,
    )
def range(n: int, *, parallelism: int = 200) -> Dataset[int]:
    """Build a dataset over the integers in [0..n).

    Examples:
        >>> ray.data.range(10000).map(lambda x: x * 2).show()

    Args:
        n: Exclusive upper bound of the range of integers.
        parallelism: Requested amount of parallelism for the dataset.

    Returns:
        Dataset holding the integers.
    """
    # "list" selects the plain Python list block representation.
    datasource = RangeDatasource()
    return read_datasource(
        datasource,
        parallelism=parallelism,
        n=n,
        block_format="list",
    )
def range_arrow(n: int, *, parallelism: int = 200) -> Dataset[ArrowRow]:
    """Build an Arrow dataset over the integers in [0..n).

    Behaves like range(), except that the integers are stored in Arrow
    tables as Arrow records. Each element has the form {"value": N}.

    Examples:
        >>> ds = ray.data.range_arrow(1000)
        >>> ds.map(lambda r: {"v2": r["value"] * 2}).show()

    Args:
        n: Exclusive upper bound of the range of integer records.
        parallelism: Requested amount of parallelism for the dataset.

    Returns:
        Dataset holding the integers as Arrow records.
    """
    # "arrow" selects the Arrow-table block representation.
    datasource = RangeDatasource()
    return read_datasource(
        datasource,
        parallelism=parallelism,
        n=n,
        block_format="arrow",
    )
def range_tensor(
    n: int, *, shape: Tuple = (1, ), parallelism: int = -1
) -> Dataset[ArrowRow]:
    """Build a tensor dataset over the integers in [0..n).

    Behaves like range_table(), but the records use the ArrowTensorArray
    extension type. Each element has the form
    {VALUE_COL_NAME: array(N, shape=shape)}.

    Examples:
        >>> import ray
        >>> ds = ray.data.range_tensor(1000, shape=(2, 2))  # doctest: +SKIP
        >>> ds  # doctest: +SKIP
        Dataset(
            num_blocks=200,
            num_rows=1000,
            schema={__value__: <ArrowTensorType: shape=(2, 2), dtype=int64>},
        )
        >>> ds.map_batches(lambda arr: arr * 2).take(2)  # doctest: +SKIP
        [array([[0, 0],
                [0, 0]]),
         array([[2, 2],
                [2, 2]])]

    Args:
        n: Exclusive upper bound of the range of integer records.
        shape: Shape of each individual record.
        parallelism: Requested amount of parallelism for the dataset; it
            may be limited by the number of items.

    Returns:
        Dataset holding the integers as Arrow tensor records.
    """
    datasource = RangeDatasource()
    # Any sequence is accepted for ``shape``; the datasource is always
    # handed a tuple.
    record_shape = tuple(shape)
    return read_datasource(
        datasource,
        parallelism=parallelism,
        n=n,
        block_format="tensor",
        tensor_shape=record_shape,
    )
def range(n: int, *, parallelism: int = 200) -> Dataset[int]:
    """Build a dataset over the integers in [0..n).

    Examples:
        >>> import ray
        >>> ds = ray.data.range(10000)  # doctest: +SKIP
        >>> ds  # doctest: +SKIP
        Dataset(num_blocks=200, num_rows=10000, schema=<class 'int'>)
        >>> ds.map(lambda x: x * 2).take(4)  # doctest: +SKIP
        [0, 2, 4, 6]

    Args:
        n: Exclusive upper bound of the range of integers.
        parallelism: Requested amount of parallelism for the dataset; it
            may be limited by the number of items.

    Returns:
        Dataset holding the integers.
    """
    # "list" selects the plain Python list block representation.
    datasource = RangeDatasource()
    return read_datasource(
        datasource,
        parallelism=parallelism,
        n=n,
        block_format="list",
    )