Example #1
def choose_from_datasets_v1(datasets, choice_dataset):
    return dataset_ops.DatasetV1Adapter(
        choose_from_datasets_v2(datasets, choice_dataset))
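For context, a minimal sketch of how this v1 wrapper is typically reached from user code, assuming the public `tf.data.experimental.choose_from_datasets` entry point; the datasets and choice values are illustrative:

```
import tensorflow as tf

# Two illustrative input pipelines.
datasets = [tf.data.Dataset.from_tensors("foo").repeat(),
            tf.data.Dataset.from_tensors("bar").repeat()]
# choice_dataset yields scalar int64 indices into `datasets`.
choice_dataset = tf.data.Dataset.range(2).repeat(3)
# In TF 1.x this resolves to choose_from_datasets_v1, which wraps
# the v2 result in a DatasetV1Adapter as shown above.
result = tf.data.experimental.choose_from_datasets(datasets, choice_dataset)
```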
Example #2
def _apply_fn(dataset):
    out_dataset = dataset_ops.DatasetV1Adapter(
        _CacheDataset(dataset, filename))
    return out_dataset
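`_apply_fn` here is the inner function of a transformation factory: the factory returns it, and callers chain it with `Dataset.apply`. A minimal sketch of that pattern using only public API (the doubling transform is illustrative, not the cache transform above):

```
import tensorflow as tf

def double_values():  # illustrative transformation factory
  def _apply_fn(dataset):
    # A transformation maps one Dataset to another Dataset.
    return dataset.map(lambda x: 2 * x)
  return _apply_fn

ds = tf.data.Dataset.range(5).apply(double_values())
# ds yields 0, 2, 4, 6, 8
```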
Example #3
def sample_from_datasets_v1(datasets, weights=None, seed=None):
    return dataset_ops.DatasetV1Adapter(
        sample_from_datasets_v2(datasets, weights, seed))
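A brief usage sketch, assuming the public `tf.data.experimental.sample_from_datasets` entry point; the weights are illustrative:

```
import tensorflow as tf

datasets = [tf.data.Dataset.from_tensors(0).repeat(),
            tf.data.Dataset.from_tensors(1).repeat()]
# Draw from the first dataset ~75% of the time, the second ~25%.
sampled = tf.data.experimental.sample_from_datasets(
    datasets, weights=[0.75, 0.25], seed=42)
```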
Example #4
def choose_from_datasets_v1(datasets,
                            choice_dataset,
                            stop_on_empty_dataset=False):
  return dataset_ops.DatasetV1Adapter(
      choose_from_datasets_v2(datasets, choice_dataset, stop_on_empty_dataset))
Example #5
def _apply_fn(dataset):
    out_dataset = dataset_ops.DatasetV1Adapter(
        _ShuffleDataset(dataset, buffer_size, seed,
                        reshuffle_each_iteration))
    return out_dataset
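This `_apply_fn` closes over `buffer_size`, `seed`, and `reshuffle_each_iteration` from an enclosing factory; those names match the signature of the public `Dataset.shuffle`, so a hedged guess at the equivalent user-facing call is:

```
import tensorflow as tf

ds = tf.data.Dataset.range(100)
# Same parameters the private transform above closes over.
ds = ds.shuffle(buffer_size=100, seed=7, reshuffle_each_iteration=True)
```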
Example #6
def sample_from_datasets_v1(datasets,
                            weights=None,
                            seed=None,
                            stop_on_empty_dataset=False):
  return dataset_ops.DatasetV1Adapter(
      sample_from_datasets_v2(datasets, weights, seed, stop_on_empty_dataset))
Example #7
def _AutoShardDatasetV1(input_dataset, num_workers, index):  # pylint: disable=invalid-name
    return dataset_ops.DatasetV1Adapter(
        _AutoShardDataset(input_dataset, num_workers, index))
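`_AutoShardDataset` is private plumbing used by `tf.distribute`; end users normally enable auto-sharding through `tf.data.Options` instead. A sketch of that public route in TF 2.x (the policy choice is illustrative):

```
import tensorflow as tf

ds = tf.data.Dataset.range(100)
options = tf.data.Options()
# Shard by elements rather than by input files.
options.experimental_distribute.auto_shard_policy = (
    tf.data.experimental.AutoShardPolicy.DATA)
ds = ds.with_options(options)
```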
Example #8
def _AutoShardDatasetV1(input_dataset, num_workers, index):
  return dataset_ops.DatasetV1Adapter(
      _AutoShardDataset(input_dataset, num_workers, index))
Example #9
  def parallel_scan_range(self,
                          start,
                          end,
                          num_parallel_scans=None,
                          probability=None,
                          columns=None,
                          **kwargs):
    """Retrieves rows (including values) from the Bigtable service.

    Rows with row-keys between `start` and `end` will be retrieved. This method
    is similar to `scan_range`, but by contrast performs multiple sub-scans in
    parallel in order to achieve higher performance.

    Note: The dataset produced by this method is not deterministic!

    The columns to retrieve for each row are specified either via kwargs or
    via the columns parameter. To retrieve values of the columns "c1",
    and "c2" from the column family "cfa", and the value of the column "c3"
    from column family "cfb", the following datasets (`ds1`, and `ds2`) are
    equivalent:

    ```
    table = # ...
    ds1 = table.parallel_scan_range("row_start",
                                    "row_end",
                                    columns=[("cfa", "c1"),
                                             ("cfa", "c2"),
                                             ("cfb", "c3")])
    ds2 = table.parallel_scan_range("row_start", "row_end",
                                    cfa=["c1", "c2"], cfb="c3")
    ```

    Note: only the latest value of a cell will be retrieved.

    Args:
      start: The start of the range when scanning by range.
      end: (Optional.) The end of the range when scanning by range.
      num_parallel_scans: (Optional.) The number of concurrent scans against the
        Cloud Bigtable instance.
      probability: (Optional.) A float between 0 (exclusive) and 1 (inclusive).
        A non-1 value indicates to probabilistically sample rows with the
        provided probability.
      columns: The columns to read. Note: most commonly, they are expressed as
        kwargs. Use the columns value if a column family name is a reserved
        Python keyword and therefore cannot be passed as a kwarg. The values
        of columns and kwargs are merged. Columns is a list of tuples of
        strings ("column_family", "column_qualifier").
      **kwargs: The column families and columns to read. Keys are treated as
        column_families, and values can be either lists of strings, or strings
        that are treated as the column qualifier (column name).

    Returns:
      A `tf.data.Dataset` returning the row keys and the cell contents.

    Raises:
      ValueError: If the configured probability is outside the range (0, 1].
    """
    probability = _normalize_probability(probability)
    normalized = _normalize_columns(columns, kwargs)
    ds = dataset_ops.DatasetV1Adapter(
        _BigtableSampleKeyPairsDataset(self, "", start, end))
    return self._make_parallel_scan_dataset(ds, num_parallel_scans,
                                            probability, normalized)
Example #10
def CounterV1(start=0, step=1, dtype=dtypes.int64):
  return dataset_ops.DatasetV1Adapter(CounterV2(start, step, dtype))
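A short sketch of the wrapped v2 function in use, assuming the public `tf.data.experimental.Counter` entry point:

```
import tensorflow as tf

# An unbounded dataset of int64 scalars: 0, 2, 4, ...
counter = tf.data.experimental.Counter(start=0, step=2)
for value in counter.take(3):
  print(value.numpy())  # prints 0, 2, 4
```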