コード例 #1
0
    def testDistribution(self, initial_known):
        """Checks that resampled output approximates the target distribution.

        Args:
          initial_known: When truthy, a uniform initial distribution is handed
            to the resampler; otherwise `None` is passed as `initial_dist`.
        """
        num_classes = 5
        # Labels are sampled uniformly; the int64 cast is needed for the
        # Windows build.
        labels = math_ops.cast(
            np.random.randint(num_classes, size=(20000, )), dtypes.int64)

        target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
        if initial_known:
            initial_dist = [0.2] * num_classes
        else:
            initial_dist = None

        # Every element pairs a label with its string representation.
        shuffled = dataset_ops.Dataset.from_tensor_slices(labels).shuffle(
            200, seed=21)
        dataset = shuffled.map(lambda c: (c, string_ops.as_string(c))).repeat()

        resampler = resampling.rejection_resample(target_dist=target_dist,
                                                  initial_dist=initial_dist,
                                                  class_func=lambda c, _: c,
                                                  seed=27)
        get_next = self.getNext(dataset.apply(resampler))

        returned = [self.evaluate(get_next()) for _ in range(4000)]

        # Each result is (class, (class, data)); the data must be the string
        # form of the class reported alongside it.
        returned_classes, classes_and_data = zip(*returned)
        _, returned_data = zip(*classes_and_data)
        expected_data = [compat.as_bytes(str(c)) for c in returned_classes]
        self.assertAllEqual(expected_data, returned_data)

        total_returned = len(returned_classes)
        class_counts = np.array([
            sum(1 for v in returned_classes if v == c)
            for c in range(num_classes)
        ])
        self.assertAllClose(
            target_dist, class_counts / total_returned, atol=1e-2)
コード例 #2
0
    def benchmark_resample_performance(self):
        """Benchmarks rejection resampling toward a single-class target.

        Input labels are drawn from four equally likely classes and the target
        distribution puts all of its mass on the last class. The reported wall
        time is the value returned by `run_benchmark` multiplied by the number
        of samples.
        """
        # A dirac-delta target distribution does not need many samples.
        num_samples = 1000
        num_iters = 10
        init_dist = [0.25, 0.25, 0.25, 0.25]
        target_dist = [0.0, 0.0, 0.0, 1.0]
        data_np = np.random.choice(len(init_dist), num_samples, p=init_dist)

        # Endless source of labels, reshaped via rejection sampling.
        source = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()
        resampler = resampling.rejection_resample(class_func=lambda x: x,
                                                  target_dist=target_dist,
                                                  initial_dist=init_dist,
                                                  seed=142)
        resampled = source.apply(resampler)

        # Run with default optimizations switched off.
        opts = dataset_ops.Options()
        opts.experimental_optimization.apply_default_optimizations = False
        resampled = resampled.with_options(opts)

        wall_time = self.run_benchmark(dataset=resampled,
                                       num_elements=num_samples,
                                       iters=num_iters,
                                       warmup=True)

        self.report_benchmark(iters=num_iters,
                              wall_time=wall_time * num_samples,
                              name="resample_{}".format(num_samples))
コード例 #3
0
  def testDistribution(self, initial_known):
    """Verifies the empirical class distribution after rejection resampling.

    Args:
      initial_known: If truthy, the resampler receives a uniform
        `initial_dist`; otherwise it receives `None`.
    """
    num_classes = 5
    num_wanted = 4000
    raw_labels = np.random.randint(num_classes, size=(20000,))  # Uniform.
    target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
    initial_dist = None
    if initial_known:
      initial_dist = [0.2] * num_classes

    # The int64 cast is needed for the Windows build.
    labels = math_ops.cast(raw_labels, dtypes.int64)
    dataset = dataset_ops.Dataset.from_tensor_slices(labels)
    dataset = dataset.shuffle(200, seed=21)
    dataset = dataset.map(lambda c: (c, string_ops.as_string(c)))
    dataset = dataset.repeat()

    resample_fn = resampling.rejection_resample(
        target_dist=target_dist,
        initial_dist=initial_dist,
        class_func=lambda c, _: c,
        seed=27)
    get_next = self.getNext(dataset.apply(resample_fn))

    returned = []
    for _ in range(num_wanted):
      returned.append(self.evaluate(get_next()))

    # Results are (class, (class, data)) tuples.
    returned_classes, nested = zip(*returned)
    _, returned_data = zip(*nested)
    as_bytes = [compat.as_bytes(str(c)) for c in returned_classes]
    self.assertAllEqual(as_bytes, returned_data)

    # Tally how often each class came back and compare with the target.
    counts = []
    for c in range(num_classes):
      counts.append(sum(1 for v in returned_classes if v == c))
    returned_dist = np.array(counts) / len(returned_classes)
    self.assertAllClose(target_dist, returned_dist, atol=1e-2)
コード例 #4
0
    def testOtherDtypes(self, target_dtype, init_dtype):
        """Runs one resampling step with distributions of the given dtypes.

        Args:
          target_dtype: NumPy dtype used for the target distribution array.
          init_dtype: NumPy dtype used for the initial distribution array, or
            `None` to pass no initial distribution at all.
        """
        target_dist = np.array([0.5, 0.5], dtype=target_dtype)
        init_dist = (None if init_dtype is None else np.array(
            [0.5, 0.5], dtype=init_dtype))

        # Elements 0..9 are classified by parity.
        resampled = dataset_ops.Dataset.range(10).apply(
            resampling.rejection_resample(class_func=lambda x: x % 2,
                                          target_dist=target_dist,
                                          initial_dist=init_dist))

        # Producing a single element is enough for this check.
        next_element = self.getNext(resampled)
        self.evaluate(next_element())
0
def _time_resampling(test_obj, data_np, target_dist, init_dist, num_to_sample):
    """Times pulling `num_to_sample` elements from a resampled dataset.

    Args:
      test_obj: Object whose `test_session()` supplies the session to run in.
      data_np: Data handed to `Dataset.from_tensor_slices`; every element is
        used directly as its own class.
      target_dist: Target distribution passed to the resampler.
      init_dist: Initial distribution passed to the resampler.
      num_to_sample: Number of `sess.run` calls to time.

    Returns:
      Elapsed time in seconds, measured with `time.time()`.
    """
    source = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()

    # Reshape the distribution via rejection sampling.
    resampler = resampling.rejection_resample(class_func=lambda x: x,
                                              target_dist=target_dist,
                                              initial_dist=init_dist,
                                              seed=142)
    resampled = source.apply(resampler)

    get_next = dataset_ops.make_one_shot_iterator(resampled).get_next()

    with test_obj.test_session() as sess:
        started = time.time()
        for _ in xrange(num_to_sample):
            sess.run(get_next)
        elapsed = time.time() - started

    return elapsed
コード例 #6
0
ファイル: resampling.py プロジェクト: neuroph12/CNNDDDD
def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
    """Builds a transformation that resamples a dataset toward `target_dist`.

    **NOTE** The resampling relies on rejection sampling, so a fraction of the
    input values is dropped.

    Args:
      class_func: Function mapping an element of the input dataset to a scalar
        `tf.int32` tensor whose value should lie in `[0, num_classes)`.
      target_dist: Floating point tensor of shape `[num_classes]`.
      initial_dist: (Optional.) Floating point tensor of shape
        `[num_classes]`. When omitted, the true class distribution is
        estimated live in a streaming fashion.
      seed: (Optional.) Python integer seed for the resampler.

    Returns:
      A `Dataset` transformation function suitable for
      `tf.data.Dataset.apply`.
    """
    # Pure delegation: all arguments are forwarded unchanged.
    transformation = resampling.rejection_resample(
        class_func, target_dist, initial_dist, seed)
    return transformation
コード例 #7
0
ファイル: resample_test.py プロジェクト: gunan/tensorflow
def _time_resampling(
    test_obj, data_np, target_dist, init_dist, num_to_sample):
  """Measures how long it takes to draw samples from a resampled dataset.

  Args:
    test_obj: Object providing `test_session()` for running the iterator.
    data_np: Data passed to `Dataset.from_tensor_slices`; each element serves
      as its own class.
    target_dist: Target distribution given to the resampler.
    init_dist: Initial distribution given to the resampler.
    num_to_sample: How many times the next element is fetched.

  Returns:
    Elapsed time in seconds between the first and the last fetch, measured
    with `time.time()`.
  """
  repeated = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()

  # Reshape the distribution via rejection sampling.
  resample_fn = resampling.rejection_resample(
      class_func=lambda x: x,
      target_dist=target_dist,
      initial_dist=init_dist,
      seed=142)
  resampled = repeated.apply(resample_fn)

  iterator = resampled.make_one_shot_iterator()
  get_next = iterator.get_next()

  with test_obj.test_session() as sess:
    began = time.time()
    for _ in xrange(num_to_sample):
      sess.run(get_next)
    duration = time.time() - began

  return duration
コード例 #8
0
ファイル: resampling.py プロジェクト: Ajaycs99/tensorflow
def rejection_resample(class_func, target_dist, initial_dist=None, seed=None):
  """Returns a transformation resampling a dataset to a target distribution.

  **NOTE** Rejection sampling is used, which means some fraction of the input
  values will be dropped.

  Args:
    class_func: A function that maps an input dataset element to a scalar
      `tf.int32` tensor. Values should be in `[0, num_classes)`.
    target_dist: A floating point type tensor with shape `[num_classes]`.
    initial_dist: (Optional.) A floating point type tensor with shape
      `[num_classes]`. If it is not given, the true class distribution is
      estimated live in a streaming fashion.
    seed: (Optional.) Python integer seed for the resampler.

  Returns:
    A `Dataset` transformation function that can be passed to
    `tf.data.Dataset.apply`.
  """
  # Forward everything unchanged to the underlying implementation.
  resample_fn = resampling.rejection_resample(
      class_func, target_dist, initial_dist, seed)
  return resample_fn
コード例 #9
0
    def testExhaustion(self):
        """Drains a finite resampled dataset and checks its class histogram."""
        init_dist = [0.5, 0.5]
        target_dist = [0.9, 0.1]

        # Elements 0..9999 are classified by parity.
        resampled = dataset_ops.Dataset.range(10000).apply(
            resampling.rejection_resample(class_func=lambda x: x % 2,
                                          target_dist=target_dist,
                                          initial_dist=init_dist))

        next_element = self.getNext(resampled)
        returned = []
        # Keep pulling until the dataset signals that it is exhausted.
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                returned.append(self.evaluate(next_element()))

        classes, _ = zip(*returned)
        counts = np.bincount(np.array(classes), minlength=len(init_dist))
        observed_dist = counts.astype(np.float32) / len(classes)

        self.assertAllClose(target_dist, observed_dist, atol=1e-2)
コード例 #10
0
    def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist):
        """Exhausts a resampled dataset for two edge-case target distributions.

        Args:
          only_initial_dist: When truthy the target distribution equals the
            initial one (`[0.5, 0.5]`); otherwise the target is `[0.0, 1.0]`.
        """
        init_dist = [0.5, 0.5]
        if only_initial_dist:
            target_dist = [0.5, 0.5]
        else:
            target_dist = [0.0, 1.0]

        # A small sample is enough to test that this works.
        num_samples = 100
        data_np = np.random.choice(len(init_dist), num_samples, p=init_dist)

        # Reshape the distribution of the finite source dataset.
        source = dataset_ops.Dataset.from_tensor_slices(data_np)
        resampler = resampling.rejection_resample(class_func=lambda x: x,
                                                  target_dist=target_dist,
                                                  initial_dist=init_dist)
        next_element = self.getNext(source.apply(resampler))

        returned = []
        # Iterate until the dataset reports that it is out of range.
        with self.assertRaises(errors.OutOfRangeError):
            while True:
                returned.append(self.evaluate(next_element()))
コード例 #11
0
def _time_resampling(data_np, target_dist, init_dist, num_to_sample):
    """Times `num_to_sample` fetches from a rejection-resampled dataset.

    Args:
      data_np: Data passed to `Dataset.from_tensor_slices`; each element acts
        as its own class.
      target_dist: Target distribution passed to the resampler.
      init_dist: Initial distribution passed to the resampler.
      num_to_sample: Number of `sess.run` calls to time.

    Returns:
      Elapsed time in seconds, measured with `time.time()`.
    """
    source = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()

    # Reshape the distribution via rejection sampling.
    resampler = resampling.rejection_resample(class_func=lambda x: x,
                                              target_dist=target_dist,
                                              initial_dist=init_dist,
                                              seed=142)
    resampled = source.apply(resampler)

    # Default optimizations are switched off for the timed dataset.
    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    resampled = resampled.with_options(opts)
    get_next = dataset_ops.make_one_shot_iterator(resampled).get_next()

    with session.Session() as sess:
        started = time.time()
        for _ in xrange(num_to_sample):
            sess.run(get_next)
        elapsed = time.time() - started

    return elapsed
コード例 #12
0
  def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist):
    """Runs a resampled finite dataset to exhaustion for edge-case targets.

    Args:
      only_initial_dist: If truthy, the target distribution matches the
        initial distribution; otherwise all target mass is on class 1.
    """
    init_dist = [0.5, 0.5]
    target_dist = [0.0, 1.0]
    if only_initial_dist:
      target_dist = [0.5, 0.5]
    num_classes = len(init_dist)

    # Not many samples are needed to test that this works.
    num_samples = 100
    data_np = np.random.choice(num_classes, num_samples, p=init_dist)

    # Reshape distribution.
    dataset = dataset_ops.Dataset.from_tensor_slices(data_np)
    resample_fn = resampling.rejection_resample(
        class_func=lambda x: x,
        target_dist=target_dist,
        initial_dist=init_dist)
    dataset = dataset.apply(resample_fn)

    fetch_next = self.getNext(dataset)

    returned = []
    # Pull elements until the end of the dataset is signalled.
    with self.assertRaises(errors.OutOfRangeError):
      while True:
        element = self.evaluate(fetch_next())
        returned.append(element)
コード例 #13
0
def _time_resampling(data_np, target_dist, init_dist, num_to_sample):
  """Measures the time needed to draw samples from a resampled dataset.

  Args:
    data_np: Data given to `Dataset.from_tensor_slices`; each element is its
      own class.
    target_dist: Target distribution given to the resampler.
    init_dist: Initial distribution given to the resampler.
    num_to_sample: How many times the next element is fetched.

  Returns:
    Elapsed time in seconds across all fetches, measured with `time.time()`.
  """
  repeated = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()

  # Reshape the distribution via rejection sampling.
  resample_fn = resampling.rejection_resample(
      class_func=lambda x: x,
      target_dist=target_dist,
      initial_dist=init_dist,
      seed=142)
  resampled = repeated.apply(resample_fn)

  # Turn off the default optimizations before building the iterator.
  no_opt = dataset_ops.Options()
  no_opt.experimental_optimization.apply_default_optimizations = False
  resampled = resampled.with_options(no_opt)
  iterator = dataset_ops.make_one_shot_iterator(resampled)
  get_next = iterator.get_next()

  with session.Session() as sess:
    began = time.time()
    for _ in xrange(num_to_sample):
      sess.run(get_next)
    duration = time.time() - began

  return duration
コード例 #14
0
    def testRandomClasses(self):
        """Resamples randomly remapped classes toward a single-class target."""
        init_dist = [0.25, 0.25, 0.25, 0.25]
        target_dist = [0.0, 0.0, 0.0, 1.0]
        num_classes = len(init_dist)
        # A dirac-delta target distribution does not need many samples.
        num_samples = 100
        data_np = np.random.choice(num_classes, num_samples, p=init_dist)

        def _remap_fn(_):
            # Random mapping that preserves the data distribution: the input
            # is ignored and a random class index is produced instead.
            scaled = random_ops.random_uniform([1]) * num_classes
            return math_ops.cast(scaled, dtypes.int32)[0]

        source = dataset_ops.Dataset.from_tensor_slices(data_np)
        remapped = source.map(_remap_fn)

        # Reshape distribution.
        resampler = resampling.rejection_resample(class_func=lambda x: x,
                                                  target_dist=target_dist,
                                                  initial_dist=init_dist)
        resampled = remapped.apply(resampler)

        get_next = dataset_ops.make_one_shot_iterator(resampled).get_next()

        returned = []
        # Fetch elements until the dataset is exhausted.
        with self.cached_session() as sess, self.assertRaises(
                errors.OutOfRangeError):
            while True:
                returned.append(sess.run(get_next))

        classes, _ = zip(*returned)
        counts = np.bincount(np.array(classes), minlength=num_classes)
        observed_dist = counts.astype(np.float32) / len(classes)

        self.assertAllClose(target_dist, observed_dist, atol=1e-2)
コード例 #15
0
ファイル: resample_test.py プロジェクト: gunan/tensorflow
  def testRandomClasses(self):
    """Checks resampling of randomly remapped labels to a one-class target."""
    init_dist = [0.25, 0.25, 0.25, 0.25]
    target_dist = [0.0, 0.0, 0.0, 1.0]
    num_classes = len(init_dist)
    # Few samples suffice to test a dirac-delta target distribution.
    num_samples = 100
    data_np = np.random.choice(num_classes, num_samples, p=init_dist)

    # Apply a random mapping that preserves the data distribution.
    def _remap_fn(_):
      uniform = random_ops.random_uniform([1])
      as_class = math_ops.cast(uniform * num_classes, dtypes.int32)
      return as_class[0]

    dataset = dataset_ops.Dataset.from_tensor_slices(data_np).map(_remap_fn)

    # Reshape distribution.
    resample_fn = resampling.rejection_resample(
        class_func=lambda x: x,
        target_dist=target_dist,
        initial_dist=init_dist)
    dataset = dataset.apply(resample_fn)

    iterator = dataset.make_one_shot_iterator()
    get_next = iterator.get_next()

    with self.cached_session() as sess:
      returned = []
      # Keep fetching until the end of the dataset is signalled.
      with self.assertRaises(errors.OutOfRangeError):
        while True:
          element = sess.run(get_next)
          returned.append(element)

    classes, _ = zip(*returned)
    histogram = np.bincount(np.array(classes), minlength=num_classes)
    bincount = histogram.astype(np.float32) / len(classes)

    self.assertAllClose(target_dist, bincount, atol=1e-2)