Example #1
    def testTrainingMomentum(self):
        with self.session() as sess:

            x = array_ops.placeholder(datatype, shape=[1, 224, 224, 4])
            y_ = array_ops.placeholder(datatype, shape=[1, 1000])

            with ipu.scopes.ipu_scope("/device:IPU:0"):
                logits = inference(x)

                loss = math_ops.reduce_mean(
                    nn_ops.softmax_cross_entropy_with_logits_v2(
                        logits=logits, labels=array_ops.stop_gradient(y_)))

                train = momentum.MomentumOptimizer(0.01, 0.9).minimize(loss)

            report = tu.ReportJSON(self, sess)

            sess.run(variables.global_variables_initializer())
            report.reset()

            data = np.zeros([1, 224, 224, 4])
            labels = np.zeros([1, 1000])

            sess.run(train, feed_dict={x: data, y_: labels})
            report.parse_log()

            report.assert_total_tile_memory(38642237)
Example #2
    def testSharing(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            # train.MomentumOptimizer is V1 only API.
            with ops.Graph().as_default(), self.cached_session():
                var0 = variables.Variable([1.0, 2.0], dtype=dtype)
                var1 = variables.Variable([3.0, 4.0], dtype=dtype)
                grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
                grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
                mom_opt = momentum_lib.MomentumOptimizer(learning_rate=2.0,
                                                         momentum=0.9)
                mom_update1 = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                mom_update2 = mom_opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                self.evaluate(variables.global_variables_initializer())

                self.assertEqual(["momentum"], mom_opt.get_slot_names())
                slot0 = mom_opt.get_slot(var0, "momentum")
                self.assertEqual(slot0.get_shape(), var0.get_shape())
                slot1 = mom_opt.get_slot(var1, "momentum")
                self.assertEqual(slot1.get_shape(), var1.get_shape())

                # Fetch params to validate initial values
                self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                self.assertAllClose([3.0, 4.0], self.evaluate(var1))
                # Step 1: the momentum accumulators were 0, so we should see a normal
                # update: v -= grad * learning_rate.
                mom_update1.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
                                                   self.evaluate(slot0))
                self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
                                                   self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
                    self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
                    self.evaluate(var1))
                # Step 2: the momentum accumulators now contain the previous update.
                mom_update2.run()
                # Check that the momentum accumulators have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
                    self.evaluate(slot0))
                self.assertAllCloseAccordingToType(
                    np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
                    self.evaluate(slot1))
                # Check that the parameters have been updated.
                self.assertAllCloseAccordingToType(
                    np.array([
                        1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                        2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
                    ]), self.evaluate(var0))
                self.assertAllCloseAccordingToType(
                    np.array([
                        2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                        3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
                    ]), self.evaluate(var1))
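
For reference, the slot and variable values asserted above follow from the standard momentum update: accumulator = momentum * accumulator + gradient, then variable -= learning_rate * accumulator. A minimal NumPy sketch (not part of the original test) that reproduces the two steps:

import numpy as np

lr, mom = 2.0, 0.9
var0, var1 = np.array([1.0, 2.0]), np.array([3.0, 4.0])
grads0, grads1 = np.array([0.1, 0.1]), np.array([0.01, 0.01])
slot0, slot1 = np.zeros(2), np.zeros(2)
for _ in range(2):
    # accumulator <- momentum * accumulator + gradient
    slot0 = mom * slot0 + grads0
    slot1 = mom * slot1 + grads1
    # variable <- variable - learning_rate * accumulator
    var0 = var0 - lr * slot0
    var1 = var1 - lr * slot1

print(slot0)  # [0.19 0.19]    i.e. 0.9 * 0.1 + 0.1
print(var0)   # [0.42 1.42]    i.e. 1.0 - 0.2 - 0.38 and 2.0 - 0.2 - 0.38
print(var1)   # [2.942 3.942]  i.e. 2.98 - 0.038 and 3.98 - 0.038
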
Example #3
  def testIterationsNotMultiple(self):
    def dataset_parser(value):
      a = value
      b = (value + 10.) / 2.0
      return a, b

    def dataset_fn():
      dataset = tu.create_single_increasing_dataset(5, shape=[4, 4, 2])
      dataset = dataset.batch(batch_size=2, drop_remainder=True)
      return dataset.map(dataset_parser)

    def model(c, x, b):
      with variable_scope.variable_scope("vs", use_resource=True):
        y = layers.Conv2D(2,
                          1,
                          use_bias=True,
                          kernel_initializer=init_ops.ones_initializer(),
                          name='conv1')(x)
      y = y + b
      y = math_ops.reduce_sum(y) + c
      return y

    def inputs_fn():
      with ops.device('cpu'):
        return [array_ops.placeholder(np.float32, shape=[])]

    with self.assertRaisesRegex(
        errors.FailedPreconditionError,
        'Detected a gradient accumulation operation with 32'):
      _gradient_accumulation_loop(self, model, inputs_fn, [10.01], 3, 32,
                                  dataset_fn,
                                  momentum.MomentumOptimizer(0.01, 0.9), 10)
Example #4
  def testTensorLearningRateAndMomentum(self):
    for dtype in self.float_types:
      with self.session(), self.test_scope():
        var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
        var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
        grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
        grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
        mom_opt = momentum_lib.MomentumOptimizer(
            learning_rate=constant_op.constant(2.0),
            momentum=constant_op.constant(0.9))
        mom_update = mom_opt.apply_gradients(
            zip([grads0, grads1], [var0, var1]))
        self.evaluate(variables.global_variables_initializer())
        # Check we have slots
        self.assertEqual(["momentum"], mom_opt.get_slot_names())
        slot0 = mom_opt.get_slot(var0, "momentum")
        self.assertEqual(slot0.get_shape(), var0.get_shape())
        self.assertFalse(slot0 in variables.trainable_variables())
        slot1 = mom_opt.get_slot(var1, "momentum")
        self.assertEqual(slot1.get_shape(), var1.get_shape())
        self.assertFalse(slot1 in variables.trainable_variables())

        # Fetch params to validate initial values
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
        # Step 1: the momentum accumulators were 0, so we should see a normal
        # update: v -= grad * learning_rate.
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([0.1, 0.1]), self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([0.01, 0.01]), self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
            self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
            self.evaluate(var1))
        # Step 2: the momentum accumulators contain the previous update.
        mom_update.run()
        # Check that the momentum accumulators have been updated.
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]),
            self.evaluate(slot0))
        self.assertAllCloseAccordingToType(
            np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
            self.evaluate(slot1))
        # Check that the parameters have been updated.
        self.assertAllCloseAccordingToType(
            np.array([
                1.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0),
                2.0 - (0.1 * 2.0) - ((0.9 * 0.1 + 0.1) * 2.0)
            ]), self.evaluate(var0))
        self.assertAllCloseAccordingToType(
            np.array([
                2.98 - ((0.9 * 0.01 + 0.01) * 2.0),
                3.98 - ((0.9 * 0.01 + 0.01) * 2.0)
            ]), self.evaluate(var1))
Example #5
  def testCompare3(self):
    def dataset_fn():
      dataset = tu.create_single_increasing_dataset(10, shape=[4])
      dataset = dataset.batch(batch_size=2, drop_remainder=True)

      def dataset_parser(value):
        label = math_ops.reduce_mean(value, axis=[1])
        return math_ops.cast(value,
                             np.int32), math_ops.cast(label / 10, np.int32)

      return dataset.map(dataset_parser)

    num_batches_to_accumulate = 20
    repeat_count = 2
    optimizer = momentum.MomentumOptimizer(0.01, 0.8)

    def fwd_fn(idx, label):
      with variable_scope.variable_scope("part1", use_resource=True):
        embedding = variable_scope.get_variable(
            "c",
            shape=[10, 1216],
            dtype=np.float32,
            initializer=init_ops.constant_initializer(10.01),
            trainable=True)
      x = embedding_ops.embedding_lookup(embedding, idx)

      logits = math_ops.reduce_sum(x, axis=[-1])
      loss = math_ops.reduce_mean(
          nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=label))
      return loss

    _compare_to_cpu(self, fwd_fn, lambda: [], [], repeat_count,
                    num_batches_to_accumulate, dataset_fn, optimizer)
Example #6
    def testThatBackpropRuns(self):
        """Run optimization to ensure that gradients can be computed."""

        batch_size = 1
        image_height = 9
        image_width = 12
        image = variables.Variable(
            np.float32(
                np.random.uniform(
                    size=[batch_size, image_height, image_width, 3])))
        control_point_locations = [[3., 3.]]
        control_point_locations = constant_op.constant(
            np.float32(np.expand_dims(control_point_locations, 0)))
        control_point_displacements = [[0.25, -0.5]]
        control_point_displacements = constant_op.constant(
            np.float32(np.expand_dims(control_point_displacements, 0)))
        warped_image, _ = sparse_image_warp.sparse_image_warp(
            image,
            control_point_locations,
            control_point_locations + control_point_displacements,
            num_boundary_points=3)

        loss = math_ops.reduce_mean(math_ops.abs(warped_image - image))
        optimizer = momentum.MomentumOptimizer(0.001, 0.9)
        grad = gradients.gradients(loss, [image])
        grad, _ = clip_ops.clip_by_global_norm(grad, 1.0)
        opt_func = optimizer.apply_gradients(zip(grad, [image]))
        init_op = variables.global_variables_initializer()

        with self.test_session() as sess:
            sess.run(init_op)
            for _ in range(5):
                sess.run([loss, opt_func])
Example #7
    def test_interpolation_gradient(self):
        """Make sure that backprop can run. Correctness of gradients is assumed.

    Here, we use a small 'training' set and a more densely-sampled
    set of query points, for which we know the true value in advance. The goal
    is to choose x locations for the training data such that interpolating using
    this training data yields the best reconstruction for the function
    values at the query points. The training data locations are optimized
    iteratively using gradient descent.
    """
        tp = _QuadraticPlusSinProblemND()
        (query_points, query_values, train_points,
         train_values) = tp.get_problem(optimizable=True)

        regularization = 0.001
        for interpolation_order in (1, 2, 3, 4):
            interpolator = interpolate_spline.interpolate_spline(
                train_points, train_values, query_points, interpolation_order,
                regularization)

            loss = math_ops.reduce_mean(
                math_ops.square(query_values - interpolator))

            optimizer = momentum.MomentumOptimizer(0.001, 0.9)
            grad = gradients.gradients(loss, [train_points])
            grad, _ = clip_ops.clip_by_global_norm(grad, 1.0)
            opt_func = optimizer.apply_gradients(zip(grad, [train_points]))
            init_op = variables.global_variables_initializer()

            with self.test_session() as sess:
                sess.run(init_op)
                for _ in range(100):
                    sess.run([loss, opt_func])
Example #8
    def testVariablesAcrossGraphs(self):
        optimizer = momentum_lib.MomentumOptimizer(0.01, 0.5)
        with ops.Graph().as_default():
            var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtypes.float32,
                                                          name="var0")
            var1 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtypes.float32,
                                                          name="var1")
            loss = math_ops.reduce_sum(var0 + var1)
            optimizer.minimize(loss)
            optimizer_variables = optimizer.variables()
            self.assertStartsWith(optimizer_variables[0].name, "var0")
            self.assertStartsWith(optimizer_variables[1].name, "var1")
            self.assertEqual(2, len(optimizer_variables))

        with ops.Graph().as_default():
            var2 = resource_variable_ops.ResourceVariable([1.0, 2.0],
                                                          dtype=dtypes.float32,
                                                          name="var2")
            var3 = resource_variable_ops.ResourceVariable([3.0, 4.0],
                                                          dtype=dtypes.float32,
                                                          name="var3")
            loss = math_ops.reduce_sum(var2 + var3)
            optimizer.minimize(loss)
            optimizer_variables = optimizer.variables()
            self.assertStartsWith(optimizer_variables[0].name, "var2")
            self.assertStartsWith(optimizer_variables[1].name, "var3")
            self.assertEqual(2, len(optimizer_variables))
Example #9
    def testIndexedSlicesGradient(self):
        with ops.Graph().as_default():
            embedding_matrix = variable_scope.get_variable(
                "embedding_matrix", [5, 5],
                initializer=init_ops.random_normal_initializer())

            def cond(it, _):
                return it < 5

            def body(it, cost):
                embedding = embedding_ops.embedding_lookup(
                    embedding_matrix + 0.0, [0])
                cost += math_ops.reduce_sum(embedding)
                return it + 1, cost

            _, cost = control_flow_ops.while_loop(
                cond, body,
                [constant_op.constant(0),
                 constant_op.constant(0.0)])
            optimizer = momentum.MomentumOptimizer(0.1, 0.9)
            train_op = optimizer.minimize(cost)
            with self.cached_session() as sess:
                sess.run(variables.global_variables_initializer())
                for _ in range(10):
                    sess.run([train_op])
Example #10
  def testNumericEquivalenceForNesterovMomentum(self):
    if testing_utils.should_run_tf_function() or context.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in experimental_run_tf_function mode or '
          'eager mode')
    np.random.seed(1331)
    with self.cached_session():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = testing_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = keras.utils.to_categorical(y)

      num_hidden = 5
      model_k_v1 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2.set_weights(model_k_v1.get_weights())
      model_tf = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_tf.set_weights(model_k_v2.get_weights())

      opt_k_v1 = optimizers.SGD(momentum=0.9, nesterov=True)
      opt_k_v2 = gradient_descent.SGD(momentum=0.9, nesterov=True)
      opt_tf = momentum.MomentumOptimizer(
          learning_rate=0.01, momentum=0.9, use_nesterov=True)

      model_k_v1.compile(
          opt_k_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly(),
          experimental_run_tf_function=testing_utils.should_run_tf_function())
      model_k_v2.compile(
          opt_k_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly(),
          experimental_run_tf_function=testing_utils.should_run_tf_function())
      model_tf.compile(
          opt_tf,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly(),
          experimental_run_tf_function=testing_utils.should_run_tf_function())

      hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_tf = model_tf.fit(x, y, batch_size=5, epochs=10, shuffle=False)

      self.assertAllClose(model_k_v1.get_weights(), model_tf.get_weights())
      self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights())
      self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights())
      self.assertAllClose(hist_k_v1.history['loss'], hist_tf.history['loss'])
      self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss'])
Example #11
 def testNesterovMomentum(self):
     for dtype in [dtypes.float32, dtypes.float64]:
         # train.MomentumOptimizer is V1 only API.
         with ops.Graph().as_default(), self.cached_session():
             var0 = variables.Variable([1.0, 2.0], dtype=dtype)
             var1 = variables.Variable([3.0, 4.0], dtype=dtype)
             var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
             var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
             cost = 5 * var0 * var0 + 3 * var1
             global_step = variables.Variable(array_ops.zeros([],
                                                              dtypes.int64),
                                              name="global_step")
             mom_op = momentum_lib.MomentumOptimizer(learning_rate=2.0,
                                                     momentum=0.9,
                                                     use_nesterov=True)
             opt_op = mom_op.minimize(cost, global_step, [var0, var1])
             self.evaluate(variables.global_variables_initializer())
             for t in range(1, 5):
                 opt_op.run()
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 10, 2.0, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 3, 2.0, 0.9)
                 self.assertAllClose(var0_np, self.evaluate(var0))
                 self.assertAllClose(var1_np, self.evaluate(var1))
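
The helper _update_nesterov_momentum_numpy is not shown in these snippets. A plausible NumPy reference for the Nesterov update it checks against (an assumption, based on the formula applied when use_nesterov=True: accumulate the gradient, then take a look-ahead step) could be:

def _update_nesterov_momentum_numpy(var, accum, g, lr, momentum):
    # Assumed reference implementation, not copied from the test file.
    # Accumulate the gradient, then subtract the Nesterov "look-ahead"
    # step lr * (g + momentum * accum) from the variable.
    accum = accum * momentum + g
    var = var - lr * (g + momentum * accum)
    return var, accum
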
Example #12
    def testMinimizeSparseResourceVariable(self):
        for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
            # This test invokes the ResourceSparseApplyMomentum operation, which
            # did not have a registered GPU kernel as of April 2018. With graph
            # execution, the placement algorithm notices this and automatically
            # places the variable in CPU (host) memory. With eager execution,
            # the variable would be placed in GPU memory if available, which
            # would then conflict with the future invocation of the
            # ResourceSparseApplyMomentum operation.
            # To work around this discrepancy, for now we force the variable
            # to be placed on CPU.
            with ops.device("/cpu:0"):
                var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                              dtype=dtype)

            # pylint: disable=cell-var-from-loop
            def loss():
                x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
                pred = math_ops.matmul(
                    embedding_ops.embedding_lookup([var0], [0]), x)
                return pred * pred

            # pylint: enable=cell-var-from-loop

            opt = momentum_lib.MomentumOptimizer(learning_rate=1.0,
                                                 momentum=0.0)
            sgd_op = opt.minimize(loss)
            self.evaluate(variables.global_variables_initializer())
            # Run 1 step of sgd
            self.evaluate(sgd_op)
            # Validate updated params
            self.assertAllCloseAccordingToType([[-111, -138]],
                                               self.evaluate(var0))
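
For reference, the expected value [[-111, -138]] is a single plain gradient step (momentum=0.0) and can be reproduced with NumPy alone (a sketch, not part of the original test):

import numpy as np

var0 = np.array([[1.0, 2.0]])
x = np.array([[4.0], [5.0]])
pred = var0 @ x             # [[14.0]]
grad = 2.0 * pred * x.T     # d(pred * pred)/d(var0) = [[112.0, 140.0]]
var0 = var0 - 1.0 * grad    # learning_rate = 1.0, momentum = 0.0
print(var0)                 # [[-111. -138.]]
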
Example #13
 def testNesterovMomentum(self):
     for dtype in self.float_types:
         with self.cached_session(), self.test_scope():
             var0 = resource_variable_ops.ResourceVariable([0.1, 0.2],
                                                           dtype=dtype)
             var1 = resource_variable_ops.ResourceVariable([0.3, 0.4],
                                                           dtype=dtype)
             var0_np = np.array([0.1, 0.2], dtype=dtype)
             var1_np = np.array([0.3, 0.4], dtype=dtype)
             accum0_np = np.array([0.0, 0.0], dtype=dtype)
             accum1_np = np.array([0.0, 0.0], dtype=dtype)
             cost = 0.4 * var0 * var0 + 0.9 * var1
             global_step = resource_variable_ops.ResourceVariable(
                 array_ops.zeros([], dtypes.int32), name="global_step")
             mom_op = momentum_lib.MomentumOptimizer(learning_rate=0.1,
                                                     momentum=0.9,
                                                     use_nesterov=True)
             opt_op = mom_op.minimize(cost, global_step, [var0, var1])
             variables.global_variables_initializer().run()
             for _ in range(1, 5):
                 opt_op.run()
                 var0_np, accum0_np = self._update_nesterov_momentum_numpy(
                     var0_np, accum0_np, var0_np * 0.8, 0.1, 0.9)
                 var1_np, accum1_np = self._update_nesterov_momentum_numpy(
                     var1_np, accum1_np, 0.9, 0.1, 0.9)
                 self.assertAllCloseAccordingToType(var0_np, var0.eval())
                 self.assertAllCloseAccordingToType(var1_np, var1.eval())
Example #14
    def testDynamicLossScaleWithSlots(self, strategy_fn):
        with strategy_fn().scope() as strategy:
            var = variables.Variable([1.0, 2.0])
            # An SGD optimizer with momentum has slot variables.
            opt = momentum.MomentumOptimizer(1.0, momentum=1.)
            initial_loss_scale = 2.
            loss_scale = loss_scale_module.DynamicLossScale(
                initial_loss_scale=initial_loss_scale,
                increment_period=1,
                multiplier=4)
            opt = loss_scale_optimizer.MixedPrecisionLossScaleOptimizer(
                opt, loss_scale)
            loss = lambda: var / strategy.num_replicas_in_sync
            run_fn = lambda: opt.minimize(loss, var_list=[var])
            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)
            # The momentum accumulator starts at 0 and the gradient is 1. The
            # accumulator is incremented by the gradient, so it is now 1. The
            # accumulator is then subtracted from the variable, so the variable
            # decreases by 1.
            self.assertAllClose([0.0, 1.0], self.evaluate(var))
            self.assertEqual(self.evaluate(opt._loss_scale()),
                             initial_loss_scale * 4)

            run_op = strategy.experimental_run(run_fn)
            self._run_if_in_graph_mode(run_op)
            # The momentum accumulator was 1 before this step and the gradient is 1.
            # The accumulator is incremented by the gradient, so it is now 2. The
            # accumulator is then subtracted from the variable, so the variable
            # decreases by 2.
            self.assertAllClose([-2., -1.], self.evaluate(var))
            self.assertEqual(self.evaluate(opt._loss_scale()),
                             initial_loss_scale * 16)
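
For reference, the asserted values follow from plain momentum arithmetic (the loss scale optimizer scales and unscales the gradient internally, so the effective gradient stays 1 per element) plus the DynamicLossScale rule of multiplying the scale by 4 after each successful step. A minimal plain-Python sketch, not part of the original test:

var = [1.0, 2.0]
accum = [0.0, 0.0]
loss_scale = 2.0                    # initial_loss_scale
for step in range(2):
    grad = [1.0, 1.0]               # unscaled gradient of var / num_replicas, summed over replicas
    accum = [1.0 * a + g for a, g in zip(accum, grad)]   # momentum = 1.0
    var = [v - 1.0 * a for v, a in zip(var, accum)]      # learning_rate = 1.0
    loss_scale *= 4                 # increment_period=1, multiplier=4, no overflow
    print(var, loss_scale)          # step 0: [0.0, 1.0] 8.0; step 1: [-2.0, -1.0] 32.0
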
Example #15
 def test_ops_with_var_and_momentum(self):
     var_list = [
         deo.get_variable('sp_var', initializer=0.0, dim=2),
     ]
     opt_list = [
         momentum.MomentumOptimizer(0.1, 0.1),
     ]
     self.common_run_context(var_list, opt_list, name='momentum_test')
Example #16
def get_multiple_optimizers():
    return [
        adagrad.AdagradOptimizer(0.1),
        adam.AdamOptimizer(0.1),
        ftrl.FtrlOptimizer(0.1),
        momentum.MomentumOptimizer(0.1, 0.1),
        rmsprop.RMSPropOptimizer(0.1)
    ]
Example #17
 def body(x, label):
     logits = inference(x)
     loss = math_ops.reduce_mean(
         nn_ops.softmax_cross_entropy_with_logits_v2(
             logits=logits,
             labels=array_ops.stop_gradient(label)))
     return x, label, momentum.MomentumOptimizer(
         0.01, 0.9).minimize(loss)
Example #18
    def testPipelineCompareRecomputeDropout(self):
        def dataset_fn():
            dataset = tu.create_single_increasing_dataset(7, shape=[4, 4])

            def dataset_parser(value):
                img = value
                label = value[0][0] % 4
                return img, math_ops.cast(label, np.int32)

            dataset = dataset.map(dataset_parser)

            return dataset.batch(batch_size=2, drop_remainder=True)

        gradient_accumulation_count = 24
        repeat_count = 2
        optimizer = momentum.MomentumOptimizer(0.01, 0.98)

        def stage(x, name):
            with variable_scope.variable_scope(name, use_resource=True):
                weight = variable_scope.get_variable(
                    "w",
                    shape=[4, 4],
                    dtype=np.float32,
                    initializer=init_ops.ones_initializer())
            x = math_ops.matmul(x, weight)
            x = rand_ops.dropout(x, seed=[10, 10])
            return x

        def stage1(x, label):
            return stage(x, "s1"), label

        def stage2(x, label):
            return stage(x, "s2"), label

        def stage3(x, label):
            x = stage(x, "s3")
            logits = math_ops.reduce_sum(x, axis=[1])
            loss = math_ops.reduce_mean(
                nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=label))
            return loss

        def inputs_fn():
            with ops.device('cpu'):
                return []

        pipelining_test_util.PipelineTester.compare_pipeline_to_sharding(
            [stage1, stage2, stage3],
            inputs_fn, [10.01],
            repeat_count,
            gradient_accumulation_count,
            dataset_fn,
            optimizer,
            self,
            21458,
            recomp=True,
            schedule=pipelining_ops.PipelineSchedule.Grouped)
Example #19
 def body(x, label):
     logits = inference(x)
     loss = math_ops.reduce_mean(
         nn_ops.softmax_cross_entropy_with_logits_v2(
             logits=logits,
             labels=array_ops.stop_gradient(label)))
     opt = gradient_accumulation_optimizer.GradientAccumulationOptimizer(
         momentum.MomentumOptimizer(0.01, 0.9), 10)
     return x, label, opt.minimize(loss)
Example #20
  def testMinimizeWith2DIndiciesForEmbeddingLookup(self):
    var0 = resource_variable_ops.ResourceVariable(array_ops.ones([2, 2]))

    def loss():
      return math_ops.reduce_sum(embedding_ops.embedding_lookup(var0, [[1]]))

    opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
    sgd_op = opt.minimize(loss)
    self.evaluate(variables.global_variables_initializer())
    self.evaluate(sgd_op)
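    # Only row 1 (the looked-up index) receives a gradient of ones; with
    # learning_rate=1.0 and momentum=0.0 it becomes [0, 0] while row 0 is untouched.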
    self.assertAllCloseAccordingToType([[1, 1], [0, 0]], self.evaluate(var0))
Example #21
    def testReturningNonTensorRaisesError(self):
        optimizer = momentum.MomentumOptimizer(learning_rate=1.0, momentum=1.0)
        optimizer.apply_gradients = function.defun(optimizer.apply_gradients)
        v = resource_variable_ops.ResourceVariable(1.0)
        grad = backprop.implicit_grad(lambda v: v**2)(v)

        with self.assertRaisesRegexp(TypeError,
                                     '.*must return zero or more Tensors.*'):
            # TODO(akshayka): We might want to allow defun-ing Python functions
            # that return operations (and just execute the op instead of running it).
            optimizer.apply_gradients(grad)
Example #22
 def testLikeDistBeliefMom01(self):
   with self.cached_session():
     db_grad, db_out = self._dbParamsMom01()
     num_samples = len(db_grad)
     var0 = variables.Variable([0.0] * num_samples)
     grads0 = constant_op.constant([0.0] * num_samples)
     mom_opt = momentum_lib.MomentumOptimizer(learning_rate=0.1, momentum=0.1)
     mom_update = mom_opt.apply_gradients(zip([grads0], [var0]))
     variables.global_variables_initializer().run()
     for i in xrange(num_samples):
       mom_update.run(feed_dict={grads0: db_grad[i]})
       self.assertAllClose(np.array(db_out[i]), self.evaluate(var0))
Example #23
      def model(features):
        a = variable_scope.get_variable("a", initializer=1.0)

        def body(x):
          return a * x

        logits = ipu.loops.repeat(5, body, [features])
        loss = math_ops.reduce_sum(logits)
        optimizer = momentum.MomentumOptimizer(learning_rate=.001,
                                               momentum=0.9)
        grads_and_vars = optimizer.compute_gradients(loss)
        train_op = optimizer.apply_gradients(grads_and_vars)
        return a, loss, train_op
Example #24
 def _GetOptimizer(self, opt):
   if opt == "adagrad":
     return adagrad.AdagradOptimizer(learning_rate=1e-2)
   elif opt == "adam":
     return adam.AdamOptimizer(learning_rate=1e-2)
   elif opt == "rmsprop":
     return rmsprop.RMSPropOptimizer(learning_rate=1e-2)
   elif opt == "momentum":
     return momentum.MomentumOptimizer(learning_rate=1e-2, momentum=0.9)
   elif opt == "sgd":
     return gradient_descent.GradientDescentOptimizer(learning_rate=1e-2)
   else:
     raise ValueError("Unsupported optimizer: %s" % opt)
Example #25
  def testOptimizerInDefunWithCapturedVariable(self):
    v = resource_variable_ops.ResourceVariable(1.0)
    def loss():
      return v**2

    optimizer = momentum.MomentumOptimizer(learning_rate=1.0, momentum=1.0)

    @function.defun
    def train():
      grad = backprop.implicit_grad(loss)()
      optimizer.apply_gradients(grad)

    train()
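    # With learning_rate=1.0 and momentum=1.0 the accumulator becomes
    # d(v**2)/dv = 2.0, so v goes from 1.0 to 1.0 - 2.0 = -1.0.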
    self.assertEqual(v.numpy(), -1.0)
Example #26
    def testOptimizerInDefun(self):
        def loss(v):
            return v**2

        optimizer = momentum.MomentumOptimizer(learning_rate=1.0, momentum=1.0)

        @function.defun
        def train():
            self.v = resource_variable_ops.ResourceVariable(1.0)
            grad = backprop.implicit_grad(loss)(self.v)
            optimizer.apply_gradients(grad)
            return self.v.read_value()

        value = train()
        self.assertEqual(value.numpy(), -1.0)
Example #27
 def testMinimizeSparseResourceVariable(self):
   for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
     with self.test_session():
       var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
       x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
       pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
       loss = pred * pred
       sgd_op = momentum_lib.MomentumOptimizer(
           learning_rate=1.0, momentum=0.0).minimize(loss)
       variables.global_variables_initializer().run()
       # Fetch params to validate initial values
       self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
       # Run 1 step of sgd
       sgd_op.run()
       # Validate updated params
       self.assertAllCloseAccordingToType(
           [[-111, -138]], var0.eval())
Example #28
  def testMinimizeSparseResourceVariable(self):
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)

      # pylint: disable=cell-var-from-loop
      def loss():
        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
        return pred * pred
      # pylint: enable=cell-var-from-loop

      opt = momentum_lib.MomentumOptimizer(learning_rate=1.0, momentum=0.0)
      sgd_op = opt.minimize(loss)
      self.evaluate(variables.global_variables_initializer())
      # Run 1 step of sgd
      self.evaluate(sgd_op)
      # Validate updated params
      self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
Example #29
  def testMetagraph(self):
    with ops.Graph().as_default():
      with variable_scope.variable_scope("foo", use_resource=True):
        a = variable_scope.get_variable("a", initializer=10.0)

      momentum.MomentumOptimizer(
          learning_rate=0.001, momentum=0.1).minimize(
              a,
              colocate_gradients_with_ops=True,
              global_step=training_util.get_or_create_global_step())

      graph = ops.get_default_graph()
      meta_graph_def = saver.export_meta_graph(graph=graph)

    with ops.Graph().as_default():
      saver.import_meta_graph(meta_graph_def, import_scope="")
      meta_graph_two = saver.export_meta_graph(graph=graph)
    self.assertEqual(meta_graph_def, meta_graph_two)
Example #30
 def testIndexedSlicesGradient(self):
   with ops.Graph().as_default():
     embedding_matrix = tf.get_variable(
         "embedding_matrix", [5, 5],
         initializer=tf.random_normal_initializer())
     def Cond(it, _):
       return it < 5
     def Body(it, cost):
       embedding = embedding_ops.embedding_lookup(embedding_matrix + 0.0, [0])
       cost += tf.reduce_sum(embedding)
       return it + 1, cost
     _, cost = control_flow_ops.While(
         Cond, Body, [tf.constant(0), tf.constant(0.0)])
     optimizer = momentum.MomentumOptimizer(0.1, 0.9)
     train_op = optimizer.minimize(cost)
     with self.test_session() as sess:
       sess.run(tf.initialize_all_variables())
       for _ in range(10):
         sess.run([train_op])