Ejemplo n.º 1
0
    def testLearningRateDecayUsedInTwoFunctions(self):
        """Uses one schedule-driven Adam optimizer from two traced functions.

        The test has no explicit assertions; it passes when both traced
        functions run without raising.
        """
        with context.eager_mode():
            # The two variables share the name 'var' but have different shapes.
            first_var = variables.Variable([1., 2.], name='var')
            second_var = variables.Variable([1.], name='var')

            schedule = learning_rate_schedule.InverseTimeDecay(
                0.5, decay_steps=1.0, decay_rate=0.5)
            optimizer = adam.Adam(learning_rate=schedule)

            def first_loss():
                return 3 * first_var

            def second_loss():
                return 2 * second_var

            @def_function.function
            def fn_a():
                optimizer.minimize(first_loss, [first_var])
                return first_var

            @def_function.function
            def fn_b():
                optimizer.minimize(second_loss, [second_var])
                return second_var

            # Same optimizer instance, two separately traced functions.
            fn_a()
            fn_b()
Ejemplo n.º 2
0
 def testDeferredRestorationUsageEager(self):
   """Runs three eager train/save rounds that resume from saved checkpoints.

   Every round builds a new model and optimizer, restores from the path
   reported by `checkpoint_management.latest_checkpoint`, applies gradients
   for ten steps and saves. The optimizer's iteration counter is expected to
   keep counting across rounds.
   """
   steps_per_round = 10
   ckpt_dir = self.get_temp_dir()
   ckpt_prefix = os.path.join(ckpt_dir, "ckpt")
   for round_index in range(3):
     model = MyModel()
     optimizer = adam.Adam(0.001)
     root = trackable_utils.Checkpoint(optimizer=optimizer, model=model)
     latest_path = checkpoint_management.latest_checkpoint(ckpt_dir)
     root.restore(latest_path)
     for _ in range(steps_per_round):
       # TODO(allenl): Use a Dataset and serialize/checkpoint it.
       batch = constant_op.constant([[3.]])
       with backprop.GradientTape() as tape:
         loss = model(batch)
       trainable = model.trainable_variables
       grads = tape.gradient(loss, trainable)
       optimizer.apply_gradients(zip(grads, trainable))
     root.save(file_prefix=ckpt_prefix)
     expected_iterations = (round_index + 1) * steps_per_round
     self.assertEqual(expected_iterations,
                      root.optimizer.iterations.numpy())
Ejemplo n.º 3
0
    def testGettingAndSettingLearningRate(self, strategy_fn):
        """Reads and writes `opt.lr` under the strategy built by `strategy_fn`.

        Args:
          strategy_fn: Zero-argument callable whose result provides `scope()`.
        """
        with strategy_fn().scope() as strategy:
            var = variables.Variable([5.0])
            opt = adam.Adam(learning_rate=1.0)

            def loss():
                return var * 2.0

            def run_fn():
                return opt.minimize(loss, [var])

            run_op = strategy.experimental_run(run_fn)
            self.evaluate(variables.global_variables_initializer())
            self._run_if_in_graph_mode(run_op)

            # Value passed to the constructor.
            self.assertEqual(1.0, self.evaluate(opt.lr))

            # Plain attribute assignment.
            opt.lr = 2.0
            self.assertEqual(2.0, self.evaluate(opt.lr))

            # Assignment through the object returned by `opt.lr`.
            self.evaluate(opt.lr.assign(3.0))
            self.assertEqual(3.0, self.evaluate(opt.lr))

            # Unknown attributes must still raise.
            with self.assertRaises(AttributeError):
                opt.not_an_attr += 3
Ejemplo n.º 4
0
 def testDeferredRestorationUsageEager(self):
     """Runs three eager train/save rounds while tracking a global step.

     Every round creates a new model and optimizer, bundles them with the
     global step in a checkpoint object, restores from the path reported by
     `checkpoint_management.latest_checkpoint`, takes ten `minimize` steps
     and saves. The global step is expected to keep counting across rounds.
     """
     steps_per_round = 10
     ckpt_dir = self.get_temp_dir()
     ckpt_prefix = os.path.join(ckpt_dir, "ckpt")
     for round_index in range(3):
         model = MyModel()
         optimizer = adam.Adam(0.001)
         step_counter = training_util.get_or_create_global_step()
         root = util.Checkpoint(
             optimizer=optimizer, model=model, optimizer_step=step_counter)
         latest_path = checkpoint_management.latest_checkpoint(ckpt_dir)
         root.restore(latest_path)
         for _ in range(steps_per_round):
             # TODO(allenl): Use a Dataset and serialize/checkpoint it.
             input_value = constant_op.constant([[3.]])
             optimizer.minimize(
                 lambda: model(input_value),  # pylint: disable=cell-var-from-loop
                 global_step=root.optimizer_step)
         root.save(file_prefix=ckpt_prefix)
         expected_steps = (round_index + 1) * steps_per_round
         self.assertEqual(expected_steps, root.optimizer_step.numpy())
Ejemplo n.º 5
0
 def testDeferredRestorationUsageEager(self):
   """Runs three train/save rounds driven by a CheckpointManager.

   Inside `self.test_scope()`, each round builds a new model and optimizer,
   restores from `manager.latest_checkpoint`, applies gradients for ten
   steps and saves through the manager. The optimizer's iteration counter is
   expected to keep counting across rounds.
   """
   steps_per_round = 10
   ckpt_dir = self.get_temp_dir()
   for round_index in range(3):
     with self.test_scope():
       model = Subclassed()
       optimizer = adam.Adam(0.001)
       root = checkpointable_utils.Checkpoint(optimizer=optimizer, model=model)
       manager = checkpoint_management.CheckpointManager(
           root, ckpt_dir, max_to_keep=2)
       root.restore(manager.latest_checkpoint)
       for _ in range(steps_per_round):
         batch = constant_op.constant([[3.]])
         with backprop.GradientTape() as tape:
           loss = model(batch)
         trainable = model.trainable_variables
         grads = tape.gradient(loss, trainable)
         optimizer.apply_gradients(zip(grads, trainable))
       manager.save()
       expected_iterations = (round_index + 1) * steps_per_round
       self.assertEqual(expected_iterations,
                        root.optimizer.iterations.numpy())
Ejemplo n.º 6
0
  def testTensorLearningRate(self):
    """Compares Adam with a tensor learning rate to a NumPy reference.

    For each floating dtype, three update steps are run and the variables are
    checked against `adam_update_numpy` after every step.
    """
    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
      with self.cached_session():
        np_dtype = dtype.as_numpy_dtype
        # Moment accumulators for the NumPy reference implementation.
        m0 = v0 = m1 = v1 = 0.0
        var0_np = np.array([1.0, 2.0], dtype=np_dtype)
        grads0_np = np.array([0.1, 0.1], dtype=np_dtype)
        var1_np = np.array([3.0, 4.0], dtype=np_dtype)
        grads1_np = np.array([0.01, 0.01], dtype=np_dtype)

        var0 = variables.Variable(var0_np)
        var1 = variables.Variable(var1_np)
        grads0 = constant_op.constant(grads0_np)
        grads1 = constant_op.constant(grads1_np)
        # The learning rate is handed over as a tensor, not a Python float.
        opt = adam.Adam(constant_op.constant(0.001))
        grads_and_vars = zip([grads0, grads1], [var0, var1])
        update = opt.apply_gradients(grads_and_vars)
        variables.global_variables_initializer().run()

        # Starting values must be untouched before the first update.
        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
        self.assertAllClose([3.0, 4.0], self.evaluate(var1))

        beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype)
        # Three Adam steps, each checked against the reference.
        for step in range(3):
          expected_beta_1_power = 0.9**(step + 1)
          expected_beta_2_power = 0.999**(step + 1)
          self.assertAllCloseAccordingToType(expected_beta_1_power,
                                             self.evaluate(beta_1_power))
          self.assertAllCloseAccordingToType(expected_beta_2_power,
                                             self.evaluate(beta_2_power))
          update.run()

          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, step, m0, v0)
          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, step, m1, v1)

          # TensorFlow variables must track the reference values.
          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
Ejemplo n.º 7
0
 def testPartialRestoreWarningObject(self):
   """Checks the warnings logged when a partially restored root is deleted.

   A checkpoint holding `v1`, `v2` and an Adam optimizer is restored into a
   root that only has `v1`. After that root is deleted, the captured
   `logging.warning` calls are expected to mention the unrestored `v2` and
   the optimizer's 'm' state for `v1`, to not mention `v1` itself, and to
   mention `expect_partial()`.
   """
   with context.eager_mode():
     optimizer = adam.Adam(0.0)
     original_root = trackable_utils.Checkpoint(v1=variables_lib.Variable(2.),
                                                v2=variables_lib.Variable(3.),
                                                optimizer=optimizer)
     # Create a slot variable to save
     optimizer.minimize(original_root.v1.read_value, [original_root.v1])
     prefix = os.path.join(self.get_temp_dir(), "ckpt")
     save_path = original_root.save(prefix)
     # The restoring root only declares `v1`; `v2` and the optimizer are absent.
     partial_root = trackable_utils.Checkpoint(v1=variables_lib.Variable(0.))
     # Weak references let the test confirm that `del` really frees the objects
     # without keeping them alive itself.
     weak_partial_root = weakref.ref(partial_root)
     weak_v1 = weakref.ref(partial_root.v1)
     partial_root.restore(save_path)
     self.assertEqual(2., partial_root.v1.numpy())
     # Capture warnings emitted while the partially restored root is destroyed.
     with test.mock.patch.object(logging, "warning") as mock_log:
       del partial_root
       self.assertIsNone(weak_partial_root())
       self.assertIsNone(weak_v1())
       messages = str(mock_log.call_args_list)
     self.assertIn("(root).v2'", messages)
     self.assertIn("(root).optimizer's state 'm' for (root).v1", messages)
     self.assertNotIn("(root).v1'", messages)
     self.assertIn("expect_partial()", messages)
  def test_getitem_slice_with_stop_and_ellipsis_only(self):
    """Slices a Keras input as `inp[..., :stop]` with a tensor-valued stop.

    The model is checked through direct calls and `predict`, both before and
    after a `get_config` / `from_config` round trip.
    """
    if not context.executing_eagerly():
      self.skipTest('Complex slicing like this fails in v1')
    inp = keras.Input(shape=(4, 3, 8))
    slice_stop = keras.Input(shape=(), dtype='int32')

    sliced = inp[..., :slice_stop[0]]
    model = keras.Model(inputs=[inp, slice_stop], outputs=sliced)
    model.compile(
        adam.Adam(0.001),
        'mse',
        run_eagerly=testing_utils.should_run_eagerly())
    batch_size = 7
    stop = 6
    full_rows = [math_ops.range(8) for _ in range(batch_size)]
    x = array_ops.stack(full_rows)
    args = [x, constant_op.constant(stop, shape=(batch_size,))]
    truncated_rows = [math_ops.range(8)[:stop] for _ in range(batch_size)]
    expected = array_ops.stack(truncated_rows)

    if keras_tensor.keras_tensors_enabled():
      layer_names = [layer.name for layer in model.layers]
      self.assertIn('tf.__operators__.getitem', layer_names)
      self.assertNotIn('tf.strided_slice', layer_names)
    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)

    # Make sure it can be successfully saved and loaded
    model = keras.Model.from_config(model.get_config())

    self.assertAllEqual(model(args), expected)
    self.assertAllEqual(model.predict(args, batch_size=batch_size), expected)
Ejemplo n.º 9
0
    def testAgnosticUsage(self):
        """Trains, saves and restores with code that works in graph and eager.

        One optimizer is shared across three rounds. Each round builds a new
        model, restores from the manager's latest checkpoint, runs ten
        training steps and saves. The optimizer's iteration count and the
        checkpoint's save counter are checked after every round.
        """
        # Does create garbage when executing eagerly due to ops.Graph() creation.
        with self.test_session():
            steps_per_round = 10
            ckpt_dir = self.get_temp_dir()
            optimizer = adam.Adam(0.001)

            def _apply_one_step(model, batch):
                with backprop.GradientTape() as tape:
                    loss = model(batch)
                trainable = model.trainable_variables
                grads = tape.gradient(loss, trainable)
                return optimizer.apply_gradients(zip(grads, trainable))

            for round_index in range(3):
                with testing_utils.device(should_use_gpu=True):
                    model = MyModel()
                    root = trackable_utils.Checkpoint(optimizer=optimizer,
                                                      model=model)
                    manager = checkpoint_management.CheckpointManager(
                        root, ckpt_dir, max_to_keep=1)
                    status = root.restore(save_path=manager.latest_checkpoint)
                    batch = constant_op.constant([[3.]])
                    step = functools.partial(_apply_one_step, model, batch)
                    if not context.executing_eagerly():
                        # Build the op once, then re-run that same op each step.
                        train_op = step()
                        step = functools.partial(self.evaluate, train_op)
                    status.initialize_or_restore()
                    for _ in range(steps_per_round):
                        step()
                    manager.save()
                    expected_iterations = (round_index + 1) * steps_per_round
                    self.assertEqual(
                        expected_iterations,
                        self.evaluate(root.optimizer.iterations))
                    self.assertEqual(round_index + 1,
                                     self.evaluate(root.save_counter))
Ejemplo n.º 10
0
    def testDeferredSlotRestoration(self):
        """Checks deferred restoration of an optimizer slot variable.

        Two checkpoints are written from the same root: one saved before the
        optimizer is attached ("no_slots", var == 12) and one saved after
        ("with_slots", var == 13, slot 'm' == 14). Both are then restored, in
        that order of creation but with the slot checkpoint requested first,
        into a new root whose variable and optimizer are only added afterwards.
        """
        checkpoint_directory = self.get_temp_dir()

        root = tracking.Checkpointable()
        root.var = util.add_variable(root, name="var", initializer=0.)
        optimizer = adam.Adam(0.1)
        if context.executing_eagerly():
            optimizer.minimize(root.var.read_value)
        else:
            train_op = optimizer.minimize(root.var)
            # Note that `optimizer` has not been added as a dependency of
            # `root`. Create a one-off grouping so that slot variables for `root.var`
            # get initialized too.
            self.evaluate(
                util.gather_initializers(
                    util.Checkpoint(root=root, optimizer=optimizer)))
            self.evaluate(train_op)
        self.evaluate(state_ops.assign(root.var, 12.))
        # First checkpoint: saved before `optimizer` is attached to `root`.
        no_slots_path = util.CheckpointableSaver(root).save(
            os.path.join(checkpoint_directory, "no_slots"))
        root.optimizer = optimizer
        self.evaluate(state_ops.assign(root.var, 13.))
        self.evaluate(
            state_ops.assign(optimizer.get_slot(name="m", var=root.var), 14.))
        # Second checkpoint: saved with the optimizer attached and slot 'm' set.
        slots_path = util.CheckpointableSaver(root).save(
            os.path.join(checkpoint_directory, "with_slots"))
        new_root = tracking.Checkpointable()
        # Load the slot-containing checkpoint (deferred), then immediately overwrite
        # the non-slot variable (also deferred).
        slot_status = util.CheckpointableSaver(new_root).restore(slots_path)
        no_slot_status = util.CheckpointableSaver(new_root).restore(
            no_slots_path)
        # `new_root` has no variable yet, so the restore is not consumed.
        with self.assertRaises(AssertionError):
            no_slot_status.assert_consumed()
        new_root.var = util.add_variable(new_root, name="var", shape=[])
        no_slot_status.assert_consumed()
        no_slot_status.run_restore_ops()
        # The value comes from the "no_slots" checkpoint (12), not 13.
        self.assertEqual(12., self.evaluate(new_root.var))
        new_root.optimizer = adam.Adam(0.1)
        # The slot checkpoint is still expected to be unconsumed at this point.
        with self.assertRaisesRegexp(AssertionError, "beta_1_power"):
            slot_status.assert_consumed()
        self.assertEqual(12., self.evaluate(new_root.var))
        if context.executing_eagerly():
            # Slot variables are only created with restoring initializers when
            # executing eagerly.
            self.assertEqual(
                14.,
                self.evaluate(
                    new_root.optimizer.get_slot(name="m", var=new_root.var)))
        else:
            self.assertIs(
                new_root.optimizer.get_slot(name="m", var=new_root.var), None)
        if context.executing_eagerly():
            new_root.optimizer.minimize(new_root.var.read_value)
        else:
            train_op = new_root.optimizer.minimize(new_root.var)
            # The slot variable now exists; restore() didn't create it, but we should
            # now have a restore op for it.
            slot_status.run_restore_ops()
            self.assertEqual(
                14.,
                self.evaluate(
                    new_root.optimizer.get_slot(name="m", var=new_root.var)))
            self.evaluate(train_op)
        # After `minimize` has run, the slot checkpoint must be fully consumed.
        slot_status.assert_consumed()
    def test_complete_flow_with_mode(self, distribution,
                                     use_train_and_evaluate):
        """Trains, evaluates, predicts with and exports a combined regressor.

        Args:
          distribution: Strategy object used for both training and evaluation;
            its `worker_devices` length divides the batch size.
          use_train_and_evaluate: If true, drive training through
            `training.train_and_evaluate`; otherwise call `train` and
            `evaluate` on the estimator directly.
        """
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        flat_data = np.linspace(
            0., 2., batch_size * label_dimension, dtype=np.float32)
        data = flat_data.reshape(batch_size, label_dimension)

        # Train and eval batches are split across the worker devices.
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices))
        eval_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices))
        predict_input_fn = numpy_io.numpy_input_fn(
            x={'x': data}, batch_size=batch_size, shuffle=False)

        column_shape = (input_dimension, )
        linear_feature_columns = [
            feature_column.numeric_column('x', shape=column_shape)
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=column_shape)
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns

        session_config = config_pb2.ConfigProto(
            log_device_placement=True, allow_soft_placement=True)
        estimator_config = run_config.RunConfig(
            train_distribute=distribution,
            eval_distribute=distribution,
            session_config=session_config)
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            dnn_optimizer=adam.Adam(0.001),
            linear_optimizer=adam.Adam(0.001),
            config=estimator_config)

        num_steps = 2
        if not use_train_and_evaluate:
            estimator.train(train_input_fn, steps=num_steps)
            scores = estimator.evaluate(eval_input_fn)
        else:
            train_spec = training.TrainSpec(train_input_fn, max_steps=num_steps)
            eval_spec = training.EvalSpec(eval_input_fn)
            scores, _ = training.train_and_evaluate(
                estimator, train_spec, eval_spec)

        self.assertIn('loss', six.iterkeys(scores))

        prediction_key = prediction_keys.PredictionKeys.PREDICTIONS
        predictions = np.array([
            result[prediction_key]
            for result in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        # Export a SavedModel and check that the export directory exists.
        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Ejemplo n.º 12
0
    def create_model(self):
        """
        Define the CNN/CTC acoustic model using the Keras functional API.

        Input layer: a sequence of 200-dimensional feature vectors; the maximum
            length of one utterance is set to 1600 (about 16s).
        Hidden layers: convolution + pooling layers with 3x3 kernels; the first
            three pooling layers use a window of 2, the last two a window of 1.
        Hidden layer: fully connected layer.
        Output layer: fully connected layer with self.ms_output_size units,
            followed by a softmax activation.
        CTC layer: uses the CTC loss as the loss function to produce a
            connectionist temporal multi-output.

        NOTE(review): the original description mentioned LSTM, but no recurrent
        layer is built in this method.

        :return: a tuple (model, model_data). `model` is the compiled training
            model that takes [input_data, labels, input_length, label_length]
            and outputs the CTC loss; `model_data` maps input_data to the
            softmax output y_pred.
        """
        input_data = Input(name='the_input', shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

        layer_h1 = Conv2D(32, (3, 3), use_bias=False, activation='relu', padding='same',
                          kernel_initializer='he_normal')(input_data)  # convolution layer
        layer_h1 = Dropout(0.05)(layer_h1)
        layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(
            layer_h1)  # convolution layer
        layer_h3 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h2)  # pooling layer
        # layer_h3 = Dropout(0.2)(layer_h2) # randomly drop some connections to prevent overfitting
        layer_h3 = Dropout(0.05)(layer_h3)
        layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(
            layer_h3)  # convolution layer
        layer_h4 = Dropout(0.1)(layer_h4)
        layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(
            layer_h4)  # convolution layer
        layer_h6 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h5)  # pooling layer

        layer_h6 = Dropout(0.1)(layer_h6)
        layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                          kernel_initializer='he_normal')(layer_h6)  # convolution layer
        layer_h7 = Dropout(0.15)(layer_h7)
        layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                          kernel_initializer='he_normal')(layer_h7)  # convolution layer
        layer_h9 = MaxPooling2D(pool_size=2, strides=None, padding="valid")(layer_h8)  # pooling layer

        layer_h9 = Dropout(0.15)(layer_h9)
        layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                           kernel_initializer='he_normal')(layer_h9)  # convolution layer
        layer_h10 = Dropout(0.2)(layer_h10)
        layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                           kernel_initializer='he_normal')(layer_h10)  # convolution layer
        layer_h12 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h11)  # pooling layer (window of 1)

        layer_h12 = Dropout(0.2)(layer_h12)
        layer_h13 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                           kernel_initializer='he_normal')(layer_h12)  # convolution layer
        layer_h13 = Dropout(0.2)(layer_h13)
        layer_h14 = Conv2D(128, (3, 3), use_bias=True, activation='relu', padding='same',
                           kernel_initializer='he_normal')(layer_h13)  # convolution layer
        layer_h15 = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h14)  # pooling layer (window of 1)

        # NOTE(review): the hard-coded (200, 3200) presumably corresponds to
        # AUDIO_LENGTH=1600 and AUDIO_FEATURE_LENGTH=200 after three 2x poolings
        # with 128 channels (1600/8, 200/8*128) -- confirm against the class setup.
        layer_h16 = Reshape((200, 3200))(layer_h15)  # Reshape layer
        layer_h16 = Dropout(0.3)(layer_h16)
        layer_h17 = Dense(128, activation="relu", use_bias=True, kernel_initializer='he_normal')(layer_h16)  # fully connected layer
        layer_h17 = Dropout(0.3)(layer_h17)
        layer_h18 = Dense(self.ms_output_size, use_bias=True, kernel_initializer='he_normal')(layer_h17)  # fully connected layer

        y_pred = Activation('softmax', name='Activation0')(layer_h18)
        # Inference-only model: features in, per-frame softmax out.
        model_data = Model(inputs=input_data, outputs=y_pred)
        # model_data.summary()

        labels = Input(name='the_labels', shape=[self.label_max_string_length], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')(
            [y_pred, labels, input_length, label_length])

        model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)
        # model.summary()

        # NOTE(review): an earlier comment here credited clip norm with faster
        # convergence, but no clipnorm/clipvalue argument is passed below.
        # epsilon=10e-8 is numerically 1e-7.
        opt = Adam_v2.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8)
        # The 'ctc' Lambda layer already outputs the loss, so the compiled loss
        # function simply passes y_pred through.
        model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=opt)

        # logger.debug('Create Model Successful, Compiles Model Successful. ')
        return model, model_data
Ejemplo n.º 13
0
    def _benchmark(self, gradient_type, num_gpus, mode, loss_scaling):
        """Benchmarks loss scaling.

        A simple model made of several scalar variables is used, with the sum
        of all variables as the loss. The model is kept simple so that only
        the cost of loss scaling is measured, not the cost of the model.

        Args:
          gradient_type: "optimizer" or "gradient_tape". How gradients are
            computed. "optimizer" uses Optimizer.minimize. "gradient_tape"
            uses GradientTape.gradient along with
            LossScaleOptimizer.get_scaled_loss and
            LossScaleOptimizer.get_unscaled_gradients.
          num_gpus: The number of GPUs to use. Must be at least 1.
          mode: "eager" or "tf_function". "tf_function" causes all
            computations to be wrapped in a tf.function, while "eager" runs
            computations eagerly.
          loss_scaling: "fixed", "dynamic", or None. The type of loss scaling
            to use. None means use no loss scaling, which is useful as a
            baseline to see how much slower loss scaling is in comparison.
        """
        ls_str = loss_scaling or 'no_loss_scaling'
        name = '%s_%d_GPU_%s_%s' % (gradient_type, num_gpus, mode, ls_str)
        with context.eager_mode(), _get_strategy(num_gpus).scope() as strategy:
            opt = adam.Adam()
            if loss_scaling == 'fixed':
                loss_scale = loss_scale_module.FixedLossScale(2.)
            elif loss_scaling == 'dynamic':
                # An increment period this large is effectively infinite, so
                # the loss scale never changes. The overhead of changing the
                # loss scale is typically negligible because it happens
                # rarely, so only the common unchanged case is benchmarked.
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=2., increment_period=1000000)
            else:
                assert loss_scaling is None
                loss_scale = None
            if loss_scale:
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)

            num_vars = 200
            num_warmup_iters = 1
            num_iters = 20
            # Scalar variables keep the actual GPU work (multiplying
            # variables, dividing gradients, checking gradients for NaNs)
            # small. Those costs are not very useful to measure because
            # little can be done about them (fusing the gradient division
            # with the NaN check would be one option). All other overheads
            # remain, such as all-reducing the `is_finite` values and the
            # tf.cond or tf.while_loop that depends on whether gradients are
            # NaNs. Currently those overheads dominate the GPU work.
            var_list = [
                variables.Variable(index, dtype='float32')
                for index in range(num_vars)
            ]

            def get_loss():
                return math_ops.add_n(var_list)

            def _tape_step_unscaled():
                with backprop.GradientTape() as tape:
                    loss = get_loss()
                grads = tape.gradient(loss, var_list)
                return opt.apply_gradients(zip(grads, var_list))

            def _tape_step_scaled():
                with backprop.GradientTape() as tape:
                    loss = get_loss()
                    scaled_loss = opt.get_scaled_loss(loss)
                scaled_grads = tape.gradient(scaled_loss, var_list)
                grads = opt.get_unscaled_gradients(scaled_grads)
                return opt.apply_gradients(zip(grads, var_list))

            def _optimizer_step():
                return opt.minimize(get_loss, var_list)

            if gradient_type == 'gradient_tape':
                if loss_scale is None:
                    minimize_fn = _tape_step_unscaled
                else:
                    minimize_fn = _tape_step_scaled
            else:
                assert gradient_type == 'optimizer'
                minimize_fn = _optimizer_step

            def run_fn():
                strategy.run(minimize_fn)

            if mode == 'tf_function':
                run_fn = def_function.function(run_fn)

            # Warm-up iterations are excluded from the timing.
            for _ in range(num_warmup_iters):
                run_fn()

            start = time.time()
            for _ in range(num_iters):
                run_fn()
            elapsed = time.time() - start
            self.report_benchmark(iters=num_iters,
                                  wall_time=elapsed / num_iters,
                                  name=name)
Ejemplo n.º 14
0
    def test_trackable_save_restore(self):
        """Saves a template plus optimizer and restores into a second template.

        The first template ("s1") is called, its tracked objects and
        dependency names are checked, and its variables are set to 12 and 14
        before saving. A second template ("s2") built from the same function is
        then restored from that checkpoint and must report the saved values.
        """
        with self.test_session():

            def _templated():
                # Returns two variables, a derived tensor, a `_ManualScope`
                # object and the result of calling that object.
                v = variable_scope.get_variable(
                    "v",
                    shape=[1],
                    initializer=init_ops.zeros_initializer(),
                    use_resource=True)
                v2 = variable_scope.get_variable(
                    "v2",
                    shape=[1],
                    initializer=init_ops.zeros_initializer(),
                    use_resource=True)
                manual = _ManualScope()
                return v, v + 1., v2, manual, manual()

            save_template = template.make_template("s1", _templated)
            v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
            # Objects are compared by identity, hence the use of id().
            six.assertCountEqual(
                self, [
                    id(v1_save),
                    id(v2_save),
                    id(manual_scope),
                    id(manual_scope_v),
                    id(save_template)
                ], map(id, trackable_utils.list_objects(save_template)))
            manual_dep, = manual_scope._checkpoint_dependencies
            self.assertEqual("in_manual_scope", manual_dep.name)
            self.assertIs(manual_scope_v, manual_dep.ref)
            optimizer = adam.Adam(0.0)
            save_root = trackable_utils.Checkpoint(my_template=save_template,
                                                   optimizer=optimizer)
            optimizer.minimize(v1_save.read_value, var_list=[v1_save])
            self.evaluate([v.initializer for v in save_template.variables])
            # Initialize the optimizer's own variables and its hyperparameters.
            optimizer_variables = optimizer.variables() + list(
                optimizer._hyper.values())
            self.evaluate([v.initializer for v in optimizer_variables])
            self.evaluate(v1_save.assign([12.]))
            self.evaluate(v2_save.assign([14.]))
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            save_path = save_root.save(checkpoint_prefix)

            load_template = template.make_template("s2", _templated)
            load_optimizer = adam.Adam(0.0)
            load_root = trackable_utils.Checkpoint(my_template=load_template,
                                                   optimizer=load_optimizer)
            # restore() is requested before the template is first called below.
            status = load_root.restore(save_path)
            var, var_plus_one, var2, _, _ = load_template()
            load_optimizer.minimize(var.read_value, var_list=[var])
            self.assertLen(load_template._checkpoint_dependencies, 3)
            self.assertEqual("v",
                             load_template._checkpoint_dependencies[0].name)
            self.assertEqual("v2",
                             load_template._checkpoint_dependencies[1].name)
            self.assertEqual("ManualScope",
                             load_template._checkpoint_dependencies[2].name)
            status.assert_consumed().run_restore_ops()
            # Restored values match what was assigned before saving.
            self.assertAllEqual([12.], self.evaluate(var))
            self.assertAllEqual([13.], self.evaluate(var_plus_one))
            self.assertAllEqual([14.], self.evaluate(var2))
Ejemplo n.º 15
0
 def testNamingWithOptimizer(self):
     """Checks checkpoint key names and the serialized graph with Adam.

     Runs a minimize step for `model` (a second model shares the optimizer but
     is not part of the checkpoint root), serializes the object graph rooted at
     the checkpoint, and verifies the checkpoint keys, the variables they map
     to, and selected fields of the serialized graph.
     """
     input_value = constant_op.constant([[3.]])
     model = MyModel()
     # A nuisance Model using the same optimizer. Its slot variables should not
     # go in the checkpoint, since it is never depended on.
     other_model = MyModel()
     optimizer = adam.Adam(0.001)
     optimizer_step = training_util.get_or_create_global_step()
     root_checkpointable = util.Checkpoint(optimizer=optimizer,
                                           model=model,
                                           optimizer_step=optimizer_step)
     # Eager: the loss is passed as a callable. Graph: the loss tensor is
     # passed, initializers are run, and only `model`'s train op is evaluated
     # (the op built for `other_model` is discarded).
     if context.executing_eagerly():
         optimizer.minimize(lambda: model(input_value),
                            global_step=optimizer_step)
         optimizer.minimize(lambda: other_model(input_value),
                            global_step=optimizer_step)
     else:
         train_op = optimizer.minimize(model(input_value),
                                       global_step=optimizer_step)
         optimizer.minimize(other_model(input_value),
                            global_step=optimizer_step)
         self.evaluate(util.gather_initializers(root_checkpointable))
         self.evaluate(train_op)
     named_variables, serialized_graph, _ = (util._serialize_object_graph(
         root_checkpointable, saveables_cache=None))
     expected_checkpoint_names = (
         # Created in the root node, so no prefix.
         "optimizer_step",
         "model/_second/kernel",
         "model/_named_dense/kernel",
         "model/_named_dense/bias",
         # non-Layer dependency of the model
         "model/_non_layer/a_variable",
         # The optimizer creates two non-slot variables
         "optimizer/beta_1_power",
         "optimizer/beta_2_power",
         # Slot variables
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
         "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
         "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
         "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
         "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
     )
     suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
     expected_checkpoint_names = [
         name + suffix for name in expected_checkpoint_names
     ]
     # The Dense layers also save get_config() JSON
     expected_checkpoint_names.extend([
         "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"
     ])
     # Index the serialized entries by their `name` so they can be looked up
     # by checkpoint key below.
     named_variables = {v.name: v for v in named_variables}
     six.assertCountEqual(self, expected_checkpoint_names,
                          named_variables.keys())
     # Check that we've mapped to the right variable objects (not exhaustive)
     self.assertEqual("global_step",
                      named_variables["optimizer_step" + suffix].full_name)
     self.assertEqual(
         "my_model/dense_1/kernel",
         named_variables["model/_second/kernel" + suffix].full_name)
     self.assertEqual(
         "my_model/dense/kernel",
         named_variables["model/_named_dense/kernel" + suffix].full_name)
     self.assertEqual(
         "beta_1_power",
         named_variables["optimizer/beta_1_power" + suffix].full_name)
     self.assertEqual(
         "beta_2_power",
         named_variables["optimizer/beta_2_power" + suffix].full_name)
     # Spot check the generated protocol buffers.
     self.assertEqual("optimizer",
                      serialized_graph.nodes[0].children[1].local_name)
     # Follow the root's second child reference to the optimizer's own node.
     optimizer_node = serialized_graph.nodes[
         serialized_graph.nodes[0].children[1].node_id]
     self.assertEqual("beta_1_power", optimizer_node.children[0].local_name)
     self.assertEqual(
         "beta_1_power", serialized_graph.nodes[
             optimizer_node.children[0].node_id].attributes[0].full_name)
     self.assertEqual(
         "my_model/dense/kernel",
         serialized_graph.nodes[optimizer_node.slot_variables[
             0].original_variable_node_id].attributes[0].full_name)
     # We strip off the :0 suffix, as variable.name-based saving does.
     self.assertEqual(
         "my_model/dense/kernel/Adam",
         serialized_graph.nodes[optimizer_node.slot_variables[
             0].slot_variable_node_id].attributes[0].full_name)
     self.assertEqual(
         "my_model/dense/kernel/Adam:0",
         optimizer.get_slot(var=model._named_dense.kernel, name="m").name)
     self.assertEqual(
         "model/_named_dense/kernel" + suffix,
         serialized_graph.nodes[optimizer_node.slot_variables[
             0].original_variable_node_id].attributes[0].checkpoint_key)
     self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
     self.assertEqual(
         "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
         serialized_graph.nodes[optimizer_node.slot_variables[
             0].slot_variable_node_id].attributes[0].checkpoint_key)
Ejemplo n.º 16
0
 def __init__(self):
   """Sets up a `core.Dense(1)` layer and an `adam.Adam(0.01)` optimizer."""
   super(_HasOptimizer, self).__init__()
   # Both objects are stored as attributes of this instance.
   self.layer = core.Dense(1)
   self.optimizer = adam.Adam(0.01)
Ejemplo n.º 17
0
 def testSaveRestore(self):
     """Saves a checkpoint, then restores it immediately and on-create.

     The first half overwrites saved values and checks that an immediate
     restore brings them back. The second half (eager only) restores into
     freshly constructed objects whose variables do not exist yet.
     """
     model = MyModel()
     optimizer = adam.Adam(0.001)
     root_checkpointable = util.Checkpoint(optimizer=optimizer, model=model)
     input_value = constant_op.constant([[3.]])
     if context.executing_eagerly():
         optimizer.minimize(lambda: model(input_value))
     else:
         train_op = optimizer.minimize(model(input_value))
         # TODO(allenl): Make initialization more pleasant when graph building.
         root_checkpointable.save_counter  # pylint: disable=pointless-statement
         self.evaluate(util.gather_initializers(root_checkpointable))
         self.evaluate(train_op)
     prefix = os.path.join(self.get_temp_dir(), "ckpt")
     # Write known values into a model variable and its "m" slot before saving.
     self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
     m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
     self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
     save_path = root_checkpointable.save(file_prefix=prefix)
     # Overwrite the saved values so that a successful restore is observable.
     self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
     self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
     # Snapshot of the optimizer variable values, compared against the
     # on-create optimizer further down.
     optimizer_variables = self.evaluate(optimizer.variables())
     self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
     # Immediate restoration
     status = root_checkpointable.restore(
         save_path=save_path).assert_consumed()
     status.run_restore_ops()
     self.assertAllEqual([42.],
                         self.evaluate(model._named_dense.variables[1]))
     self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
     self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
     if not context.executing_eagerly():
         return  # Restore-on-create is only supported when executing eagerly
     on_create_model = MyModel()
     on_create_optimizer = adam.Adam(
         0.001,
         # Preserve beta_1_power and beta_2_power when applying gradients
         # so we can test that they've been restored correctly.
         beta_1=1.0,
         beta_2=1.0)
     on_create_root = util.Checkpoint(optimizer=on_create_optimizer,
                                      model=on_create_model)
     # Deferred restoration
     status = on_create_root.restore(save_path=save_path)
     on_create_model(constant_op.constant([[3.]]))  # create variables
     self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
     self.assertAllEqual([42.],
                         self.evaluate(
                             on_create_model._named_dense.variables[1]))
     on_create_m_bias_slot = on_create_optimizer.get_slot(
         on_create_model._named_dense.variables[1], "m")
     # Optimizer slot variables are created when the original variable is
     # restored.
     self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
     # The first two entries of the snapshot are compared separately below,
     # against the beta accumulators.
     self.assertAllEqual(optimizer_variables[2:],
                         self.evaluate(on_create_optimizer.variables()))
     dummy_var = resource_variable_ops.ResourceVariable([1.])
     on_create_optimizer.minimize(loss=dummy_var.read_value)
     status.assert_consumed()
     beta_1_power, beta_2_power = on_create_optimizer._get_beta_accumulators(
     )
     self.assertAllEqual(optimizer_variables[0],
                         self.evaluate(beta_1_power))
     self.assertAllEqual(optimizer_variables[1],
                         self.evaluate(beta_2_power))
Ejemplo n.º 18
0
# Hyper-parameters of each optimizer in graph, keyed by optimizer name.
# Each value is the list of hyper-parameter names for that optimizer.
HP_IN_GRAPH = {
    'Adam': ['decay', 'learning_rate'],
    'Ftrl': [
        'decay', 'l1_regularization_strength', 'l2_regularization_strength',
        'learning_rate', 'learning_rate_power'
    ],
    'RMSProp': ['decay', 'learning_rate', 'momentum', 'rho'],
    'Adagrad': ['decay', 'learning_rate'],
    'SGD': ['decay', 'learning_rate', 'momentum'],
}

# Optimizer v2 instances, one per optimizer name, each constructed with no
# arguments. The keys match those of HP_IN_GRAPH (note the key is 'RMSProp'
# while the class is spelled `RMSprop`).
OPT_V2_INSTANCE = {
    'Adagrad': adagrad.Adagrad(),
    'Adam': adam.Adam(),
    'Ftrl': ftrl.Ftrl(),
    'RMSProp': rmsprop.RMSprop(),
    'SGD': gradient_descent.SGD(),
}


def _add_new_variable(initial_value, var_name_v2, var_name_v1, var_map,
                      var_names_map):
    """Builds a `tf.Variable` and records it under its v2 name in both maps.

    Args:
      initial_value: Initial value handed to `tf.Variable`.
      var_name_v2: Name given to the new variable; also the key written into
        both maps.
      var_name_v1: Value stored in `var_names_map` under `var_name_v2`.
      var_map: Mapping updated in place with `var_name_v2 -> new variable`.
      var_names_map: Mapping updated in place with
        `var_name_v2 -> var_name_v1`.
    """
    new_variable = tf.Variable(initial_value, name=var_name_v2)
    var_map[var_name_v2] = new_variable
    var_names_map[var_name_v2] = var_name_v1


def _add_opt_variable(opt_name_v2, var_name_v1, idx, suffix_v2, reader,
Ejemplo n.º 19
0
 def testAdamCompatibility(self):
     """Runs the shared compatibility check on `optimizers.Adam` vs `adam.Adam`."""
     # Arguments are evaluated left to right, so the v1 optimizer is still
     # constructed before the v2 one.
     self._testOptimizersCompatibility(optimizers.Adam(), adam.Adam())
Ejemplo n.º 20
0
    def testMultipleGraphsNonSlotVariables(self):
        """Uses one optimizer in two graphs and checks their state is separate.

        The same `optimizer` object builds train ops in two distinct graphs.
        State is saved and restored in the second graph, then the first graph
        is checked to still hold the values assigned to it earlier.
        """
        with context.graph_mode():
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            optimizer = adam.Adam(0.001)
            # Construct a model in one graph
            first_graph = ops.Graph()
            first_session = session_lib.Session(graph=first_graph)
            with first_graph.as_default(), first_session.as_default():
                first_variable = resource_variable_ops.ResourceVariable([1.])
                first_root_checkpointable = util.Checkpoint(
                    optimizer=optimizer, variable=first_variable)
                train_op = optimizer.minimize(first_variable.read_value)
                self.evaluate(
                    util.gather_initializers(first_root_checkpointable))
                self.evaluate(train_op)
                # Give the first graph distinctive values (1., 2., 3.) that
                # are re-checked at the end of the test.
                self.evaluate(first_variable.assign([1.]))
                self.evaluate(
                    optimizer.get_slot(var=first_variable,
                                       name="m").assign([2.]))
                beta_1_power, _ = optimizer._get_beta_accumulators()
                self.evaluate(beta_1_power.assign(3.))

            # Save and load in a second graph
            second_graph = ops.Graph()
            with second_graph.as_default(), session_lib.Session(
                    graph=second_graph):
                second_variable = resource_variable_ops.ResourceVariable([1.])
                second_root_checkpointable = util.Checkpoint(
                    optimizer=optimizer, variable=second_variable)
                train_op = optimizer.minimize(second_variable.read_value)
                # No checkpoint path is given here; presumably this only runs
                # initializers -- TODO confirm.
                second_root_checkpointable.restore(
                    None).initialize_or_restore()
                self.evaluate(train_op)
                # Values that end up in the saved checkpoint: 4., 5. and 6.
                self.evaluate(second_variable.assign([4.]))
                self.evaluate(
                    optimizer.get_slot(var=second_variable,
                                       name="m").assign([5.]))
                beta_1_power, _ = optimizer._get_beta_accumulators()
                self.evaluate(beta_1_power.assign(6.))
                save_path = second_root_checkpointable.save(checkpoint_prefix)
                # Change the variable and slot after saving so that the
                # restore below has something to undo.
                self.evaluate(second_variable.assign([7.]))
                self.evaluate(
                    optimizer.get_slot(var=second_variable,
                                       name="m").assign([8.]))
                beta_1_power, _ = optimizer._get_beta_accumulators()
                self.assertAllEqual(6., self.evaluate(beta_1_power))
                status = second_root_checkpointable.restore(save_path)
                status.assert_consumed().run_restore_ops()
                self.assertAllEqual([4.], self.evaluate(second_variable))
                self.assertAllEqual([5.],
                                    self.evaluate(
                                        optimizer.get_slot(var=second_variable,
                                                           name="m")))
                beta_1_power, _ = optimizer._get_beta_accumulators()
                self.assertAllEqual(6., self.evaluate(beta_1_power))

            # Check that the first graph is unmolested
            with first_graph.as_default(), first_session.as_default():
                self.assertAllEqual([1.], self.evaluate(first_variable))
                self.assertAllEqual([2.],
                                    self.evaluate(
                                        optimizer.get_slot(var=first_variable,
                                                           name="m")))
                beta_1_power, _ = optimizer._get_beta_accumulators()
                self.assertAllEqual(3., self.evaluate(beta_1_power))
Ejemplo n.º 21
0
    def testBasicWithLearningRateInverseTimeDecay(self):
        """Checks Adam with an InverseTimeDecay schedule against NumPy.

        For each of half, float32 and float64, applies constant gradients to
        two resource variables for three steps and compares the variables with
        `adam_update_numpy` fed `learning_rate / (1 + decay * step)`.
        """
        # TODO(tanzheny, omalleyt): Fix test in eager mode.
        dtypes_under_test = [dtypes.half, dtypes.float32, dtypes.float64]
        for index, dtype in enumerate(dtypes_under_test):
            with ops.Graph().as_default(), self.cached_session(use_gpu=True):
                np_dtype = dtype.as_numpy_dtype

                # State of the NumPy reference implementation.
                m0 = v0 = m1 = v1 = 0.0
                expected_var0 = np.array([1.0, 2.0], dtype=np_dtype)
                grad0_value = np.array([0.1, 0.1], dtype=np_dtype)
                expected_var1 = np.array([3.0, 4.0], dtype=np_dtype)
                grad1_value = np.array([0.01, 0.01], dtype=np_dtype)

                # Variable names carry the dtype index.
                var0 = resource_variable_ops.ResourceVariable(
                    expected_var0, name="var0_%d" % index)
                var1 = resource_variable_ops.ResourceVariable(
                    expected_var1, name="var1_%d" % index)
                grad0 = constant_op.constant(grad0_value)
                grad1 = constant_op.constant(grad1_value)

                base_lr = 0.001
                decay_rate = 0.5
                schedule = learning_rate_schedule.InverseTimeDecay(
                    base_lr, decay_steps=1.0, decay_rate=decay_rate)
                beta_1, beta_2, epsilon = 0.9, 0.999, 1e-7

                optimizer = adam.Adam(learning_rate=schedule,
                                      beta_1=beta_1,
                                      beta_2=beta_2,
                                      epsilon=epsilon)
                train_op = optimizer.apply_gradients(
                    zip([grad0, grad1], [var0, var1]))

                self.evaluate(variables.global_variables_initializer())

                # Run 3 steps of Adam, mirroring each one in NumPy.
                for step in range(3):
                    self.evaluate(train_op)

                    decayed_lr = base_lr / (1 + decay_rate * step)
                    expected_var0, m0, v0 = adam_update_numpy(
                        expected_var0, grad0_value, step, m0, v0,
                        lr=decayed_lr)
                    expected_var1, m1, v1 = adam_update_numpy(
                        expected_var1, grad1_value, step, m1, v1,
                        lr=decayed_lr)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(
                        expected_var0, self.evaluate(var0))
                    self.assertAllCloseAccordingToType(
                        expected_var1, self.evaluate(var1))
Ejemplo n.º 22
0
    def doTestBasic(self, use_resource=False):
        """Runs three Adam steps per dtype and compares with a NumPy reference.

        Args:
          use_resource: When True, the variables are built as named
            `ResourceVariable`s and the name of the "m" slot is also checked;
            otherwise `variables.Variable` is used.
        """
        for i, dtype in enumerate(
            [dtypes.half, dtypes.float32, dtypes.float64]):
            with self.session(graph=ops.Graph()):
                # Initialize variables for numpy implementation.
                m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
                var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
                grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
                var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
                grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)

                if use_resource:
                    var0 = resource_variable_ops.ResourceVariable(
                        var0_np, name="var0_%d" % i)
                    var1 = resource_variable_ops.ResourceVariable(
                        var1_np, name="var1_%d" % i)
                else:
                    var0 = variables.Variable(var0_np)
                    var1 = variables.Variable(var1_np)
                grads0 = constant_op.constant(grads0_np)
                grads1 = constant_op.constant(grads1_np)

                opt = adam.Adam()
                update = opt.apply_gradients(
                    zip([grads0, grads1], [var0, var1]))
                opt_variables = opt.variables()
                beta1_power, beta2_power = opt._get_beta_accumulators()
                # assertIsNotNone states the intent directly and reports the
                # offending value on failure, unlike assertTrue(x is not None).
                self.assertIsNotNone(beta1_power)
                self.assertIsNotNone(beta2_power)
                self.assertIn(beta1_power, opt_variables)
                self.assertIn(beta2_power, opt_variables)

                with ops.Graph().as_default():
                    # Shouldn't return non-slot variables from other graphs.
                    self.assertEqual(0, len(opt.variables()))

                if not context.executing_eagerly():
                    self.evaluate(variables.global_variables_initializer())
                    # Fetch params to validate initial values
                    self.assertAllClose([1.0, 2.0], self.evaluate(var0))
                    self.assertAllClose([3.0, 4.0], self.evaluate(var1))

                beta1_power, beta2_power = opt._get_beta_accumulators()

                # Run 3 steps of Adam
                for t in range(1, 4):
                    # Graph mode runs the prebuilt update op on every step.
                    # Eager mode calls apply_gradients again only from the
                    # second step on, since the call above counts as step 1.
                    if not context.executing_eagerly():
                        self.evaluate(update)
                    elif t > 1:
                        opt.apply_gradients(zip([grads0, grads1],
                                                [var0, var1]))

                    self.assertAllCloseAccordingToType(
                        0.9**(t + 1), self.evaluate(beta1_power))
                    self.assertAllCloseAccordingToType(
                        0.999**(t + 1), self.evaluate(beta2_power))

                    var0_np, m0, v0 = adam_update_numpy(
                        var0_np, grads0_np, t, m0, v0)
                    var1_np, m1, v1 = adam_update_numpy(
                        var1_np, grads1_np, t, m1, v1)

                    # Validate updated params
                    self.assertAllCloseAccordingToType(var0_np,
                                                       self.evaluate(var0))
                    self.assertAllCloseAccordingToType(var1_np,
                                                       self.evaluate(var1))
                    if use_resource:
                        self.assertEqual("var0_%d/Adam:0" % (i, ),
                                         opt.get_slot(var=var0, name="m").name)
Ejemplo n.º 23
0
 def testAmsgradWithError(self):
   """Constructing Adam with `amsgrad=True` is expected to raise ValueError."""
   # `assertRaisesRegexp` is a deprecated alias that was removed in
   # Python 3.12; `assertRaisesRegex` is the supported spelling.
   with self.assertRaisesRegex(ValueError,
                               "Amsgrad is currently not supported"):
     adam.Adam(learning_rate=1., beta_1=0.9, beta_2=0.99, amsgrad=True)
Ejemplo n.º 24
0
    def testDeferredSlotRestoration(self):
        """Restores slot variables that do not exist yet when restore is called.

        Two checkpoints are written: one without the optimizer attached
        (var == 12.) and one with it (var == 13., slot "m" == 14.). Both are
        then restored, deferred, into a fresh root whose variable and
        optimizer are only added afterwards.
        """
        with self.test_session():
            checkpoint_directory = self.get_temp_dir()

            root = trackable_utils.Checkpoint()
            root.var = trackable_utils.add_variable(root,
                                                    name="var",
                                                    initializer=0.)
            optimizer = adam.Adam(0.1)
            variables = [root.var]
            gradients = [1.]
            train_op = optimizer.apply_gradients(zip(gradients, variables))
            # Note that `optimizer` has not been added as a dependency of
            # `root`. Create a one-off grouping so that slot variables for `root.var`
            # get initialized too.
            self.evaluate(
                trackable_utils.gather_initializers(
                    trackable_utils.Checkpoint(root=root,
                                               optimizer=optimizer)))
            self.evaluate(train_op)
            # First checkpoint: the optimizer is not attached to `root` yet.
            self.evaluate(state_ops.assign(root.var, 12.))
            no_slots_path = root.save(
                os.path.join(checkpoint_directory, "no_slots"))
            # Second checkpoint: the optimizer is attached, with new values.
            root.optimizer = optimizer
            self.evaluate(state_ops.assign(root.var, 13.))
            self.evaluate(
                state_ops.assign(
                    optimizer.get_slot(slot_name="m", var=root.var), 14.))
            slots_path = root.save(
                os.path.join(checkpoint_directory, "with_slots"))
            new_root = trackable_utils.Checkpoint()
            # Load the slot-containing checkpoint (deferred), then immediately
            # overwrite the non-slot variable (also deferred).
            slot_status = new_root.restore(slots_path)
            no_slot_status = new_root.restore(no_slots_path)
            # `new_root` has no `var` yet, so nothing can have been consumed.
            with self.assertRaises(AssertionError):
                no_slot_status.assert_consumed()
            new_root.var = trackable_utils.add_variable(new_root,
                                                        name="var",
                                                        shape=[])
            no_slot_status.assert_consumed()
            no_slot_status.run_restore_ops()
            self.assertEqual(12., self.evaluate(new_root.var))
            new_root.optimizer = adam.Adam(0.1)
            slot_status.assert_existing_objects_matched()
            if not context.executing_eagerly():
                with self.assertRaisesRegex(AssertionError,
                                            "Unresolved object"):
                    slot_status.assert_consumed()
            self.assertEqual(12., self.evaluate(new_root.var))
            if context.executing_eagerly():
                # Slot variables are only created with restoring initializers when
                # executing eagerly.
                self.assertEqual(
                    14.,
                    self.evaluate(
                        new_root.optimizer.get_slot(slot_name="m",
                                                    var=new_root.var)))
            else:
                # Slot variables are not created eagerly when graph building.
                with self.assertRaises(KeyError):
                    new_root.optimizer.get_slot(slot_name="m",
                                                var=new_root.var)
            variables = [new_root.var]
            gradients = [1.]
            train_op = new_root.optimizer.apply_gradients(
                zip(gradients, variables))
            # The slot variable now exists; restore() didn't create it, but we should
            # now have a restore op for it.
            slot_status.run_restore_ops()
            if not context.executing_eagerly():
                # The train op hasn't run when graph building, so the slot variable has
                # its restored value. It has run in eager, so the value will
                # be different.
                self.assertEqual(
                    14.,
                    self.evaluate(
                        new_root.optimizer.get_slot(slot_name="m",
                                                    var=new_root.var)))
            self.evaluate(train_op)
            slot_status.assert_consumed()
Ejemplo n.º 25
0
 def __init__(self):
     """Stores a `core.Dense(1)` layer and an `adam.Adam(0.01)` optimizer."""
     # NOTE(review): no super().__init__() call here; the enclosing class is
     # not visible, so confirm its base class does not require one.
     self.dense = core.Dense(1)
     self.optimizer = adam.Adam(0.01)
Ejemplo n.º 26
0
    def test_initialize_if_not_restoring(self):
        """Exercises `initialize_or_restore()` in three successive scenarios.

        1. Train and save a model-only checkpoint plus an optimizer-only
           checkpoint (with `beta_1` set to 42.).
        2. Restore the model-only checkpoint into a root that also tracks an
           optimizer, and check that the match/consume assertions fail.
        3. Combine a model restore, an init-only restore and an optimizer
           restore, and check that `beta_1` ends up as the saved 42.
        """
        with self.test_session():
            checkpoint_directory = self.get_temp_dir()
            checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
            optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
            with testing_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = adam.Adam(0.001)
                root = trackable_utils.Checkpoint(
                    model=model
                )  # Do not save the optimizer with the checkpoint.
                optimizer_checkpoint = trackable_utils.Checkpoint(
                    optimizer=optimizer)

                # Nothing has been saved to this directory yet at this point.
                checkpoint_path = checkpoint_management.latest_checkpoint(
                    checkpoint_directory)
                status = root.restore(save_path=checkpoint_path)
                input_value = constant_op.constant([[3.]])

                def train_fn():
                    with backprop.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    return optimizer.apply_gradients(zip(gradients, variables))

                # When graph building, build the train op once and rebind
                # train_fn to a callable that evaluates it.
                if not context.executing_eagerly():
                    train_fn = functools.partial(self.evaluate, train_fn())
                status.initialize_or_restore()
                # TODO(tanzheny): Add hyper variables to .variables(), and set them with
                # set_weights etc.
                variables_not_in_the_variables_property = [
                    obj for obj in optimizer._hyper.values()
                    if isinstance(obj, variables_lib.Variable)
                ]
                # The optimizer is not tracked by `root`, so its variables
                # are initialized explicitly here.
                self.evaluate([
                    v.initializer for v in optimizer.variables() +
                    variables_not_in_the_variables_property
                ])
                train_fn()
                model_save_path = root.save(file_prefix=checkpoint_prefix)
                # Sentinel value; the last section checks that it is restored.
                self.evaluate(optimizer.beta_1.assign(42.))
                optimizer_save_path = optimizer_checkpoint.save(
                    optimizer_only_prefix)
            del train_fn

            # Restore into a graph with the optimizer
            with testing_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = adam.Adam(0.001)
                root = trackable_utils.Checkpoint(optimizer=optimizer,
                                                  model=model)
                status = root.restore(save_path=model_save_path)
                input_value = constant_op.constant([[3.]])

                def train_fn1():
                    with backprop.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    return optimizer.apply_gradients(zip(gradients, variables))

                if not context.executing_eagerly():
                    train_fn1 = functools.partial(self.evaluate, train_fn1())
                status.initialize_or_restore()
                train_fn1()
                # The checkpoint at model_save_path was written from a root
                # that did not track the optimizer, so both checks must fail.
                with self.assertRaises(AssertionError):
                    status.assert_existing_objects_matched()
                with self.assertRaises(AssertionError):
                    status.assert_consumed()
            del train_fn1

            # Make sure initialization doesn't clobber later restores
            with testing_utils.device(should_use_gpu=True):
                model = MyModel()
                optimizer = adam.Adam(0.001, beta_1=1.0)
                root = trackable_utils.Checkpoint(optimizer=optimizer,
                                                  model=model)
                opt_root = trackable_utils.Checkpoint(optimizer=optimizer)
                status = root.restore(save_path=model_save_path)
                init_only_optimizer_status = opt_root.restore(save_path=None)
                optimizer_status = opt_root.restore(
                    save_path=optimizer_save_path)
                input_value = constant_op.constant([[3.]])

                def train_fn2():
                    with backprop.GradientTape() as tape:
                        loss = model(input_value)
                    variables = model.trainable_variables
                    gradients = tape.gradient(loss, variables)
                    return optimizer.apply_gradients(zip(gradients, variables))

                if not context.executing_eagerly():
                    train_fn2 = functools.partial(self.evaluate, train_fn2())
                # The optimizer restore runs first; the two
                # initialize_or_restore() calls after it must not overwrite it.
                optimizer_status.run_restore_ops()
                status.initialize_or_restore()
                init_only_optimizer_status.initialize_or_restore()
                train_fn2()
                self.assertEqual(42., self.evaluate(optimizer.beta_1))
Ejemplo n.º 27
0
    def testNamingWithOptimizer(self):
        """Checks checkpoint key names and the serialized graph with Adam.

        Applies gradients for `model` (a second model shares the optimizer but
        is not part of the checkpoint root), serializes the object graph
        rooted at the checkpoint, and verifies the checkpoint keys, the
        variables they map to, and the optimizer node's children and slots.
        """
        input_value = constant_op.constant([[3.]])
        model = MyModel()
        # A nuisance Model using the same optimizer. Its slot variables should not
        # go in the checkpoint, since it is never depended on.
        other_model = MyModel()
        optimizer = adam.Adam(0.001)
        step = training_util.get_or_create_global_step()
        root_trackable = trackable_utils.Checkpoint(optimizer=optimizer,
                                                    model=model,
                                                    step=step)

        with backprop.GradientTape() as tape:
            loss = model(input_value)
        variables = model.trainable_variables
        gradients = tape.gradient(loss, variables)
        # Group the gradient application with the step increment.
        train_op = control_flow_ops.group(
            optimizer.apply_gradients(zip(gradients, variables)),
            step.assign_add(1))

        # Same again for the nuisance model; the result is not kept.
        with backprop.GradientTape() as tape:
            loss = other_model(input_value)
        variables = other_model.trainable_variables
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

        self.evaluate(trackable_utils.gather_initializers(root_trackable))
        self.evaluate(train_op)
        named_variables, serialized_graph, _ = graph_view.ObjectGraphView(
            root_trackable).serialize_object_graph()
        expected_slot_keys = (
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
            "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
        )
        expected_checkpoint_names = (
            # Created in the root node, so no prefix.
            "step",
            "model/_second/kernel",
            "model/_named_dense/kernel",
            "model/_named_dense/bias",
            # non-Layer dependency of the model
            "model/_non_layer/a_variable",
            "optimizer/learning_rate",
            "optimizer/beta_1",
            "optimizer/beta_2",
            "optimizer/iter",
            "optimizer/decay",
        ) + expected_slot_keys
        suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
        expected_checkpoint_names = [
            name + suffix for name in expected_checkpoint_names
        ]
        # Index the serialized entries by their `name` so they can be looked
        # up by checkpoint key below.
        named_variables = {v.name: v for v in named_variables}
        six.assertCountEqual(self, expected_checkpoint_names,
                             named_variables.keys())
        # Check that we've mapped to the right variable objects (not exhaustive)
        self.assertEqual("global_step",
                         named_variables["step" + suffix].full_name)
        self.assertEqual(
            "my_model/dense_1/kernel",
            named_variables["model/_second/kernel" + suffix].full_name)
        self.assertEqual(
            "my_model/dense/kernel",
            named_variables["model/_named_dense/kernel" + suffix].full_name)
        self.assertEqual(
            "Adam/beta_1",
            named_variables["optimizer/beta_1" + suffix].full_name)
        self.assertEqual(
            "Adam/beta_2",
            named_variables["optimizer/beta_2" + suffix].full_name)
        # Spot check the generated protocol buffers.
        self.assertEqual("optimizer",
                         serialized_graph.nodes[0].children[1].local_name)
        # Follow the root's second child reference to the optimizer's node.
        optimizer_node = serialized_graph.nodes[
            serialized_graph.nodes[0].children[1].node_id]
        children = [node.local_name for node in optimizer_node.children]
        six.assertCountEqual(
            self,
            # hyper variable dependencies
            ["beta_1", "beta_2", "iter", "decay", "learning_rate"],
            children)
        # Collect the checkpoint key of every attribute on every slot
        # variable node referenced by the optimizer.
        serialized_slot_keys = []
        for slot in optimizer_node.slot_variables:
            for attribute in (serialized_graph.nodes[
                    slot.slot_variable_node_id].attributes):
                serialized_slot_keys.append(attribute.checkpoint_key)
        six.assertCountEqual(self,
                             [key + suffix for key in expected_slot_keys],
                             serialized_slot_keys)
    def test_train_premade_widedeep_model_with_feature_layers(self):
        """Trains a premade wide-deep model through an estimator.

        The linear and the DNN branch each read the same indicator column
        through their own `DenseFeatures` layer. The test passes when the
        evaluation loss after training is lower than before training and
        also below 0.1.
        """
        vocabulary = ['alpha', 'beta', 'gamma']
        base_targets = [0.4, 0.6, 0.9]

        # Draw random symbols; every label is the base value of its symbol
        # plus a small uniform jitter drawn from [-0.01, 0.01).
        data = np.random.choice(vocabulary, size=256)
        y = np.zeros_like(data, dtype=np.float32)
        for symbol, base in zip(vocabulary, base_targets):
            hits = np.where(data == symbol)
            jitter = np.random.uniform(low=-0.01,
                                       high=0.01,
                                       size=hits[0].shape)
            y[hits] = base + jitter

        symbol_column = (
            tf.feature_column.categorical_column_with_vocabulary_list(
                key='symbol', vocabulary_list=vocabulary))
        one_hot_column = tf.feature_column.indicator_column(symbol_column)
        # TODO(tanzheny): use emb column for dense part once b/139667019 is fixed.
        # emb_column = feature_column.embedding_column(cat_column, dimension=5)
        symbol_input = keras.layers.Input(name='symbol',
                                          shape=3,
                                          dtype=tf.dtypes.string)

        # Wide (linear) branch: feature layer followed by the linear model.
        wide_branch = keras.Sequential([
            tf.compat.v1.keras.layers.DenseFeatures([one_hot_column]),
            linear.LinearModel(units=1,
                               name='Linear',
                               kernel_initializer='zeros'),
        ])

        # Deep (DNN) branch: its own feature layer followed by a dense layer.
        deep_branch = keras.Sequential([
            tf.compat.v1.keras.layers.DenseFeatures([one_hot_column]),
            keras.layers.Dense(units=1,
                               name='DNNDense',
                               kernel_initializer='zeros'),
        ])

        # Assemble the wide-deep model; the optimizer list is passed in the
        # order [SGD, Adam].
        model = wide_deep.WideDeepModel(wide_branch, deep_branch)
        model._set_inputs({'symbol': symbol_input})
        model.compile(
            [gradient_descent.SGD(0.1), adam.Adam(0.1)], 'mse', ['mse'])

        def build_input_fn():
            # Training and evaluation read the same in-memory arrays with
            # identical settings.
            return numpy_io.numpy_input_fn(x={'symbol': data},
                                           y=y,
                                           num_epochs=20,
                                           shuffle=False)

        train_input_fn = build_input_fn()
        eval_input_fn = build_input_fn()

        # Wrap the Keras model in an estimator.
        est = keras_lib.model_to_estimator(keras_model=model,
                                           config=self._config,
                                           checkpoint_format='saver')

        metrics_before = est.evaluate(input_fn=eval_input_fn, steps=1)
        est.train(input_fn=train_input_fn, steps=20)
        metrics_after = est.evaluate(input_fn=eval_input_fn, steps=1)

        loss_before = metrics_before['loss']
        loss_after = metrics_after['loss']
        self.assertLess(loss_after, loss_before)
        self.assertLess(loss_after, 0.1)
# Named factories for the V1 optimizers. Every entry pairs a display name with
# a zero-argument lambda, so no optimizer is constructed at import time.
adagrad_optimizer_v1_fn = combinations.NamedObject(
    "AdagradV1",
    lambda: adagrad.AdagradOptimizer(0.001),
)
adam_optimizer_v1_fn = combinations.NamedObject(
    "AdamV1",
    lambda: adam.AdamOptimizer(0.001, epsilon=1),
)
rmsprop_optimizer_v1_fn = combinations.NamedObject(
    "RmsPropV1",
    lambda: rmsprop.RMSPropOptimizer(0.001),
)

# TODO(shiningsun): consider adding the other v1 optimizers
optimizers_v1 = [
    gradient_descent_optimizer_v1_fn,
    adagrad_optimizer_v1_fn,
]

# Named factories for the Keras V2 optimizers, following the same
# name-plus-lambda layout as the V1 entries.
adadelta_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdadeltaKerasV2",
    lambda: adadelta_keras_v2.Adadelta(0.001),
)
adagrad_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdagradKerasV2",
    lambda: adagrad_keras_v2.Adagrad(0.001),
)
adam_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdamKerasV2",
    lambda: adam_keras_v2.Adam(0.001, epsilon=1.0),
)
adamax_optimizer_keras_v2_fn = combinations.NamedObject(
    "AdamaxKerasV2",
    lambda: adamax_keras_v2.Adamax(0.001, epsilon=1.0),
)
nadam_optimizer_keras_v2_fn = combinations.NamedObject(
    "NadamKerasV2",
    lambda: nadam_keras_v2.Nadam(0.001, epsilon=1.0),
)
ftrl_optimizer_keras_v2_fn = combinations.NamedObject(
    "FtrlKerasV2",
    lambda: ftrl_keras_v2.Ftrl(0.001),
)
gradient_descent_optimizer_keras_v2_fn = combinations.NamedObject(
    "GradientDescentKerasV2",
    lambda: gradient_descent_keras_v2.SGD(0.2),
)
rmsprop_optimizer_keras_v2_fn = combinations.NamedObject(
    "RmsPropKerasV2",
    lambda: rmsprop_keras_v2.RMSprop(0.001),
)

# TODO(shiningsun): consider adding the other v2 optimizers
optimizers_v2 = [
    gradient_descent_optimizer_keras_v2_fn,
    adagrad_optimizer_keras_v2_fn,
]
Ejemplo n.º 30
0
 def testSaveRestore(self):
     """Round-trips model and optimizer state through a checkpoint.

     Covers immediate restoration into the objects that were saved and, when
     executing eagerly, deferred restoration into freshly built objects that
     are restored before the model has been called.
     """
     with self.test_session():
         model = MyModel()
         optimizer = adam.Adam(0.001)
         root_trackable = trackable_utils.Checkpoint(optimizer=optimizer,
                                                     model=model)
         # Run one optimizer step on a fixed input.
         input_value = constant_op.constant([[3.]])
         with backprop.GradientTape() as tape:
             loss = model(input_value)
         variables = model.trainable_variables
         gradients = tape.gradient(loss, variables)
         train_op = optimizer.apply_gradients(zip(gradients, variables))
         self.assertFalse(root_trackable.save_counter.trainable)
         self.evaluate(trackable_utils.gather_initializers(root_trackable))
         self.evaluate(train_op)
         prefix = os.path.join(self.get_temp_dir(), "ckpt")
         # Write recognizable values that the checkpoint should capture.
         # `variables[1]` is presumably the dense layer's bias (cf. the
         # `m_bias_slot` name) -- confirm against MyModel.
         self.evaluate(
             state_ops.assign(model._named_dense.variables[1], [42.]))
         m_bias_slot = optimizer.get_slot(model._named_dense.variables[1],
                                          "m")
         self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
         save_path = root_trackable.save(file_prefix=prefix)
         # Overwrite the saved values so a successful restore is observable.
         self.evaluate(
             state_ops.assign(model._named_dense.variables[1], [43.]))
         self.evaluate(state_ops.assign(root_trackable.save_counter, 3))
         # Snapshot the optimizer variable values (sorted by name) before the
         # slot is clobbered below.
         optimizer_variables = self.evaluate(
             sorted(optimizer.variables(), key=lambda v: v.name))
         self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
         # Immediate restoration
         status = root_trackable.restore(
             save_path=save_path).assert_consumed()
         status.run_restore_ops()
         # The values written before `save()` must be back: 42 instead of 43,
         # a save counter of 1 instead of 3, and a slot of 1.5 instead of -2.
         self.assertAllEqual([42.],
                             self.evaluate(model._named_dense.variables[1]))
         self.assertAllEqual(1, self.evaluate(root_trackable.save_counter))
         self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
         if not context.executing_eagerly():
             return  # Restore-on-create is only supported when executing eagerly
         # Fresh objects; the model has not been called yet.
         on_create_model = MyModel()
         on_create_optimizer = adam.Adam(0.001)
         on_create_root = trackable_utils.Checkpoint(
             optimizer=on_create_optimizer, model=on_create_model)
         # Deferred restoration
         status = on_create_root.restore(save_path=save_path)
         status.assert_nontrivial_match()
         status.assert_existing_objects_matched()
         # The model has not been called yet, so the checkpoint is expected
         # not to be fully consumed at this point.
         with self.assertRaises(AssertionError):
             status.assert_consumed()
         on_create_model(constant_op.constant([[3.]]))  # create variables
         self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
         self.assertAllEqual([42.],
                             self.evaluate(
                                 on_create_model._named_dense.variables[1]))
         on_create_m_bias_slot = on_create_optimizer.get_slot(
             on_create_model._named_dense.variables[1], "m")
         status.assert_existing_objects_matched()
         # NOTE(review): this branch looks unreachable -- the method already
         # returned above when not executing eagerly. Confirm and consider
         # removing it.
         if not context.executing_eagerly():
             with self.assertRaises(AssertionError):
                 status.assert_consumed()
         # Optimizer slot variables are created when the original variable is
         # restored.
         self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
         # Run `minimize` on a throwaway variable; afterwards the checkpoint is
         # expected to be fully consumed. Presumably this step creates the
         # optimizer variables that were still missing -- confirm.
         dummy_var = variables_lib.Variable([1.])
         on_create_optimizer.minimize(loss=dummy_var.read_value,
                                      var_list=[dummy_var])
         status.assert_existing_objects_matched()
         status.assert_consumed()
         # NOTE(review): this re-reads `optimizer`, not `on_create_optimizer`,
         # although the inline comment below talks about a different creation
         # order. Confirm which optimizer is meant to be compared.
         self.assertAllEqual(
             optimizer_variables,
             # Creation order is different, so .variables() needs to be re-sorted.
             self.evaluate(
                 sorted(optimizer.variables(), key=lambda v: v.name)))