Example #1
  def on_epoch_end(self, epoch, logs=None):
    logs = logs or {}
    logs['lr'] = K.get_value(self.model.optimizer.lr)
    current = logs.get(self.monitor)
    if current is None:
      logging.warning('Reduce LR on plateau conditioned on metric `%s` '
                      'which is not available. Available metrics are: %s',
                      self.monitor, ','.join(list(logs.keys())))

    else:
      if self.in_cooldown():
        self.cooldown_counter -= 1
        self.wait = 0

      if self.monitor_op(current, self.best):
        self.best = current
        self.wait = 0
      elif not self.in_cooldown():
        self.wait += 1
        if self.wait >= self.patience:
          old_lr = float(K.get_value(self.model.optimizer.lr))
          if old_lr > self.min_lr:
            new_lr = old_lr * self.factor
            new_lr = max(new_lr, self.min_lr)
            K.set_value(self.model.optimizer.lr, new_lr)
            if self.verbose > 0:
              print('\nEpoch %05d: ReduceLROnPlateau reducing learning '
                    'rate to %s.' % (epoch + 1, new_lr))
            self.cooldown_counter = self.cooldown
            self.wait = 0
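
The snippet above boils down to reading the optimizer's learning rate with K.get_value and writing it back with K.set_value. A minimal self-contained sketch of that same pattern, assuming a TF 2.x tf.keras setup; the HalveLR callback and the toy model are illustrative and not part of the snippet:

import numpy as np
from tensorflow import keras
from tensorflow.keras import backend as K

class HalveLR(keras.callbacks.Callback):
    # Illustrative callback: same read-modify-write pattern as ReduceLROnPlateau above.
    def on_epoch_end(self, epoch, logs=None):
        old_lr = float(K.get_value(self.model.optimizer.lr))
        K.set_value(self.model.optimizer.lr, old_lr * 0.5)
        print('Epoch %d: lr %.4f -> %.4f' % (epoch + 1, old_lr, old_lr * 0.5))

model = keras.Sequential([keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer=keras.optimizers.SGD(learning_rate=0.1), loss='mse')
model.fit(np.zeros((8, 4)), np.zeros((8, 1)), epochs=2, verbose=0, callbacks=[HalveLR()])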
Example #2
 def get_config(self):
   config = {
       'lr': float(K.get_value(self.lr)),
       'decay': float(K.get_value(self.decay)),
       'epsilon': self.epsilon
   }
   base_config = super(Adagrad, self).get_config()
   return dict(list(base_config.items()) + list(config.items()))
Example #3
 def get_config(self):
   config = {
       'lr': float(K.get_value(self.lr)),
       'momentum': float(K.get_value(self.momentum)),
       'decay': float(K.get_value(self.decay)),
       'nesterov': self.nesterov
   }
   base_config = super(SGD, self).get_config()
   return dict(list(base_config.items()) + list(config.items()))
Example #4
 def get_config(self):
   config = {
       'lr': float(K.get_value(self.lr)),
       'beta_1': float(K.get_value(self.beta_1)),
       'beta_2': float(K.get_value(self.beta_2)),
       'epsilon': self.epsilon,
       'schedule_decay': self.schedule_decay
   }
   base_config = super(Nadam, self).get_config()
   return dict(list(base_config.items()) + list(config.items()))
Example #5
 def get_config(self):
   config = {
       'lr': float(K.get_value(self.lr)),
       'beta_1': float(K.get_value(self.beta_1)),
       'beta_2': float(K.get_value(self.beta_2)),
       'decay': float(K.get_value(self.decay)),
       'epsilon': self.epsilon,
       'amsgrad': self.amsgrad
   }
   base_config = super(Adam, self).get_config()
   return dict(list(base_config.items()) + list(config.items()))
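
The get_config snippets above all follow the same shape: unwrap each backend variable with K.get_value so the config is a plain dict, then merge it with the base class config. A small round-trip sketch using current tf.keras optimizers (which use the key 'learning_rate' where the legacy code above uses 'lr'):

from tensorflow.keras import optimizers

opt = optimizers.Adam(learning_rate=0.001)
config = opt.get_config()                  # plain dict of Python floats/bools
restored = optimizers.Adam.from_config(config)
print(config['learning_rate'], restored.get_config()['learning_rate'])   # 0.001 0.001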
Example #6
def show_batch_normalization_layer(layer):
    """Serialize batch normalization layer to dict"""
    moving_mean = K.get_value(layer.moving_mean)
    moving_variance = K.get_value(layer.moving_variance)
    result = {}
    result['moving_mean'] = encode_floats(moving_mean)
    result['moving_variance'] = encode_floats(moving_variance)
    if layer.center:
        beta = K.get_value(layer.beta)
        result['beta'] = encode_floats(beta)
    if layer.scale:
        gamma = K.get_value(layer.gamma)
        result['gamma'] = encode_floats(gamma)
    return result
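
A usage sketch for the serializer above; it assumes show_batch_normalization_layer is defined as in this example, and since encode_floats is not shown in the snippet, a trivial stand-in is used here:

import numpy as np
from tensorflow.keras import backend as K   # the serializer above expects K in scope
from tensorflow.keras import layers

def encode_floats(arr):
    # Hypothetical stand-in for the project's encode_floats helper: emit a plain list.
    return np.asarray(arr).tolist()

bn = layers.BatchNormalization()
bn.build((None, 8))   # creates moving_mean, moving_variance, beta and gamma
print(sorted(show_batch_normalization_layer(bn).keys()))
# ['beta', 'gamma', 'moving_mean', 'moving_variance']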
Example #7
  def on_epoch_begin(self, epoch, logs=None):
    # TODO(yashkatariya): Change the property checking when the learning
    # rate attribute is unified across all TF Optimizers.
    if isinstance(self.model.optimizer, optimizers.TFOptimizer):
      if not hasattr(self.model.optimizer.optimizer, '_lr') and not hasattr(
          self.model.optimizer.optimizer, '_learning_rate'):
        raise ValueError(
            'TF Optimizer must have a "_lr" or "_learning_rate" attribute.')
      else:
        opt = self.model.optimizer.optimizer
        if hasattr(opt, '_lr'):
          opt_lr = Variable(opt._lr)  # pylint: disable=protected-access
        elif hasattr(opt, '_learning_rate'):
          opt_lr = Variable(opt._learning_rate)  # pylint: disable=protected-access
    else:
      if not hasattr(self.model.optimizer, 'lr'):
        raise ValueError('Optimizer must have a "lr" attribute.')
      else:
        opt = self.model.optimizer
        opt_lr = opt.lr

    try:  # new API
      lr = float(K.get_value(opt_lr))
      lr = self.schedule(epoch, lr)
    except TypeError:  # Support for old API for backward compatibility
      lr = self.schedule(epoch)
    if not isinstance(lr, (float, np.float32, np.float64)):
      raise ValueError('The output of the "schedule" function '
                       'should be float.')
    K.set_value(opt_lr, lr)
    if self.verbose > 0:
      print('\nEpoch %05d: LearningRateScheduler reducing learning '
            'rate to %s.' % (epoch + 1, lr))
Example #8
 def _serialize_hyperparameter(self, hyperparameter_name):
   """Serialize a hyperparameter that can be a float, callable, or Tensor."""
   value = self._get_hyper(hyperparameter_name)
   if callable(value):
     return value()
   if isinstance(value, (ops.Tensor, tf_variables.Variable)):
     return backend.get_value(value)
   return value
Example #9
  def testOptimizerWithCallbacks(self):
    np.random.seed(1331)
    input_np = np.random.random((10, 3))
    output_np = np.random.random((10, 4))
    a = input_layer.Input(shape=(3,), name='input_a')
    model = sequential.Sequential()
    model.add(core.Dense(4, name='dense'))
    model.add(core.Dropout(0.5, name='dropout'))
    model(a)
    optimizer = gradient_descent.SGD(learning_rate=0.1)
    model.compile(optimizer, loss='mse', metrics=['mae'])
    # This does not reduce the LR after the first epoch (due to low delta).
    cbks = [
        callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.1, min_delta=0, patience=1, cooldown=5)
    ]
    model.fit(
        input_np,
        output_np,
        batch_size=10,
        validation_data=(input_np, output_np),
        callbacks=cbks,
        epochs=2,
        verbose=0)
    self.assertAllClose(
        float(backend.get_value(model.optimizer.lr)), 0.1, atol=1e-4)

    # This should reduce the LR after the first epoch (due to high delta).
    cbks = [
        callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.1,
            min_delta=10,
            patience=1,
            cooldown=5)
    ]
    model.fit(
        input_np,
        output_np,
        batch_size=10,
        validation_data=(input_np, output_np),
        callbacks=cbks,
        epochs=2,
        verbose=2)
    self.assertAllClose(
        float(backend.get_value(model.optimizer.lr)), 0.01, atol=1e-4)
Example #10
 def _serialize_hyperparameter(self, hyperparameter_name):
   """Serialize a hyperparameter that can be a float, callable, or Tensor."""
   value = self._hyper[hyperparameter_name]
   if isinstance(value, learning_rate_schedule.LearningRateSchedule):
     return learning_rate_schedule.serialize(value)
   if callable(value):
     return value()
   if tensor_util.is_tensor(value):
     return backend.get_value(value)
   return value
Example #11
 def _serialize_hyperparameter(self, hyperparameter_name):
   """Serialize a hyperparameter that can be a float, callable, or Tensor."""
   value = self._hyper[hyperparameter_name]
   if isinstance(value, learning_rate_schedule.LearningRateSchedule):
     return learning_rate_schedule.serialize(value)
   if callable(value):
     return value()
   if isinstance(value, (ops.Tensor, tf_variables.Variable,
                         distributed_values.TPUMirroredVariable)):
     return backend.get_value(value)
   return value
Example #12
  def test_save_weights_with_autocast_vars(self, strategy_fn, h5=False):
    with strategy_fn().scope():
      with policy.policy_scope('infer_float32_vars'):
        x = layers.Input(shape=(1,), batch_size=2, dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16)
        y = layer(x)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

    model.set_weights([np.array(100.)])
    x = np.ones((2, 1), dtype=np.float16)
    self.assertAllClose(backend.get_value(model(x)), x + 100.)
    suffix = '.h5' if h5 else ''
    weights_file = os.path.join(self.get_temp_dir(), 'weights' + suffix)
    model.save_weights(weights_file)

    model.set_weights([np.array(200.)])
    self.assertAllClose(backend.get_value(model(x)), x + 200.)
    model.load_weights(weights_file)
    self.assertAllClose(backend.get_value(model(x)), x + 100.)
    self.assertEqual(model.get_weights(), [np.array(100.)])
Example #13
 def on_epoch_begin(self, epoch, logs=None):
   if not hasattr(self.model.optimizer, 'lr'):
     raise ValueError('Optimizer must have a "lr" attribute.')
   try:  # new API
     lr = float(K.get_value(self.model.optimizer.lr))
     lr = self.schedule(epoch, lr)
   except TypeError:  # Support for old API for backward compatibility
     lr = self.schedule(epoch)
   if not isinstance(lr, (float, np.float32, np.float64)):
     raise ValueError('The output of the "schedule" function '
                      'should be float.')
   K.set_value(self.model.optimizer.lr, lr)
   if self.verbose > 0:
     print('\nEpoch %05d: LearningRateScheduler reducing learning '
           'rate to %s.' % (epoch + 1, lr))
Example #14
def convert_all_kernels_in_model(model):
  """Converts all convolution kernels in a model from Theano to TensorFlow.

  Also works from TensorFlow to Theano.

  Arguments:
      model: target model for the conversion.
  """
  # Note: SeparableConvolution not included
  # since only supported by TF.
  conv_classes = {
      'Conv1D',
      'Conv2D',
      'Conv3D',
      'Conv2DTranspose',
  }
  to_assign = []
  for layer in model.layers:
    if layer.__class__.__name__ in conv_classes:
      original_kernel = K.get_value(layer.kernel)
      converted_kernel = convert_kernel(original_kernel)
      to_assign.append((layer.kernel, converted_kernel))
  K.batch_set_value(to_assign)
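
A usage sketch for the converter above, assuming convert_all_kernels_in_model and the convert_kernel helper it calls are both in scope; the toy model is illustrative:

from tensorflow.keras import backend as K
from tensorflow.keras import layers, models

model = models.Sequential([layers.Conv2D(4, 3, input_shape=(8, 8, 1))])
before = K.get_value(model.layers[0].kernel)
convert_all_kernels_in_model(model)   # flips each kernel and writes it back via batch_set_value
after = K.get_value(model.layers[0].kernel)
assert before.shape == after.shape    # same shape, spatial axes flipped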
Example #15
  def apply_mask(self, prediction_result):
    """Removes prediction output that corresponds to padded input."""
    padding_mask = K.get_value(self.padding_mask)
    assert len(padding_mask.shape) == 1

    if len(self.output_shape) == 1:
      prediction = np.take(prediction_result,
                           np.nonzero(
                               padding_mask[:len(prediction_result)]),
                           axis=0)
      if prediction.shape[0] == 1:
        prediction = np.squeeze(prediction, axis=0)
      return prediction

    else:
      predictions = []
      for i in range(len(self.output_shape)):
        prediction = prediction_result[i]
        prediction = np.take(prediction, np.nonzero(
            padding_mask[:len(prediction)]), axis=0)
        predictions.append(np.squeeze(prediction))

      return predictions
Example #16
  def test_save_weights_with_dynamic_loss_scaling(self, strategy_fn):
    with context.eager_mode():
      strategy = strategy_fn()
      if (isinstance(strategy, mirrored_strategy.MirroredStrategy) and
          not context.executing_eagerly()):
        # TODO(b/121381184): Enable running the test in this case.
        return

      # Create and run model.
      with strategy.scope():
        x = layers.Input(shape=(2,), batch_size=2, dtype=dtypes.float32)
        y = AddLayer(assert_type=dtypes.float32)(x)
        model = models.Model(inputs=x, outputs=y)

        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=1., increment_period=2., multiplier=2.)
        opt = gradient_descent.SGD(1.)
        opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
        model.compile(optimizer=opt, loss='mse')
      # Run for 3 steps (6 examples with a batch size of 2)
      model.fit(np.zeros((6, 2)), np.zeros((6, 2)), batch_size=2)
      self.assertEqual(backend.get_value(loss_scale()), 2)
      self.assertEqual(backend.get_value(loss_scale._num_good_steps), 1)

      # Save model weights.
      save_prefix = os.path.join(self.get_temp_dir(), 'ckpt')
      model.save_weights(save_prefix)

      # Run model again for 1 step (2 examples with a batch size of 2)
      model.fit(np.zeros((2, 2)), np.zeros((2, 2)), batch_size=2)
      self.assertEqual(backend.get_value(loss_scale()), 4)
      self.assertEqual(backend.get_value(loss_scale._num_good_steps), 0)

      # Load model weights and ensure loss scale weights are restored.
      model.load_weights(save_prefix)
      self.assertEqual(backend.get_value(loss_scale()), 2)
      self.assertEqual(backend.get_value(loss_scale._num_good_steps), 1)
Example #17
def create_cnn_model(weights_path=None):
    # Builds our CNN model.
    # A convolution layer with an n x m kernel, l input feature maps and k output
    # feature maps has n*m*l*k kernel weights (e.g. l=32 feature maps in, k=64 out),
    # plus one bias per output feature map, so the total parameter count is (n*m*l + 1)*k.
    '''
    PARAMETERS
    https://towardsdatascience.com/understanding-and-calculating-the-number-of-parameters-in-convolution-neural-networks-cnns-fc88790d530d
    https://medium.com/@shashikachamod4u/calculate-output-size-and-number-of-trainable-parameters-in-a-convolution-layer-1d64cae6c009
    https://medium.com/@iamvarman/how-to-calculate-the-number-of-parameters-in-the-cnn-5bd55364d7ca
    https://cs231n.github.io/convolutional-networks/
    '''

    input = Input(shape=(1, IMG_WIDTH, IMG_HEIGHT))
    input_pad = ZeroPadding2D(padding=(3, 3))(input)

    conv1_1_3x3_s1 = Conv2D(32, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv1_1/3x3_s1',
                            kernel_regularizer=l2(l2_regulizer))(input_pad)
    conv1_2_3x3_s1 = Conv2D(
        32, (3, 3),
        strides=(1, 1),
        padding='same',
        activation='relu',
        name='conv1_2/3x3_s1',
        kernel_regularizer=l2(l2_regulizer))(conv1_1_3x3_s1)
    conv1_zero_pad = ZeroPadding2D(padding=(1, 1))(conv1_2_3x3_s1)
    pool1_helper = PoolHelper()(conv1_zero_pad)
    pool1_2_2x2_s1 = MaxPooling2D(pool_size=(2, 2),
                                  strides=(1, 1),
                                  padding='same',
                                  name='pool1/2x2_s1')(pool1_helper)
    pool1_norm1 = LRN(name='pool1/norm1')(pool1_2_2x2_s1)

    conv2_1_3x3_reduce = Conv2D(
        64, (1, 1),
        padding='same',
        activation='relu',
        name='conv2_1/3x3_reduce',
        kernel_regularizer=l2(l2_regulizer))(pool1_norm1)
    conv2_2_3x3 = Conv2D(
        64, (3, 3),
        padding='same',
        activation='relu',
        name='conv2_2/3x3',
        kernel_regularizer=l2(l2_regulizer))(conv2_1_3x3_reduce)
    conv2_norm2 = LRN(name='conv2/norm2')(conv2_2_3x3)
    conv2_zero_pad = ZeroPadding2D(padding=(1, 1))(conv2_norm2)
    pool2_helper = PoolHelper()(conv2_zero_pad)
    pool2_3x3_s2 = MaxPooling2D(pool_size=(3, 3),
                                strides=(2, 2),
                                padding='same',
                                name='pool2/3x3_s2')(pool2_helper)

    conv3_1_3x3_s1 = Conv2D(128, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv3_1/3x3_s1',
                            kernel_regularizer=l2(l2_regulizer))(pool2_3x3_s2)
    conv3_2_3x3_s1 = Conv2D(
        128, (3, 3),
        strides=(1, 1),
        padding='same',
        activation='relu',
        name='conv3_2/3x3_s1',
        kernel_regularizer=l2(l2_regulizer))(conv3_1_3x3_s1)
    conv3_zero_pad = ZeroPadding2D(padding=(1, 1))(conv3_2_3x3_s1)
    pool3_helper = PoolHelper()(conv3_zero_pad)
    pool3_2_2x2_s1 = MaxPooling2D(pool_size=(2, 2),
                                  strides=(1, 1),
                                  padding='same',
                                  name='pool3/2x2_s1')(pool3_helper)
    pool3_norm1 = LRN(name='pool3/norm1')(pool3_2_2x2_s1)

    conv4_1_3x3_reduce = Conv2D(
        256, (1, 1),
        padding='same',
        activation='relu',
        name='conv4_1/3x3_reduce',
        kernel_regularizer=l2(l2_regulizer))(pool3_norm1)
    conv4_2_3x3 = Conv2D(
        256, (3, 3),
        padding='same',
        activation='relu',
        name='conv4_2/3x3',
        kernel_regularizer=l2(l2_regulizer))(conv4_1_3x3_reduce)
    conv4_norm2 = LRN(name='conv4/norm2')(conv4_2_3x3)
    conv4_zero_pad = ZeroPadding2D(padding=(1, 1))(conv4_norm2)
    pool4_helper = PoolHelper()(conv4_zero_pad)
    pool4_3x3_s2 = MaxPooling2D(pool_size=(3, 3),
                                strides=(2, 2),
                                padding='same',
                                name='pool4/3x3_s2')(pool4_helper)

    conv5_1_3x3_s1 = Conv2D(512, (3, 3),
                            strides=(1, 1),
                            padding='same',
                            activation='relu',
                            name='conv5_1/3x3_s1',
                            kernel_regularizer=l2(l2_regulizer))(pool4_3x3_s2)
    conv5_2_3x3_s1 = Conv2D(
        512, (3, 3),
        strides=(1, 1),
        padding='same',
        activation='relu',
        name='conv5_2/3x3_s1',
        kernel_regularizer=l2(l2_regulizer))(conv5_1_3x3_s1)
    conv5_zero_pad = ZeroPadding2D(padding=(1, 1))(conv5_2_3x3_s1)
    pool5_helper = PoolHelper()(conv5_zero_pad)
    pool5_2_2x2_s1 = MaxPooling2D(pool_size=(2, 2),
                                  strides=(1, 1),
                                  padding='same',
                                  name='pool5/2x2_s1')(pool5_helper)
    pool5_norm1 = LRN(name='pool5/norm1')(pool5_2_2x2_s1)

    conv6_1_3x3_reduce = Conv2D(
        1024, (1, 1),
        padding='same',
        activation='relu',
        name='conv6_1/3x3_reduce',
        kernel_regularizer=l2(l2_regulizer))(pool5_norm1)
    conv6_2_3x3 = Conv2D(
        1024, (3, 3),
        padding='same',
        activation='relu',
        name='conv6_2/3x3',
        kernel_regularizer=l2(l2_regulizer))(conv6_1_3x3_reduce)
    conv6_norm2 = LRN(name='conv6/norm2')(conv6_2_3x3)
    conv6_zero_pad = ZeroPadding2D(padding=(1, 1))(conv6_norm2)
    pool6_helper = PoolHelper()(conv6_zero_pad)
    pool6_3x3_s2 = MaxPooling2D(pool_size=(3, 3),
                                strides=(2, 2),
                                padding='same',
                                name='pool6/3x3_s2')(pool6_helper)

    pool7_2x2_s1 = AveragePooling2D(pool_size=(2, 2),
                                    strides=(1, 1),
                                    name='pool7/2x2_s1')(pool6_3x3_s2)

    loss_flat = Flatten()(pool7_2x2_s1)
    pool7_drop_2x2_s1 = Dropout(rate=0.5)(loss_flat)
    loss_classifier = Dense(
        num_classes,
        name='loss3/classifier',
        kernel_regularizer=l2(l2_regulizer))(pool7_drop_2x2_s1)
    loss_classifier_act = Activation('softmax', name='prob')(loss_classifier)

    mynet = Model(inputs=input, outputs=[loss_classifier_act])

    if weights_path:
        mynet.load_weights(weights_path)

    if keras.backend.backend() == 'tensorflow':
        # convert the convolutional kernels for tensorflow
        ops = []
        for layer in mynet.layers:
            if layer.__class__.__name__ == 'Conv2D':
                original_w = K.get_value(layer.kernel)
                converted_w = convert_kernel(original_w)
                ops.append(tf.assign(layer.kernel, converted_w).op)
        K.get_session().run(ops)

    return mynet
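
The parameter-count formula quoted in the comments above, (n*m*l + 1)*k for an n x m kernel with l input and k output feature maps, can be checked directly against Keras; the layer sizes below are illustrative:

from tensorflow.keras import layers, models

# (n*m*l + 1)*k = (3*3*32 + 1)*64 = 18496 for a 3x3 kernel, 32 input and 64 output maps
model = models.Sequential([layers.Conv2D(64, (3, 3), input_shape=(28, 28, 32))])
print(model.layers[0].count_params())   # 18496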
Example #18
def iterator_predict_loop(model, inputs, steps, verbose=0):
  """Predict function for eager execution when input is dataset iterator.

  Arguments:
      model: Instance of `Model`.
      inputs: Input dataset iterator.
      steps: Total number of steps (batches of samples) before declaring
          `_predict_loop` finished.
      verbose: Verbosity mode.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions (if the model has multiple outputs).

  Raises:
      ValueError: In case of mismatch between given number of inputs and
        expectations of the model.
  """
  assert isinstance(inputs, iterator_ops.EagerIterator)
  outs = []
  if verbose == 1:
    progbar = generic_utils.Progbar(target=steps)
  for step_index in range(steps):
    # Get data from the iterator.
    try:
      next_element = inputs.get_next()
    except errors.OutOfRangeError:
      logging.warning(
          'Your dataset iterator ran out of data; '
          'interrupting prediction. Make sure that your '
          'dataset can generate at least `steps` '
          'batches (in this case, %d batches).', steps)
      break

    if not isinstance(next_element, (list, tuple)) or len(next_element) != 2:
      raise ValueError(
          'Please provide data as a list or tuple of 2 elements '
          ' - input and target pair. Received %s. We do not use the '
          '`target` value here.' % next_element)
    x, _ = next_element

    # Validate and standardize data.
    x, _, _ = model._standardize_user_data(x)

    if model._expects_training_arg:
      batch_outs = model.call(x[0] if len(x) == 1 else x, training=False)
    else:
      batch_outs = model.call(x[0] if len(x) == 1 else x)
    if not isinstance(batch_outs, list):
      batch_outs = [batch_outs]

    # We collect the results from every step and then concatenate them once
    # in the end. This is an expensive process. We are doing this because we
    # do not know the number of samples beforehand.
    if step_index == 0:
      for _ in batch_outs:
        outs.append([])
    for i, batch_out in enumerate(batch_outs):
      outs[i].append(backend.get_value(batch_out))

    if verbose == 1:
      progbar.update(step_index + 1)
  for i, out in enumerate(outs):
    outs[i] = np.concatenate(tuple(out), axis=0)
  if len(outs) == 1:
    return outs[0]
  return outs
Example #19
def iterator_predict_loop(model, inputs, steps, verbose=0):
    """Predict function for eager execution when input is dataset iterator.

  Arguments:
      model: Instance of `Model`.
      inputs: Input dataset iterator.
      steps: Total number of steps (batches of samples) before declaring
          `_predict_loop` finished.
      verbose: Verbosity mode.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions (if the model has multiple outputs).

  Raises:
      ValueError: In case of mismatch between given number of inputs and
        expectations of the model.
  """
    assert isinstance(inputs, iterator_ops.EagerIterator)
    if not isinstance(inputs.output_shapes,
                      (list, tuple)) or len(inputs.output_shapes) > 2:
        raise ValueError(
            'Please provide data as a list or tuple of 1 or 2 elements '
            ' - input or input and target pair. Received %s. We do not use the '
            '`target` value here.' % inputs.output_shapes)
    outs = []
    if verbose == 1:
        progbar = generic_utils.Progbar(target=steps)
    for step_index in range(steps):
        # Get data from the iterator.
        try:
            next_element = inputs.get_next()
        except errors.OutOfRangeError:
            logging.warning(
                'Your dataset iterator ran out of data; '
                'interrupting prediction. Make sure that your '
                'dataset can generate at least `steps` '
                'batches (in this case, %d batches).', steps)
            break

        # expects a tuple, where first element of tuple represents inputs
        x = next_element[0]

        # Validate and standardize data.
        x, _, _ = model._standardize_user_data(x)
        x = training_utils.cast_if_floating_dtype(x)

        if model._expects_training_arg:
            batch_outs = model.call(x[0] if len(x) == 1 else x, training=False)
        else:
            batch_outs = model.call(x[0] if len(x) == 1 else x)
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]

        # We collect the results from every step and then concatenate them once
        # in the end. This is an expensive process. We are doing this because we
        # do not know the number of samples beforehand.
        if step_index == 0:
            for _ in batch_outs:
                outs.append([])
        for i, batch_out in enumerate(batch_outs):
            outs[i].append(backend.get_value(batch_out))

        if verbose == 1:
            progbar.update(step_index + 1)
    for i, out in enumerate(outs):
        outs[i] = np.concatenate(tuple(out), axis=0)
    if len(outs) == 1:
        return outs[0]
    return outs
Example #20
    def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            with policy.policy_scope(policy.Policy('infer_float32_vars')):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = AddLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                y = math_ops.cast(y, dtypes.float32)
                model = models.Model(inputs=x, outputs=y)

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                opt = gradient_descent.SGD(1.)
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=initial_loss_scale, increment_period=2)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
                model.compile(opt, loss=loss_fn, cloning=cloning)

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down by 1
        # each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
Example #21
def train_net(args):
    data_dir = config.dataset_path
    image_size = config.image_shape[0:2]
    assert len(image_size) == 2
    assert image_size[0] == image_size[1]
    print('image_size', image_size)
    print('num_classes', config.num_classes)
    training_path = os.path.join(data_dir, "train.tfrecords")

    print('Called with argument:', args, config)
    train_dataset, batches_per_epoch = data_input.training_dataset(
        training_path, default.per_batch_size)

    extractor, classifier = build_model((image_size[0], image_size[1], 3),
                                        args)

    global_step = 0
    ckpt_path = os.path.join(
        args.models_root, '%s-%s-%s' % (args.network, args.loss, args.dataset),
        'model-{step:04d}.ckpt')
    ckpt_dir = os.path.dirname(ckpt_path)
    print('ckpt_path', ckpt_path)
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    if len(args.pretrained) == 0:
        latest = tf.train.latest_checkpoint(ckpt_dir)
        if latest:
            global_step = int(latest.split('-')[-1].split('.')[0])
            classifier.load_weights(latest)
    else:
        print('loading', args.pretrained, args.pretrained_epoch)
        load_path = os.path.join(args.pretrained, '-', args.pretrained_epoch,
                                 '.ckpt')
        classifier.load_weights(load_path)

    initial_epoch = global_step // batches_per_epoch
    rest_batches = global_step % batches_per_epoch

    lr_decay_steps = [(int(x), args.lr * np.power(0.1, i + 1))
                      for i, x in enumerate(args.lr_steps.split(','))]
    print('lr_steps', lr_decay_steps)

    valid_datasets = data_input.load_valid_set(data_dir, config.val_targets)

    classifier.compile(
        optimizer=keras.optimizers.SGD(lr=args.lr, momentum=args.mom),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy()])
    classifier.summary()

    tensor_board = keras.callbacks.TensorBoard(ckpt_dir)
    tensor_board.set_model(classifier)

    train_names = ['train_loss', 'train_acc']
    train_results = []
    highest_score = 0
    for epoch in range(initial_epoch, default.end_epoch):
        for batch in range(rest_batches, batches_per_epoch + 1):
            utils.update_learning_rate(classifier, lr_decay_steps, global_step)
            train_results = classifier.train_on_batch(train_dataset,
                                                      reset_metrics=False)
            global_step += 1
            if global_step % 1000 == 0:
                print('lr-batch-epoch:',
                      float(K.get_value(classifier.optimizer.lr)), batch,
                      epoch)
            if global_step >= 0 and global_step % args.verbose == 0:
                acc_list = []
                for key in valid_datasets:
                    data_set, data_set_flip, is_same_list = valid_datasets[key]
                    embeddings = extractor.predict(data_set)
                    embeddings_flip = extractor.predict(data_set_flip)
                    embeddings_parts = [embeddings, embeddings_flip]
                    x_norm = 0.0
                    x_norm_cnt = 0
                    for part in embeddings_parts:
                        for i in range(part.shape[0]):
                            embedding = part[i]
                            norm = np.linalg.norm(embedding)
                            x_norm += norm
                            x_norm_cnt += 1
                    x_norm /= x_norm_cnt
                    embeddings = embeddings_parts[0] + embeddings_parts[1]
                    embeddings = sklearn.preprocessing.normalize(embeddings)
                    print(embeddings.shape)
                    _, _, accuracy, val, val_std, far = verification.evaluate(
                        embeddings, is_same_list, folds=10)
                    acc, std = np.mean(accuracy), np.std(accuracy)

                    print('[%s][%d]XNorm: %f' % (key, batch, x_norm))
                    print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' %
                          (key, batch, acc, std))
                    acc_list.append(acc)

                if len(acc_list) > 0:
                    score = sum(acc_list)
                    if highest_score == 0:
                        highest_score = score
                    elif highest_score >= score:
                        print('\nStep %05d: score did not improve from %0.5f' %
                              (global_step, highest_score))
                    else:
                        path = ckpt_path.format(step=global_step)
                        print(
                            '\nStep %05d: score improved from %0.5f to %0.5f,'
                            ' saving model to %s' %
                            (global_step, highest_score, score, path))
                        highest_score = score
                        classifier.save_weights(path)

        utils.write_log(tensor_board, train_names, train_results, epoch)
        classifier.reset_metrics()
Example #22
def run_skf_with_training_error(model_mode,
                                loss_mode,
                                fl,
                                fl_store,
                                hparams,
                                skf_file,
                                label_type='cutoff',
                                scoring='mse',
                                skf_sheet=None,
                                te_sheet=None,
                                k_folds=10,
                                k_shuffle=True,
                                save_model=False,
                                save_model_name=None,
                                save_model_dir=None,
                                plot_name=None):
    '''
    Stratified k-fold cross-validation for training and evaluating model 2 only. Model 1 is trained beforehand.
    :param model_mode: Choose between using SNN or cDNN (non_smiles) and SNN_smiles or cDNN_smiles
    :param cv_mode: Cross validation mode. Either 'skf' or 'loocv'.
    :param hparams: hparams dict containing hyperparameters information
    :param loader_file: data_loader excel file location
    :param skf_file: skf_file name to save excel file as
    :param skf_sheet: name of sheet to save inside the skf_file excel. If None, will default to SNN or cDNN as name
    :param k_folds: Number of k folds. Used only for skf cv_mode
    :param k_shuffle: Whether to shuffle the given examples to split into k folds if using skf
    :return:
    '''
    fn = 6
    numel = 3
    # Run k model instance to perform skf
    predicted_labels_store = []
    mse_store = []
    mse_norm_store = []
    folds = []
    val_idx = []
    val_features_c = []
    val_labels = []
    column_headers = fl.labels_names

    wb = openpyxl.load_workbook(te_sheet)
    msee_store = []
    mre_store = []

    for fold, fl_tuple in enumerate(fl_store):
        instance_start = time.time()
        (ss_fl,
         i_ss_fl) = fl_tuple  # ss_fl is training fl, i_ss_fl is validation fl

        wb.create_sheet('{}'.format(fold))
        ws = wb[wb.sheetnames[-1]]

        # Set up model
        if loss_mode == 'normal':
            sess = tf.compat.v1.Session()
            # sess = tf.Session()
            K.set_session(sess)
            model = MTmodel(fl=ss_fl,
                            mode=model_mode,
                            hparams=hparams,
                            labels_norm=labels_norm)
        elif loss_mode == 'hul':
            model = HULMTmodel(fl=ss_fl,
                               mode=model_mode,
                               hparams=hparams,
                               labels_norm=labels_norm)
            print('HUL Standard Deviation Values:')
            print([
                np.exp(K.get_value(log_var[0]))**0.5
                for log_var in model.model.layers[-1].log_vars
            ])
        elif loss_mode == 'ann':
            sess = tf.compat.v1.Session()
            # sess = tf.Session()
            K.set_session(sess)
            model = Kmodel(fl=ss_fl, mode=model_mode, hparams=hparams)
        elif loss_mode == 'p_model':
            model = Pmodel(fl=ss_fl, mode=model_mode, hparams=hparams)
        elif loss_mode == 'svr':
            if not fl.normalise_labels:
                raise TypeError(
                    'fl labels are not normalised. For SVR, the labels must be normalised.'
                )
            model = SVRmodel(fl=ss_fl,
                             epsilon=hparams['epsilon'],
                             c=hparams['c'])
        elif loss_mode == 'dtr':
            #if not fl.normalise_labels:
            #raise TypeError('fl labels are not normalised. For SVR, the labels must be normalised.')
            model = DTRmodel(fl=ss_fl,
                             max_depth=hparams['max_depth'],
                             num_est=hparams['num_est'])
        elif loss_mode == 'mimosvr':
            if not fl.normalise_labels:
                raise TypeError(
                    'fl labels are not normalised. For SVR, the labels must be normalised.'
                )
            model = MIMOSVRmodel(fl=ss_fl, gamma=hparams['gamma'])

        else:
            raise KeyError('loss_mode ' + loss_mode +
                           ' is not a valid selection for loss mode.')

        # Train model and save model training loss vs epoch plot if plot_name is given, else no plot will be saved
        if plot_name:
            model.train_model(ss_fl,
                              i_ss_fl,
                              plot_name='{}_fold_{}.png'.format(
                                  plot_name, fold))
        else:
            model.train_model(ss_fl, i_ss_fl)

        p_y, _, _ = model.eval(fl)
        if fl.normalise_labels:
            p_y = fl.labels_scaler.inverse_transform(p_y)
        for row, p_label in enumerate(p_y.tolist()):
            if p_label[1] > p_label[2]:
                p_y[row, 1] = p_y[row, 2]
            if p_label[0] > p_y[row, 1]:
                p_y[row, 0] = p_y[row, 1]
        se_store = (fl.labels - p_y)**2
        re_store = np.abs(fl.labels - p_y) / fl.labels

        df = pd.DataFrame(data=np.concatenate(
            (fl.labels, p_y, se_store, re_store), axis=1),
                          index=list(range(1, 1 + fl.count)),
                          columns=list(column_headers) +
                          ['P_{}'.format(col) for col in column_headers] +
                          ['SE_{}'.format(col) for col in column_headers] +
                          ['RE_{}'.format(col) for col in column_headers])
        print_df_to_excel(df=df, ws=ws)

        col = fn + 1 + 1 + 2 * numel + 3
        msee_store.append(np.mean(se_store))
        mre_store.append(np.mean(re_store))
        ws.cell(1, col).value = 'MSE'
        ws.cell(1, col + 1).value = msee_store[-1]
        ws.cell(2, col).value = 'MRE'
        ws.cell(2, col + 1).value = mre_store[-1]
        ws.cell(3, col).value = 'ARE'
        ws.cell(3, col + 1).value = mare_store[-1]

        # Evaluation
        predicted_labels, mse, mse_norm = model.eval(i_ss_fl)
        if fl.normalise_labels:
            predicted_labels = fl.labels_scaler.inverse_transform(
                predicted_labels)

        if label_type == 'cutoff':
            for row, p_label in enumerate(predicted_labels.tolist()):
                if p_label[1] > p_label[2]:
                    predicted_labels[row, 1] = predicted_labels[row, 2]
                if p_label[0] > predicted_labels[row, 1]:
                    predicted_labels[row, 0] = predicted_labels[row, 1]
            pass

        predicted_labels_store.extend(predicted_labels)
        mse_store.append(mse)
        mse_norm_store.append(mse_norm)
        '''
        if fold == k_folds-1:
            stringlist = []
            model.model.summary(print_fn=lambda x: stringlist.append(x))
            short_model_summary = "\n".join(stringlist)
            print(short_model_summary)
        '''
        # Saving model
        if save_model:
            # Set save_model_name
            if isinstance(save_model_name, str):
                save_model_name1 = save_model_name + '_' + model_mode + '_' + str(
                    fold + 1)
            else:
                save_model_name1 = model_mode + '_' + str(fold + 1)

            # Save model
            print('Saving instance {} model in {}'.format(
                fold + 1, save_model_dir + save_model_name1 + '.h5'))
            if loss_mode == 'normal' or loss_mode == 'ann':
                model.model.save(save_model_dir + save_model_name1 + '.h5')
            elif loss_mode == 'hul':
                model.prediction_model.save(save_model_dir + save_model_name1 +
                                            '.h5')
            elif loss_mode == 'svr' or loss_mode == 'dtr':
                pickle.dump(
                    Predict_SVR_DTR(model=model.model,
                                    labels_scaler=model.labels_scaler),
                    open(save_model_dir + save_model_name1 + '.pkl', 'wb'))

        # The next 3 lines are needed, otherwise memory will run out
        del model
        if loss_mode == 'normal' or loss_mode == 'ann':
            K.clear_session()
            sess.close()
        gc.collect()

        # Prepare data for new_df, which holds the full validation dataset and its predicted labels
        folds.extend(
            [fold] * i_ss_fl.count
        )  # Make a col that contains the fold number for each example
        if len(val_features_c):
            val_features_c = np.concatenate(
                (val_features_c, i_ss_fl.features_c), axis=0)
        else:
            val_features_c = i_ss_fl.features_c

        val_labels.extend(i_ss_fl.labels)
        val_idx.extend(i_ss_fl.idx)

        # Printing one instance summary.
        instance_end = time.time()
        print(
            '\nFor k-fold run {} out of {}. Each fold has {} examples. Model is {} with {} loss. Time taken for '
            'instance = {}\n'
            'Post-training results: \nmse = {}, mse_norm = {}. Scoring is {}\n'
            '####################################################################################################'
            .format(fold + 1, k_folds, i_ss_fl.count, model_mode, loss_mode,
                    instance_end - instance_start, mse, mse_norm, scoring))

    ws = wb[wb.sheetnames[0]]
    df = pd.DataFrame(data=np.array([msee_store, mre_store]).T,
                      columns=['mse', 're'],
                      index=range(1, 1 + len(msee_store)))
    df.insert(0, 'Fold', list(range(len(fl_store))))
    print_df_to_excel(df=df, ws=ws)
    wb.save(te_sheet)

    mse_avg = np.average(mse_store)
    mse_norm_avg = np.average(mse_norm_store)
    re = np.average(
        np.abs(np.array(val_labels) - np.array(predicted_labels_store)) /
        np.array(val_labels))

    # Calculating metrics based on complete validation prediction
    mse_full = mean_squared_error(val_labels, predicted_labels_store)
    try:
        mse_norm_full = mean_squared_error(
            fl.labels_scaler.transform(val_labels),
            fl.labels_scaler.transform(predicted_labels_store))
    except AttributeError:
        mse_norm_full = mse_full

    # Creating dataframe to print into excel later.
    new_df = np.concatenate(
        (
            np.array(folds)[:, None],  # Convert 1d list to col. vector
            val_features_c,
            np.array(val_labels),
            np.array(predicted_labels_store)),
        axis=1)
    if fl.label_type == 'points':
        predicted_labels_name = list(map(str, np.arange(2, 101)))
        predicted_labels_name = [f'P_{x}' for x in predicted_labels_name]
        headers = ['folds'] + \
                  list(map(str, fl.features_c_names)) + \
                  list(map(str, np.arange(2,101))) + \
                  predicted_labels_name
    elif fl.label_type == 'cutoff':
        predicted_labels_name = list(fl.labels_names)
        predicted_labels_name = [f'P_{x}' for x in predicted_labels_name]
        headers = ['folds'] + \
                  list(map(str, fl.features_c_names)) + \
                  list(fl.labels_names) + \
                  predicted_labels_name

    # val_idx is the original position of the example in the data_loader
    new_df = pd.DataFrame(data=new_df, columns=headers, index=val_idx)

    print('Writing into' + skf_file)
    wb = load_workbook(skf_file)

    # Creating new worksheet. Even if SNN worksheet already exists, a new SNN1 ws will be created and so on
    if skf_sheet is None:
        wb.create_sheet(model_mode)
    else:
        wb.create_sheet(model_mode + skf_sheet)
    sheet_name = wb.sheetnames[
        -1]  # Taking the ws name from the back ensures that if SNN1 is the new ws, it works

    # Writing hparam dataframe first
    pd_writer = pd.ExcelWriter(skf_file, engine='openpyxl')
    pd_writer.book = wb
    pd_writer.sheets = dict((ws.title, ws) for ws in wb.worksheets)
    new_df.to_excel(pd_writer, sheet_name)
    start_col = len(new_df.columns) + 4
    hparams = pd.DataFrame(dict([(k, Series(v)) for k, v in hparams.items()]))
    hparams.to_excel(pd_writer, sheet_name, startrow=0, startcol=start_col - 1)
    start_row = 5

    # Writing other subset split, instance per run, and bounds
    ws = wb[sheet_name]
    headers = ['mse', 'mse_norm', 're']
    values = [mse_avg, mse_norm_avg]
    values_full = [mse_full, mse_norm_full, re]
    print_array_to_excel(np.array(headers), (1 + start_row, start_col + 1),
                         ws,
                         axis=1)
    print_array_to_excel(np.array(values), (2 + start_row, start_col + 1),
                         ws,
                         axis=1)
    print_array_to_excel(np.array(values_full), (3 + start_row, start_col + 1),
                         ws,
                         axis=1)
    ws.cell(2 + start_row, start_col).value = 'Folds avg'
    ws.cell(3 + start_row, start_col).value = 'Overall'
    pd_writer.save()
    pd_writer.close()
    wb.close()

    if scoring == 'mse':
        return mse_full
    elif scoring == 're':
        return re
    else:
        raise KeyError('Scoring function {} is not valid'.format(scoring))
Example #23
    def test_dynamic_loss_scaling(self,
                                  strategy_fn,
                                  pass_loss_scale_to_policy=False,
                                  get_config=False):
        strategy = strategy_fn()
        initial_loss_scale = 2.
        batch_size = 4
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                             dtype=dtypes.float16)
        # If this variable is set to True, the model below will have NaN gradients
        have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
        with strategy.scope():
            opt = gradient_descent.SGD(1.)
            if pass_loss_scale_to_policy:
                p = policy.Policy('mixed_float16', loss_scale=loss_scale)
            else:
                p = policy.Policy('mixed_float16', loss_scale=None)
                opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
            with policy.policy_scope(p):
                x = layers.Input(shape=(1, ),
                                 batch_size=batch_size,
                                 dtype=dtypes.float16)
                layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
                y = layer(x)
                identity_with_nan_grads = (
                    mp_test_util.create_identity_with_nan_gradients_fn(
                        have_nan_gradients))
                y = core.Lambda(identity_with_nan_grads)(y)
                identity_with_grad_check_fn = (
                    mp_test_util.create_identity_with_grad_check_fn(
                        expected_dtype=dtypes.float16,
                        expected_gradient=expected_gradient))
                y = core.Lambda(identity_with_grad_check_fn)(y)
                model = models.Model(inputs=x, outputs=y)
                if get_config:
                    config = model.get_config()
                    model = model.__class__.from_config(
                        config,
                        custom_objects={
                            'MultiplyLayer': mp_test_util.MultiplyLayer
                        })
                    (layer, ) = (
                        layer for layer in model.layers
                        if isinstance(layer, mp_test_util.MultiplyLayer))

                def loss_fn(y_true, y_pred):
                    del y_true
                    return math_ops.reduce_mean(y_pred)

                model.compile(opt,
                              loss=loss_fn,
                              run_eagerly=testing_utils.should_run_eagerly())

        self.assertEqual(backend.eval(layer.v), 1)
        x = np.ones((batch_size, 1))
        y = np.ones((batch_size, 1))
        dataset = dataset_ops.Dataset.from_tensor_slices(
            (x, y)).batch(batch_size)
        model.fit(dataset)
        # The variable starts at 1 and has a gradient of 1, so it will go down by 1
        # each step.
        self.assertEqual(backend.eval(layer.v), 0)

        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -1)

        # There have been two steps without NaNs, so the loss scale will double
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient * 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -2)

        # Next test with NaN gradients.
        backend.set_value(have_nan_gradients, True)
        model.fit(dataset)
        # Variable should not be updated
        self.assertEqual(backend.eval(layer.v), -2)

        # Test with finite gradients again
        backend.set_value(have_nan_gradients, False)
        # The loss scale will be halved due to the NaNs, so the gradient will also
        # be halved
        backend.set_value(expected_gradient,
                          backend.get_value(expected_gradient / 2))
        model.fit(dataset)
        self.assertEqual(backend.eval(layer.v), -3)
Example #24
 def on_epoch_begin(self, epoch, logs=None):
     if self.verbose_ > 0:
         print("KL Divergence weight: %.3f" % K.get_value(self.scale_))
Example #25
 def on_epoch_end(self, epoch, logs=None):
     lr = float(K.get_value(self.model.optimizer.lr))
     print(f"\nEnd epoch {epoch + 1}| LR={lr: 0.08f}\n\n")
Example #26
 def on_train_batch_end(self, batch, logs=None):
     epoch = len(self.model.history.epoch)
     batch_id = epoch * self.params.get('steps', None) + batch
     new_lr = 1e-8 * 10**(batch_id / 20)
     K.set_value(self.model.optimizer.lr, K.get_value(new_lr))
     print(f"\n...Training: end of batch {batch} LR->{new_lr: 0.09f}\n\n")
Example #27
 def on_epoch_begin(self, step, log=None):
     if self.model is not None:
         wd = self.wd_m * K.get_value(self.model.optimizer.lr)
         K.set_value(self.model.optimizer.weight_decay, wd)
     # wd = self.model.optimizer.weight_decay
     print("Weight decay for iter {} is {}".format(step + 1, wd))
Example #28
 def on_train_begin(self, logs=None):
     self.step = K.get_value(self.model.optimizer.iterations)
Example #29
    def test_save_model_with_dynamic_loss_scaling(
            self, strategy_fn, h5=False, use_v1_loss_scale_optimizer=False):
        # TODO(reedwm): Support and test saving model with a mixed_[b]float16 policy
        # as well.
        strategy = strategy_fn()
        if (isinstance(strategy, mirrored_strategy.MirroredStrategy)
                and not context.executing_eagerly()):
            # TODO(b/121381184): Enable running the test in this case.
            return

        # Create and run model.
        with strategy.scope():
            x = layers.Input(shape=(2, ), batch_size=2, dtype=dtypes.float32)
            y = mp_test_util.MultiplyLayer()(x)
            model = models.Model(inputs=x, outputs=y)

            opt = gradient_descent.SGD(1.)
            if use_v1_loss_scale_optimizer:
                loss_scale = loss_scale_module.DynamicLossScale(
                    initial_loss_scale=1., increment_period=2.)
                opt = loss_scale_optimizer.LossScaleOptimizerV1(
                    opt, loss_scale)
            else:
                opt = loss_scale_optimizer.LossScaleOptimizer(
                    opt, initial_scale=1., dynamic_growth_steps=2.)
            model.compile(optimizer=opt,
                          loss='mse',
                          run_eagerly=testing_utils.should_run_eagerly())
        # Run for 3 steps (6 examples with a batch size of 2)
        model.fit(np.ones((6, 2)), np.zeros((6, 2)), batch_size=2)
        self.assertEqual(backend.get_value(opt.loss_scale), 2)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 1)
        (weight, ) = model.trainable_weights
        orig_weight = backend.get_value(weight)

        # Save model weights.
        save_path = os.path.join(self.get_temp_dir(), 'model')
        model.save(save_path, save_format='h5' if h5 else 'tf')

        # Run model again for 1 step (2 examples with a batch size of 2)
        model.fit(np.ones((2, 2)), np.zeros((2, 2)), batch_size=2)
        new_weight = backend.get_value(weight)
        self.assertNotEqual(new_weight, orig_weight)
        self.assertEqual(backend.get_value(opt.loss_scale), 4)
        self.assertEqual(backend.get_value(opt.dynamic_counter), 0)

        # Load model weights and ensure loss scale weights are restored.
        model = save.load_model(
            save_path,
            custom_objects={'MultiplyLayer': mp_test_util.MultiplyLayer})
        (weight, ) = model.trainable_weights
        loaded_weight = backend.get_value(weight)
        self.assertEqual(loaded_weight, orig_weight)
        # Currently the loss scale isn't always saved when the model is saved with
        # Model.save(). So we assert the loss scale either has the value when it was
        # saved, or the value it was initialized with.
        # TODO(reedwm): Always save/restore the loss scale with Model.save().
        self.assertIn(backend.get_value(model.optimizer.loss_scale), (1, 2))
        self.assertIn(backend.get_value(model.optimizer.dynamic_counter),
                      (0, 1))

        # Test optimizer attributes and type
        self.assertEqual(model.optimizer.initial_scale, 1.)
        self.assertEqual(model.optimizer.dynamic_growth_steps, 2.)
        self.assertEqual(type(model.optimizer),
                         loss_scale_optimizer.LossScaleOptimizer)
Example #30
 def __call__(self, shape, dtype=None, partition_info=None):
     # set bias to -log((1 - p)/p) for foreground
     bias = -K.log((1 - self.probability) / self.probability)
     result = K.get_value(K.ones(shape, dtype=dtype)) * bias
     return result
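
The __call__ above is the focal-loss-style prior-probability bias initializer. A self-contained sketch that wraps it in a Keras Initializer, assuming eager TF 2.x; the class name PriorProbability, its constructor and the 0.01 default are assumptions, only the bias formula comes from the snippet:

from tensorflow import keras
from tensorflow.keras import backend as K

class PriorProbability(keras.initializers.Initializer):
    # Hypothetical wrapper: only the bias formula below is taken from the example above.
    def __init__(self, probability=0.01):
        self.probability = probability

    def __call__(self, shape, dtype=None, **kwargs):
        bias = -K.log((1 - self.probability) / self.probability)
        return K.get_value(K.ones(shape, dtype=dtype)) * bias

layer = keras.layers.Dense(9, bias_initializer=PriorProbability(0.01))
layer.build((None, 16))
print(K.get_value(layer.bias)[:3])   # roughly [-4.595, -4.595, -4.595]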
Example #31
    def train(self, reranker, train_dataset, train_output_path, dev_data, dev_output_path, qrels, metric, relevance_level=1):
        if self.tpu:
            # WARNING: not sure if pathlib is compatible with gs://
            train_output_path = Path(
                "{0}/{1}/{2}".format(
                    self.config["storage"], "train_output", hashlib.md5(str(train_output_path).encode("utf-8")).hexdigest()
                )
            )

        dev_best_weight_fn, weights_output_path, info_output_path, loss_fn, metric_fn = self.get_paths_for_early_stopping(
            train_output_path, dev_output_path
        )

        train_records = self.get_tf_train_records(reranker, train_dataset)
        dev_records = self.get_tf_dev_records(reranker, dev_data)
        dev_dist_dataset = self.strategy.experimental_distribute_dataset(dev_records)

        # Does not differ much from https://www.tensorflow.org/tutorials/distribute/custom_training
        strategy_scope = self.strategy.scope()
        with strategy_scope:
            reranker.build_model()
            wrapped_model = self.get_wrapped_model(reranker.model)
            loss_object = self.get_loss(self.config["loss"])
            optimizer_1 = tf.keras.optimizers.Adam(learning_rate=self.config["lr"])
            optimizer_2 = tf.keras.optimizers.Adam(learning_rate=self.config["bertlr"])

            # "You should remove the use of the LossScaleOptimizer when TPUs are used."
            if self.amp and not self.tpu:
                optimizer_2 = mixed_precision.LossScaleOptimizer(optimizer_2, loss_scale="dynamic")

            def compute_loss(labels, predictions):
                per_example_loss = loss_object(labels, predictions)
                return tf.nn.compute_average_loss(per_example_loss, global_batch_size=self.config["batch"])

        def is_bert_variable(name):
            if "bert" in name:
                return True
            if "electra" in name:
                return True
            return False

        def train_step(inputs):
            data, labels = inputs

            with tf.GradientTape() as tape:
                train_predictions = wrapped_model(data, training=True)
                loss = compute_loss(labels, train_predictions)
                if self.amp and not self.tpu:
                    loss = optimizer_2.get_scaled_loss(loss)

            gradients = tape.gradient(loss, wrapped_model.trainable_variables)
            if self.amp and not self.tpu:
                gradients = optimizer_2.get_unscaled_gradients(gradients)

            bert_variables = [
                (gradients[i], variable)
                for i, variable in enumerate(wrapped_model.trainable_variables)
                if is_bert_variable(variable.name) and "classifier" not in variable.name
            ]
            classifier_vars = [
                (gradients[i], variable)
                for i, variable in enumerate(wrapped_model.trainable_variables)
                if "classifier" in variable.name
            ]
            other_vars = [
                (gradients[i], variable)
                for i, variable in enumerate(wrapped_model.trainable_variables)
                if not is_bert_variable(variable.name) and "classifier" not in variable.name
            ]

            # Making sure that we did not miss any variables
            assert len(bert_variables) + len(classifier_vars) + len(other_vars) == len(wrapped_model.trainable_variables)
            # TODO: Clean this up for general use
            optimizer_1.apply_gradients(classifier_vars)
            optimizer_2.apply_gradients(bert_variables)
            if other_vars:
                optimizer_1.apply_gradients(other_vars)

            return loss

        def test_step(inputs):
            data, labels = inputs
            predictions = wrapped_model.predict_step(data)

            return predictions

        @tf.function
        def distributed_train_step(dataset_inputs):
            per_replica_losses = self.strategy.run(train_step, args=(dataset_inputs,))

            return self.strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)

        @tf.function
        def distributed_test_step(dataset_inputs):
            return self.strategy.run(test_step, args=(dataset_inputs,))

        train_records = train_records.shuffle(100000)
        train_dist_dataset = self.strategy.experimental_distribute_dataset(train_records)

        initial_iter, metrics = (
            self.fastforward_training(wrapped_model, weights_output_path, loss_fn, metric_fn)
            if self.config["fastforward"]
            else (0, {})
        )
        dev_best_metric = metrics.get(metric, -np.inf)
        logger.info("starting training from iteration %s/%s", initial_iter + 1, self.config["niters"])
        logger.info(f"Best metric loaded: {metric}={dev_best_metric}")

        cur_step = initial_iter * self.n_batch_per_iter
        initial_lr = self.change_lr(step=cur_step, lr=self.config["bertlr"])
        K.set_value(optimizer_2.lr, K.get_value(initial_lr))
        train_loss = self.load_loss_file(loss_fn) if initial_iter > 0 else []
        if 0 < initial_iter < self.config["niters"]:
            self.exhaust_used_train_data(train_dist_dataset, n_batch_to_exhaust=initial_iter * self.n_batch_per_iter)

        niter = initial_iter
        total_loss = 0
        trec_preds = {}
        iter_bar = tqdm(desc="Training iteration", total=self.n_batch_per_iter)
        # Goes through the dataset ONCE (i.e. niters * itersize).
        # However, the dataset may already contain multiple instances of the same sample,
        # depending upon what Sampler was used.
        # If you want multiple epochs, achieve it by tweaking the niters and itersize values.
        for x in train_dist_dataset:
            total_loss += distributed_train_step(x)
            cur_step += 1
            iter_bar.update(1)

            # Do warmup and decay
            new_lr = self.change_lr(step=cur_step, lr=self.config["bertlr"])
            K.set_value(optimizer_2.lr, K.get_value(new_lr))

            if cur_step % self.n_batch_per_iter == 0:
                niter += 1

                iter_bar.close()
                iter_bar = tqdm(total=self.n_batch_per_iter)
                train_loss.append(total_loss / self.n_batch_per_iter)
                logger.info("iter={} loss = {}".format(niter, train_loss[-1]))
                self.write_to_loss_file(loss_fn, train_loss)
                total_loss = 0

                if self.config["fastforward"]:
                    wrapped_model.save_weights(f"{weights_output_path}/{niter}")

                if niter % self.config["validatefreq"] == 0:
                    dev_predictions = []
                    for x in tqdm(dev_dist_dataset, desc="validation"):
                        pred_batch = (
                            distributed_test_step(x).values
                            if self.strategy.num_replicas_in_sync > 1
                            else [distributed_test_step(x)]
                        )
                        for p in pred_batch:
                            dev_predictions.extend(p)

                    trec_preds = self.get_preds_in_trec_format(dev_predictions, dev_data)
                    metrics = evaluator.eval_runs(trec_preds, dict(qrels), evaluator.DEFAULT_METRICS, relevance_level)
                    logger.info("dev metrics: %s", " ".join([f"{metric}={v:0.3f}" for metric, v in sorted(metrics.items())]))
                    if metrics[metric] > dev_best_metric:
                        dev_best_metric = metrics[metric]
                        logger.info("new best dev metric: %0.4f", dev_best_metric)

                        self.write_to_metric_file(metric_fn, metrics)
                        wrapped_model.save_weights(dev_best_weight_fn)
                        Searcher.write_trec_run(trec_preds, outfn=(dev_output_path / "best").as_posix())

            if cur_step >= self.config["niters"] * self.n_batch_per_iter:
                break

        return trec_preds
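The loop above drives the BERT learning rate through `self.change_lr`, which is not part of the excerpt. A minimal sketch of a warmup-then-linear-decay helper of that shape is given below; `warmup_steps` and `total_steps` are assumed parameters, not values taken from the original code.

def change_lr(step, lr, warmup_steps=1000, total_steps=100000):
    """Linear warmup for `warmup_steps` steps, then linear decay towards zero."""
    if step < warmup_steps:
        return lr * step / max(1, warmup_steps)
    remaining = max(0, total_steps - step)
    return lr * remaining / max(1, total_steps - warmup_steps)

# Applied once per step, mirroring the loop above:
# K.set_value(optimizer_2.lr, change_lr(step=cur_step, lr=config["bertlr"]))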
예제 #32
0
 def on_epoch_end(self, epoch, logs=None):
     logs = logs or {}
     logs['lr'] = K.get_value(self.model.optimizer.lr)
예제 #33
0
 def on_batch_end(self, batch, logs=None):
     # Print the global L2 norm of the optimizer weights
     print(str(math.sqrt(sum(np.sum(np.square(K.get_value(w))) for w in self.model.optimizer.weights))) + '\n')
     return
예제 #34
0
def change_params(epoch, logs):
    if epoch <= 5:
        K.set_value(beta, K.get_value(beta) + 2e-5)
    if epoch == 30:
        K.set_value(alpha, 0.0)
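A sketch of how a parameter-annealing hook like `change_params` might be attached to training through a LambdaCallback; `beta` and `alpha` are assumed to be Keras variables referenced by a custom loss, and the fit call is illustrative.

import tensorflow as tf
from tensorflow.keras import backend as K

beta = K.variable(0.0)   # assumed: annealed weight used inside a custom loss
alpha = K.variable(1.0)  # assumed: auxiliary weight switched off at epoch 30

anneal_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=change_params)

# model.fit(x_train, y_train, epochs=50, callbacks=[anneal_callback])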
예제 #35
0
    def set_vocabulary(self,
                       vocab,
                       df_data=None,
                       oov_df_value=None,
                       append=False):
        """Sets vocabulary (and optionally document frequency) data for this layer.

    This method sets the vocabulary and DF data for this layer directly, instead
    of analyzing a dataset through 'adapt'. It should be used whenever the vocab
    (and optionally document frequency) information is already known. If
    vocabulary data is already present in the layer, this method will either
    replace it, if 'append' is set to False, or append to it (if 'append' is set
    to True).

    Arguments:
      vocab: An array of string tokens.
      df_data: An array of document frequency data. Only necessary if the layer
        output_mode is TFIDF.
      oov_df_value: The document frequency of the OOV token. Only necessary if
        output_mode is TFIDF. OOV data is optional when appending additional
        data in TFIDF mode; if an OOV value is supplied it will overwrite the
        existing OOV value.
      append: Whether to overwrite or append any existing vocabulary data.

    Raises:
      ValueError: If there are too many inputs, the inputs do not match, or
        input data is missing.
    """
        current_table_size = self._get_table_size()
        total_vocab_size = len(vocab) + (current_table_size if append else 0)
        if self._max_tokens is not None and total_vocab_size > self._max_vocab_size:
            raise ValueError(
                "Attempted to set a vocabulary larger than the maximum vocab size. "
                "Passed vocab size is %s, max vocab size is %s. Note that the OOV "
                "token is automatically added to the number of tokens." %
                (total_vocab_size, self._max_vocab_size))

        # We're only _really_ appending if the table_size is nonzero. This is
        # important for some sanity checks in tfidf mode (specifically, checking if
        # oov_df_value is set or not) and handling existing tfidf weight data.
        append = append if current_table_size > 0 else False

        if self._output_mode == TFIDF:
            if df_data is None:
                raise ValueError("df_data must be set if output_mode is TFIDF")
            if len(vocab) != len(df_data):
                raise ValueError("df_data must be the same length as vocab. "
                                 "len(df_data) is %s, len(vocab) is %s" %
                                 (len(vocab), len(df_data)))
            if not append and oov_df_value is None:
                raise ValueError(
                    "You must pass an oov_df_value the first time "
                    "'set_vocabulary' is called when output_mode is "
                    "TFIDF.")
        else:
            if df_data is not None:
                raise ValueError(
                    "df_data should only be set if output_mode is TFIDF. "
                    "output_mode is %s." % self._output_mode)

        start_index = self._reserved_values + (self._get_table_size()
                                               if append else 0)
        values = np.arange(start_index,
                           len(vocab) + start_index,
                           dtype=np.int64)

        vocab = self._convert_to_ndarray(vocab)
        self._assert_same_type(dtypes.string, vocab, "vocab")

        values = self._convert_to_ndarray(values)
        self._assert_same_type(dtypes.int64, values, "values")

        if not append and self._vocab_size > 0:
            self._clear_table()
        self._insert_table_data(vocab, values)

        # When doing raw or integer output, we don't have a Vectorize layer to
        # manage. In this case, we can return directly.
        if self._output_mode in [None, INT]:
            return

        if not self._pad_to_max or self._max_tokens is None:
            num_tokens = total_vocab_size + self._reserved_values
            self._vectorize_layer.set_num_elements(num_tokens)

        if self._output_mode == TFIDF:
            df_data = self._convert_to_ndarray(df_data)
            if append:
                # The existing IDF data is stored in a Keras weight, so we can get it
                # by calling K.get_value() on the weight object. Take the first
                # table_size+1 values in case we're padding the weight with zeros
                existing_df_data = K.get_value(
                    self._vectorize_layer.tf_idf_weights)[:current_table_size +
                                                          1]
                df_data = np.append(existing_df_data, df_data, axis=0)
                # If we are appending and need to replace the OOV DF value, we can
                # assign it over the existing OOV DF value at index 0 of the (already-
                # concatenated) DF value array.
                if oov_df_value is not None:
                    df_data[0] = oov_df_value
            else:
                # If we are not appending (that is, we have only new data) we need to
                # insert the OOV value to the front of the array. (This is an append to
                # the head, not a replacement of the zeroth value.)
                if not isinstance(oov_df_value, np.ndarray):
                    oov_df_value = np.array([oov_df_value])
                df_data = np.insert(df_data, 0, oov_df_value)
            self._vectorize_layer.set_tfidf_data(df_data)
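A hedged usage sketch for the method above, assuming `layer` is an instance of the lookup layer configured with TFIDF output; the vocabulary, document frequencies, and OOV value are illustrative.

vocab = ["earth", "wind", "and", "fire"]
df_data = [273, 552, 1390, 84]   # document frequencies aligned with `vocab`
layer.set_vocabulary(vocab, df_data=df_data, oov_df_value=2)

# Additional tokens can be appended later; the OOV value may be omitted then.
layer.set_vocabulary(["water"], df_data=[16], append=True)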
예제 #36
0
 def get_config(self):
   return {'a': backend.get_value(self.a),
           'b': self.b,
           'name': self.name}
예제 #37
0
 def value(self):
     return K.get_value(self.layers[-1].param)
예제 #38
0
                  batch_size=batch_size,
                  validation_split=0.1,
                  epochs=epochs,
                  verbose=1,
                  callbacks=callbacks_list,
                  shuffle=True)

    # load the saved best model weights
    asr.act_model.load_weights('best_model.hdf5')

    # predict outputs on validation images
    prediction = asr.act_model.predict(data.x_test[:10])

    # use CTC decoder
    out = K.get_value(
        K.ctc_decode(prediction,
                     input_length=np.ones(prediction.shape[0]) *
                     prediction.shape[1],
                     greedy=True)[0][0])

    # see the results
    i = 0
    for x in out:
        print("original_text =  ", data.ph_org_test[i])
        print("predicted text = ", end='')
        for p in x:
            if int(p) != -1:
                print("'" + data.phonemes[int(p)] + "', ", end='')
        print('\n')
        i += 1
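For comparison, the decode step above can also use beam search instead of greedy decoding; the `beam_width` and `top_paths` values below are illustrative, not from the original code.

    out_beam = K.get_value(
        K.ctc_decode(prediction,
                     input_length=np.ones(prediction.shape[0]) *
                     prediction.shape[1],
                     greedy=False,
                     beam_width=10,
                     top_paths=1)[0][0])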
예제 #39
0
def train(args, model, config):
    logger = logging.getLogger('tensorflow')

    train_dataset = config['train_dataset']
    eval_dataset = config['eval_dataset']
    steps = int(config['steps_per_epoch'])
    schedule = get_schedule(args=args, steps_per_epoch=steps)
    writer = tf.summary.create_file_writer(
        os.path.join(args.model_dir, 'event_files'))

    deep_optimizer = tf.keras.optimizers.RMSprop(
        learning_rate=args.deep_learning_rate, rho=0.5)

    wide_optimizer = tf.keras.optimizers.Ftrl(
        learning_rate=args.linear_learning_rate)

    compiled_loss = tf.keras.losses.BinaryCrossentropy()
    eval_loss = tf.keras.metrics.Mean()

    metrics = [tf.keras.metrics.BinaryAccuracy(), tf.keras.metrics.AUC()]

    current_step_var = tf.Variable(0, trainable=False, dtype=tf.int64)
    display_id_counter = tf.Variable(0., trainable=False, dtype=tf.float64)
    streaming_map = tf.Variable(0.,
                                name='STREAMING_MAP',
                                trainable=False,
                                dtype=tf.float64)

    checkpoint = tf.train.Checkpoint(deep_optimizer=deep_optimizer,
                                     wide_optimizer=wide_optimizer,
                                     model=model,
                                     current_step=current_step_var)
    manager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                         directory=os.path.join(
                                             args.model_dir, 'checkpoint'),
                                         max_to_keep=1)

    if args.use_checkpoint:
        checkpoint.restore(manager.latest_checkpoint)
        if manager.latest_checkpoint:
            logger.warning(f'Model restored from checkpoint {args.model_dir}')
            if args.benchmark:
                current_step_var.assign(0)
        else:
            logger.warning(
                f'Failed to restore model from checkpoint {args.model_dir}')

    if args.amp:
        deep_optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
            deep_optimizer, loss_scale='dynamic')
        wide_optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
            wide_optimizer, loss_scale='dynamic')

    @tf.function
    def train_step(x, y, first_batch):
        with tf.GradientTape(persistent=True) as tape:
            y_pred = model(x, training=True)
            loss = compiled_loss(y, y_pred)
            linear_loss = wide_optimizer.get_scaled_loss(
                loss) if args.amp else loss
            deep_loss = deep_optimizer.get_scaled_loss(
                loss) if args.amp else loss

        if not args.cpu:
            tape = hvd.DistributedGradientTape(tape)

        for metric in metrics:
            metric.update_state(y, y_pred)

        linear_vars = model.linear_model.trainable_variables
        dnn_vars = model.dnn_model.trainable_variables
        linear_grads = tape.gradient(linear_loss, linear_vars)
        dnn_grads = tape.gradient(deep_loss, dnn_vars)
        if args.amp:
            linear_grads = wide_optimizer.get_unscaled_gradients(linear_grads)
            dnn_grads = deep_optimizer.get_unscaled_gradients(dnn_grads)

        wide_optimizer.apply_gradients(zip(linear_grads, linear_vars))
        deep_optimizer.apply_gradients(zip(dnn_grads, dnn_vars))
        if first_batch and not args.cpu:
            hvd.broadcast_variables(model.linear_model.variables, root_rank=0)
            hvd.broadcast_variables(model.dnn_model.variables, root_rank=0)
            hvd.broadcast_variables(wide_optimizer.variables(), root_rank=0)
            hvd.broadcast_variables(deep_optimizer.variables(), root_rank=0)
        return loss

    @tf.function
    def evaluation_step(x, y):
        predictions = model(x, training=False)
        loss = compiled_loss(y, predictions)

        for metric in metrics:
            metric.update_state(y, predictions)

        predictions = tf.reshape(predictions, [-1])
        predictions = tf.cast(predictions, tf.float64)
        display_ids = x[DISPLAY_ID_COLUMN]
        display_ids = tf.reshape(display_ids, [-1])
        labels = tf.reshape(y, [-1])
        sorted_ids = tf.argsort(display_ids)
        display_ids = tf.gather(display_ids, indices=sorted_ids)
        predictions = tf.gather(predictions, indices=sorted_ids)
        labels = tf.gather(labels, indices=sorted_ids)
        _, display_ids_idx, display_ids_ads_count = tf.unique_with_counts(
            display_ids, out_idx=tf.int64)
        pad_length = 30 - tf.reduce_max(display_ids_ads_count)
        preds = tf.RaggedTensor.from_value_rowids(predictions,
                                                  display_ids_idx).to_tensor()
        labels = tf.RaggedTensor.from_value_rowids(
            labels, display_ids_idx).to_tensor()

        labels_mask = tf.math.reduce_max(labels, 1)
        preds_masked = tf.boolean_mask(preds, labels_mask)
        labels_masked = tf.boolean_mask(labels, labels_mask)
        labels_masked = tf.argmax(labels_masked, axis=1, output_type=tf.int32)
        labels_masked = tf.reshape(labels_masked, [-1, 1])

        preds_masked = tf.pad(preds_masked, [(0, 0), (0, pad_length)])
        _, predictions_idx = tf.math.top_k(preds_masked, 12)
        indices = tf.math.equal(predictions_idx, labels_masked)
        indices_mask = tf.math.reduce_any(indices, 1)
        masked_indices = tf.boolean_mask(indices, indices_mask)

        res = tf.argmax(masked_indices, axis=1)
        ap_matrix = tf.divide(1, tf.add(res, 1))
        ap_sum = tf.reduce_sum(ap_matrix)
        shape = tf.cast(tf.shape(indices)[0], tf.float64)
        display_id_counter.assign_add(shape)
        streaming_map.assign_add(ap_sum)
        return loss

    t0 = None
    t_batch = None

    with writer.as_default():
        for epoch in range(1, args.num_epochs + 1):
            for step, (x, y) in enumerate(train_dataset):
                current_step = int(current_step_var.numpy())
                schedule(optimizer=deep_optimizer, current_step=current_step)

                for metric in metrics:
                    metric.reset_states()
                loss = train_step(x, y, epoch == 1 and step == 0)
                if args.cpu or hvd.rank() == 0:
                    for metric in metrics:
                        tf.summary.scalar(f'{metric.name}',
                                          metric.result(),
                                          step=current_step)
                    tf.summary.scalar('loss', loss, step=current_step)
                    tf.summary.scalar('schedule',
                                      K.get_value(deep_optimizer.lr),
                                      step=current_step)
                    writer.flush()

                if args.benchmark:
                    boundary = max(args.benchmark_warmup_steps, 1)
                    if current_step == boundary:
                        t0 = time.time()
                    if current_step > boundary:
                        batch_time = time.time() - t_batch
                        samplesps = args.global_batch_size / batch_time
                        dllogger.log(data={'batch_samplesps': samplesps},
                                     step=(1, current_step))

                        if args.benchmark_steps <= current_step:
                            train_time = time.time() - t0
                            epochs = args.benchmark_steps - max(
                                args.benchmark_warmup_steps, 1)
                            train_throughput = (args.global_batch_size *
                                                epochs) / train_time
                            dllogger.log(
                                data={'train_throughput': train_throughput},
                                step=tuple())
                            return

                else:
                    if current_step % 100 == 0:
                        train_data = {
                            metric.name: f'{metric.result().numpy():.4f}'
                            for metric in metrics
                        }
                        train_data['loss'] = f'{loss.numpy():.4f}'
                        dllogger.log(data=train_data,
                                     step=(current_step,
                                           args.num_epochs * steps))

                    if step == steps:
                        break

                current_step_var.assign_add(1)
                t_batch = time.time()
            if args.benchmark:
                continue

            for metric in metrics:
                metric.reset_states()
            eval_loss.reset_states()

            for step, (x, y) in enumerate(eval_dataset):
                loss = evaluation_step(x, y)
                eval_loss.update_state(loss)

            map_metric = tf.divide(streaming_map, display_id_counter) if args.cpu else \
                hvd.allreduce(tf.divide(streaming_map, display_id_counter))

            map_metric = map_metric.numpy()
            eval_loss_reduced = eval_loss.result() if args.cpu else \
                hvd.allreduce(eval_loss.result())

            metrics_reduced = {
                f'{metric.name}_val':
                metric.result() if args.cpu else hvd.allreduce(metric.result())
                for metric in metrics
            }

            for name, result in metrics_reduced.items():
                tf.summary.scalar(f'{name}', result, step=steps * epoch)
            tf.summary.scalar('loss_val',
                              eval_loss_reduced,
                              step=steps * epoch)
            tf.summary.scalar('map_val', map_metric, step=steps * epoch)
            writer.flush()

            eval_data = {
                name: f'{result.numpy():.4f}'
                for name, result in metrics_reduced.items()
            }
            eval_data.update({
                'loss_val': f'{eval_loss_reduced.numpy():.4f}',
                'streaming_map_val': f'{map_metric:.4f}'
            })
            dllogger.log(data=eval_data,
                         step=(steps * epoch, args.num_epochs * steps))

            if args.cpu or hvd.rank() == 0:
                manager.save()

            display_id_counter.assign(0)
            streaming_map.assign(0)
        if args.cpu or hvd.rank() == 0:
            dllogger.log(data=eval_data, step=tuple())
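The `train` function above calls into Horovod (`hvd.rank()`, `DistributedGradientTape`, `allreduce`) but its initialization happens elsewhere. A minimal sketch of the standard Horovod/TF2 setup it presumably relies on, not part of the original snippet:

import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
if gpus:
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')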
예제 #40
0
    def set_vocabulary(self,
                       vocab,
                       df_data=None,
                       oov_df_value=None,
                       append=False):
        """Sets vocabulary (and optionally document frequency) data for this layer.

    This method sets the vocabulary and DF data for this layer directly, instead
    of analyzing a dataset through 'adapt'. It should be used whenever the vocab
    (and optionally document frequency) information is already known. If
    vocabulary data is already present in the layer, this method will either
    replace it, if 'append' is set to False, or append to it (if 'append' is set
    to True).

    Arguments:
      vocab: An array of string tokens.
      df_data: An array of document frequency data. Only necessary if the layer
        output_mode is TFIDF.
      oov_df_value: The document frequency of the OOV token. Only necessary if
        output_mode is TFIDF. OOV data is optional when appending additional
        data in TFIDF mode; if an OOV value is supplied it will overwrite the
        existing OOV value.
      append: Whether to overwrite or append any existing vocabulary data.

    Raises:
      ValueError: If there are too many inputs, the inputs do not match, or
        input data is missing.
      RuntimeError: If the vocabulary cannot be set when this function is
        called. This happens in "binary", "count", and "tfidf" modes
        if "pad_to_max_tokens" is False and the layer itself has already been
        called.
    """
        if self._output_mode != TFIDF and df_data is not None:
            raise ValueError(
                "df_data should only be set if output_mode is TFIDF. "
                "output_mode is %s." % self._output_mode)

        if (self._output_mode in [BINARY, COUNT, TFIDF] and self._called
                and not self._pad_to_max):
            raise RuntimeError(
                ("When using TextVectorization in {mode} mode and "
                 "pad_to_max_tokens is False, the vocabulary cannot "
                 "be changed after the layer is "
                 "called.").format(mode=self._output_mode))

        current_table_size = self._index_lookup_layer.vocab_size()
        self._index_lookup_layer.set_vocabulary(vocab, append)

        # When doing raw or integer output, we don't have a Vectorize layer to
        # manage. In this case, we can return directly.
        if self._output_mode in [None, INT]:
            return

        if not self._pad_to_max or self._max_tokens is None:
            num_tokens = self._index_lookup_layer.vocab_size(
            ) + self._reserved_values
            self._vectorize_layer.set_num_elements(num_tokens)

        # We're only _really_ appending if the table_size is nonzero. This is
        # important for some sanity checks in tfidf mode (specifically, checking if
        # oov_df_value is set or not) and handling existing tfidf weight data.
        append = append if current_table_size > 0 else False

        if self._output_mode == TFIDF:
            if df_data is None:
                raise ValueError("df_data must be set if output_mode is TFIDF")
            if len(vocab) != len(df_data):
                raise ValueError("df_data must be the same length as vocab. "
                                 "len(df_data) is %s, len(vocab) is %s" %
                                 (len(vocab), len(df_data)))
            if not append and oov_df_value is None:
                raise ValueError(
                    "You must pass an oov_df_value the first time "
                    "'set_vocabulary' is called when output_mode is "
                    "TFIDF.")

            df_data = self._convert_to_ndarray(df_data)
            if append:
                # The existing IDF data is stored in a Keras weight, so we can get it
                # by calling K.get_value() on the weight object. Take the first
                # table_size+1 values in case we're padding the weight with zeros
                existing_df_data = K.get_value(
                    self._vectorize_layer.tf_idf_weights)[:current_table_size +
                                                          1]
                df_data = np.append(existing_df_data, df_data, axis=0)
                # If we are appending and need to replace the OOV DF value, we can
                # assign it over the existing OOV DF value at index 0 of the (already-
                # concatenated) DF value array.
                if oov_df_value is not None:
                    df_data[0] = oov_df_value
            else:
                # If we are not appending (that is, we have only new data) we need to
                # insert the OOV value to the front of the array. (This is an append to
                # the head, not a replacement of the zeroth value.)
                if not isinstance(oov_df_value, np.ndarray):
                    oov_df_value = np.array([oov_df_value])
                df_data = np.insert(df_data, 0, oov_df_value)
            self._vectorize_layer.set_tfidf_data(df_data)
예제 #41
0
    def call(self, inputs):
        self._called = True
        if self._max_tokens is None:
            out_depth = K.get_value(self.num_elements)
        else:
            out_depth = self._max_tokens

        if self._sparse:
            if self._output_mode != COUNT:
                raise ValueError(
                    "Only supports `sparse=True` when `output_mode` "
                    ' is \"count\", got {}'.format(self._output_mode))
            inputs = self._convert_to_sparse_inputs(inputs)

            # Consider having sparse.one_hot
            # Append values to indices, and reduce sum to get the counts.
            tokens = array_ops.expand_dims(math_ops.cast(
                inputs.values, dtypes.int64),
                                           axis=1)
            count_tokens = array_ops.concat([inputs.indices, tokens], axis=1)
            count_values = array_ops.ones_like(inputs.values,
                                               dtype=dtypes.int64)
            unreduced_count_shape = array_ops.concat(
                [inputs.dense_shape, [out_depth]], axis=0)
            counts = sparse_tensor.SparseTensor(
                indices=count_tokens,
                values=count_values,
                dense_shape=unreduced_count_shape)
            count_data = sparse_ops.sparse_reduce_sum_v2(counts,
                                                         axis=1,
                                                         output_is_sparse=True)
            return count_data

        # If the input is a sparse tensor, we densify it with the default value of
        # -1. Because -1 is ignored by one_hot, this effectively drops the non-set
        # positions from the output encoding.
        if isinstance(inputs, sparse_tensor.SparseTensor):
            inputs = sparse_ops.sparse_tensor_to_dense(inputs,
                                                       default_value=-1)

        if self._output_mode == BINARY:
            bool_one_hot_data = array_ops.one_hot(inputs,
                                                  depth=out_depth,
                                                  on_value=True,
                                                  off_value=False)
            reduced_bool_data = math_ops.reduce_any(bool_one_hot_data, axis=1)
            binary_data = math_ops.cast(reduced_bool_data, dtypes.int64)
            binary_data.set_shape(tensor_shape.TensorShape((None, out_depth)))
            return binary_data

        one_hot_data = array_ops.one_hot(inputs, depth=out_depth)
        counts = math_ops.reduce_sum(one_hot_data, axis=1)
        if self._output_mode == COUNT:
            count_data = math_ops.cast(counts, dtypes.int64)
            count_data.set_shape(tensor_shape.TensorShape((None, out_depth)))
            return count_data

        tf_idf_data = math_ops.multiply(counts, self.tf_idf_weights)
        tf_idf_data.set_shape(tensor_shape.TensorShape((None, out_depth)))
        if self._output_mode == TFIDF:
            return tf_idf_data

        # We can only get here if we didn't recognize the passed mode.
        raise ValueError("Unknown output mode %s" % self._output_mode)
예제 #42
0
def iterator_predict_loop(model, inputs, steps, verbose=0):
  """Predict function for eager execution when input is dataset iterator.

  Arguments:
      model: Instance of `Model`.
      inputs: Input dataset iterator.
      steps: Total number of steps (batches of samples) before declaring
          `_predict_loop` finished.
      verbose: Verbosity mode.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions (if the model has multiple outputs).

  Raises:
      ValueError: In case of mismatch between given number of inputs and
        expectations of the model.
  """
  assert isinstance(inputs, iterator_ops.EagerIterator)
  if not isinstance(inputs.output_shapes,
                    (list, tuple)) or len(inputs.output_shapes) > 3:
    raise ValueError(
        'Please provide data as a list or tuple of 1, 2, or 3 elements '
        ' - `(input)`, or `(input, target)`, or `(input, target,'
        'sample_weights)`. Received %s. We do not use the `target` or'
        '`sample_weights` value here.' % inputs.output_shapes)
  outs = []
  if verbose == 1:
    progbar = generic_utils.Progbar(target=steps)
  for step_index in range(steps):
    # Get data from the iterator.
    try:
      next_element = inputs.get_next()
    except errors.OutOfRangeError:
      logging.warning(
          'Your dataset iterator ran out of data; interrupting prediction. '
          'Make sure that your dataset can generate at least `steps` batches '
          '(in this case, %d batches). You may need to use the repeat() '
          'function when building your dataset.', steps)
      break

    # expects a tuple, where first element of tuple represents inputs
    x = next_element[0]

    # Validate and standardize data.
    x, _, _ = model._standardize_user_data(x)
    x = training_utils.cast_if_floating_dtype(x)

    if isinstance(x, list) and len(x) == 1:
      x = x[0]

    if model._expects_training_arg:
      batch_outs = model.call(x, training=False)
    else:
      batch_outs = model.call(x)
    if not isinstance(batch_outs, list):
      batch_outs = [batch_outs]

    # We collect the results from every step and then concatenate them once
    # in the end. This is an expensive process. We are doing this because we
    # do not know the number of samples beforehand.
    if step_index == 0:
      for _ in batch_outs:
        outs.append([])
    for i, batch_out in enumerate(batch_outs):
      outs[i].append(backend.get_value(batch_out))

    if verbose == 1:
      progbar.update(step_index + 1)
  for i, out in enumerate(outs):
    outs[i] = np.concatenate(tuple(out), axis=0)
  if len(outs) == 1:
    return outs[0]
  return outs
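For a compiled model and a finite tf.data dataset, the public `Model.predict` API performs the same per-batch collection and concatenation. A hedged one-line equivalent, assuming `dataset` yields `(inputs, targets)` tuples and `steps` is known:

predictions = model.predict(dataset.map(lambda x, y: x), steps=steps, verbose=1)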
예제 #43
0
    def test_simple_with_other(self):
        with self.test_session():
            # create simple FilterDetections layer
            layer = layers.FilterDetections()

            # create simple input
            boxes = np.array(
                [[
                    [0, 0, 10, 10],
                    [0, 0, 10, 10],  # this will be suppressed
                ]],
                dtype=K.floatx())
            boxes = K.constant(boxes)

            classification = np.array(
                [[
                    [0, 0.9],  # this will be suppressed
                    [0, 1],
                ]],
                dtype=K.floatx())
            classification = K.constant(classification)

            other = []
            other.append(
                np.array(
                    [[
                        [0, 1234],  # this will be suppressed
                        [0, 5678],
                    ]],
                    dtype=K.floatx()))
            other.append(
                np.array(
                    [[
                        5678,  # this will be suppressed
                        1234,
                    ]],
                    dtype=K.floatx()))
            other = [K.constant(o) for o in other]

            # compute output
            actual = layer.call([boxes, classification] + other)
            actual_boxes = K.get_value(actual[0])
            actual_scores = K.get_value(actual[1])
            actual_labels = K.get_value(actual[2])
            actual_other = [K.get_value(a) for a in actual[3:]]

            # define expected output
            expected_boxes = -1 * np.ones((1, 300, 4), dtype=K.floatx())
            expected_boxes[0, 0, :] = [0, 0, 10, 10]

            expected_scores = -1 * np.ones((1, 300), dtype=K.floatx())
            expected_scores[0, 0] = 1

            expected_labels = -1 * np.ones((1, 300), dtype=K.floatx())
            expected_labels[0, 0] = 1

            expected_other = []
            expected_other.append(-1 * np.ones((1, 300, 2), dtype=K.floatx()))
            expected_other[-1][0, 0, :] = [0, 5678]
            expected_other.append(-1 * np.ones((1, 300), dtype=K.floatx()))
            expected_other[-1][0, 0] = 1234

            # assert actual and expected are equal
            self.assertAllEqual(actual_boxes, expected_boxes)
            self.assertAllEqual(actual_scores, expected_scores)
            self.assertAllEqual(actual_labels, expected_labels)

            for a, e in zip(actual_other, expected_other):
                self.assertAllEqual(a, e)
예제 #44
0
    def train(self,
              reranker,
              train_dataset,
              train_output_path,
              dev_data,
              dev_output_path,
              qrels,
              metric,
              relevance_level=1,
              init_path=None):
        if self.tpu:
            train_output_path = "{0}/{1}/{2}".format(
                self.config["storage"], "train_output",
                hashlib.md5(
                    str(train_output_path).encode("utf-8")).hexdigest())
        os.makedirs(dev_output_path, exist_ok=True)
        start_epoch = self.config["niters"] if reranker.config.get(
            "modeltype", "") in ["nir", "cedr"] else 0
        train_records = self.get_tf_train_records(reranker, train_dataset)
        dev_records = self.get_tf_dev_records(reranker, dev_data)
        dev_dist_dataset = self.strategy.experimental_distribute_dataset(
            dev_records)

        # Does not differ much from https://www.tensorflow.org/tutorials/distribute/custom_training
        strategy_scope = self.strategy.scope()
        with strategy_scope:
            reranker.build_model()
            wrapped_model = self.get_wrapped_model(reranker.model)
            if init_path:
                logger.info(f"Initializing model from checkpoint {init_path}")
                print("number of vars: ",
                      len(wrapped_model.trainable_variables))
                wrapped_model.load_weights(init_path)

            loss_object = self.get_loss(self.config["loss"])
            optimizer_1 = tf.keras.optimizers.Adam(
                learning_rate=self.config["lr"])
            optimizer_2 = tf.keras.optimizers.Adam(
                learning_rate=self.config["bertlr"])

            def compute_loss(labels, predictions):
                per_example_loss = loss_object(labels, predictions)
                return tf.nn.compute_average_loss(
                    per_example_loss, global_batch_size=self.config["batch"])

            def is_bert_parameters(name):
                name = name.lower()
                '''
                if "layer" in name:
                    if not ("9" in name or "10" in name or "11" in name or "12" in name):
                        return False
                '''
                if "/bert/" in name:
                    return True
                if "/electra/" in name:
                    return True
                if "/roberta/" in name:
                    return True
                if "/albert/" in name:
                    return True
                return False

        def train_step(inputs):
            data, labels = inputs

            with tf.GradientTape() as tape:
                train_predictions = wrapped_model(data, training=True)
                loss = compute_loss(labels, train_predictions)

            gradients = tape.gradient(loss, wrapped_model.trainable_variables)

            # TODO: Expose the layer names to lookout for as a ConfigOption?
            # TODO: Crystina mentioned that hugging face models have 'bert' in all the layers (including classifiers). Handle this case
            bert_variables = [
                (gradients[i], variable)
                for i, variable in enumerate(wrapped_model.trainable_variables)
                if is_bert_parameters(variable.name)
                and "classifier" not in variable.name
            ]
            classifier_vars = [
                (gradients[i], variable)
                for i, variable in enumerate(wrapped_model.trainable_variables)
                if "classifier" in variable.name
            ]
            other_vars = [
                (gradients[i], variable)
                for i, variable in enumerate(wrapped_model.trainable_variables)
                if (not is_bert_parameters(variable.name))
                and "classifier" not in variable.name
            ]

            # Making sure that we did not miss any variables
            assert len(bert_variables) + len(classifier_vars) + len(
                other_vars) == len(wrapped_model.trainable_variables)
            # TODO: Clean this up for general use

            if self.config["lr"] > 0:
                optimizer_1.apply_gradients(classifier_vars + other_vars)
            if self.config["bertlr"] > 0:
                optimizer_2.apply_gradients(bert_variables)

            return loss

        def test_step(inputs):
            data, labels = inputs
            predictions = wrapped_model.predict_step(data)

            return predictions

        @tf.function
        def distributed_train_step(dataset_inputs):
            per_replica_losses = self.strategy.run(train_step,
                                                   args=(dataset_inputs, ))

            return self.strategy.reduce(tf.distribute.ReduceOp.SUM,
                                        per_replica_losses,
                                        axis=None)

        @tf.function
        def distributed_test_step(dataset_inputs):
            return self.strategy.run(test_step, args=(dataset_inputs, ))

        best_metric = -np.inf
        epoch = 0
        num_batches = 0
        total_loss = 0
        iter_bar = tqdm(total=self.config["itersize"])

        initial_lr = self.change_lr(epoch,
                                    self.config["bertlr"],
                                    do_warmup=self.config["warmupbert"])
        K.set_value(optimizer_2.lr, K.get_value(initial_lr))
        wandb.log({"bertlr": K.get_value(initial_lr)},
                  step=epoch + start_epoch,
                  commit=False)

        initial_lr = self.change_lr(epoch,
                                    self.config["lr"],
                                    do_warmup=self.config["warmupnonbert"])
        K.set_value(optimizer_1.lr, K.get_value(initial_lr))
        wandb.log({"lr": K.get_value(initial_lr)},
                  step=epoch + start_epoch,
                  commit=False)

        train_records = train_records.shuffle(100000)
        train_dist_dataset = self.strategy.experimental_distribute_dataset(
            train_records)

        # Goes through the dataset ONCE (i.e. niters * itersize * batch samples). However, the dataset may already contain multiple instances of the same sample,
        # depending upon what Sampler was used. If you want multiple epochs, achieve it by tweaking the niters and
        # itersize values.
        for x in train_dist_dataset:
            total_loss += distributed_train_step(x)
            num_batches += 1
            train_loss = total_loss / num_batches
            iter_bar.update(1)

            if num_batches % self.config["itersize"] == 0:
                epoch += 1

                # Do warmup and decay
                new_lr = self.change_lr(epoch,
                                        self.config["bertlr"],
                                        do_warmup=self.config["warmupbert"])
                K.set_value(optimizer_2.lr, K.get_value(new_lr))
                wandb.log({f"bertlr": K.get_value(new_lr)},
                          step=epoch + start_epoch,
                          commit=False)

                new_lr = self.change_lr(epoch,
                                        self.config["lr"],
                                        do_warmup=self.config["warmupnonbert"])
                K.set_value(optimizer_1.lr, K.get_value(new_lr))
                wandb.log({f"lr": K.get_value(new_lr)},
                          step=epoch + start_epoch,
                          commit=False)

                iter_bar.close()
                logger.info("train_loss for epoch {} is {}".format(
                    epoch, train_loss))
                wandb.log({f"loss": float(train_loss.numpy())},
                          step=epoch + start_epoch,
                          commit=False)
                total_loss = 0

                if epoch % self.config["validatefreq"] == 0:
                    dev_predictions = []
                    for x in tqdm(dev_dist_dataset, desc="validation"):
                        pred_batch = (distributed_test_step(x).values
                                      if self.strategy.num_replicas_in_sync > 1
                                      else [distributed_test_step(x)])
                        for p in pred_batch:
                            dev_predictions.extend(p)

                    trec_preds = self.get_preds_in_trec_format(
                        dev_predictions, dev_data)
                    metrics = evaluator.eval_runs(
                        trec_preds, dict(qrels),
                        evaluator.DEFAULT_METRICS + ["bpref"], relevance_level)
                    logger.info(
                        "dev metrics: %s", " ".join([
                            f"{metric}={v:0.3f}"
                            for metric, v in sorted(metrics.items())
                        ]))
                    if metrics[metric] > best_metric:
                        logger.info("Writing checkpoint")
                        best_metric = metrics[metric]
                        wrapped_model.save_weights(
                            "{0}/dev.best".format(train_output_path))

                    wandb.log(
                        {
                            f"dev-{k}": v
                            for k, v in metrics.items() if k in [
                                "map", "bpref", "P_20", "ndcg_cut_20",
                                "judged_10", "judged_20", "judged_200"
                            ]
                        },
                        step=epoch + start_epoch,
                        commit=False)

                iter_bar = tqdm(total=self.config["itersize"])

            if num_batches >= self.config["niters"] * self.config["itersize"]:
                break
예제 #45
0
def train():
    KITTI_train_gen = KITTILoader(subset='training')
    dim_avg, dim_cnt = KITTI_train_gen.get_average_dimension()

    new_data = orientation_confidence_flip(KITTI_train_gen.image_data, dim_avg)

    model = nn.network()
    #model.load_weights('model00000296.hdf5')

    early_stop = callbacks.EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10, mode='min', verbose=1)
    checkpoint = callbacks.ModelCheckpoint('model{epoch:08d}.hdf5', monitor='val_loss', verbose=1, save_best_only=False, mode='min', period=1)
    tensorboard = callbacks.TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=False)

    

    all_examples = len(new_data)
    trv_split = int(cfg().split * all_examples) # train val split

    train_gen = data_gen(new_data[: trv_split])
    valid_gen = data_gen(new_data[trv_split : all_examples])

    print("READY FOR TRAINING")

    train_num = int(np.ceil(trv_split / cfg().batch_size))
    valid_num = int(np.ceil((all_examples - trv_split) / cfg().batch_size))

    #gen_flow = gen_flow_for_two_inputs(X_train, X_angle_train, y_train)

    # choose the minimizer to be sgd
    # minimizer = optimizer.SGD(lr=0.0001, momentum = 0.9)
    minimizer = optimizer.Adam(lr=0.0001)

    # multi task learning
    model.compile(optimizer=minimizer,  #minimizer,
                  loss={'dimensions': 'mean_squared_error', 'orientation': orientation_loss, 'confidence': 'categorical_crossentropy'},
                  loss_weights={'dimensions': 1., 'orientation': 10., 'confidence': 5.})

    print("####################################################")
    print(K.get_value(model.optimizer.lr))

    # My addition: step-decay learning-rate scheduler
    def scheduler(epoch):
        if epoch%10==0 and epoch!=0:
            lr = K.get_value(model.optimizer.lr)
            K.set_value(model.optimizer.lr, lr*.8)
            print("lr changed to {}".format(lr*.8))
            print("lr = ", K.get_value(model.optimizer.lr))
        return K.get_value(model.optimizer.lr)

    lr_sched = callbacks.LearningRateScheduler(scheduler)


    # d:0.0088 o:0.0042, c:0.0098
    # steps_per_epoch=train_num,
    # validation_steps=valid_num,
    # callbacks=[early_stop, checkpoint, tensorboard],
    model.fit_generator(generator=train_gen,
                        steps_per_epoch=train_num,
                        epochs=500,
                        verbose=1,
                        validation_data=valid_gen,
                        validation_steps=valid_num,
                        shuffle=True,
                        callbacks=[checkpoint, tensorboard, lr_sched],
                        max_queue_size=3)
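The same step decay can be written with the two-argument schedule signature accepted by newer versions of `LearningRateScheduler`; this is a sketch, not part of the original code, and assumes the same `callbacks` import used above.

lr_sched_alt = callbacks.LearningRateScheduler(
    lambda epoch, lr: lr * 0.8 if epoch % 10 == 0 and epoch != 0 else lr,
    verbose=1)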
예제 #46
0
  def test_dynamic_loss_scaling(self, strategy_fn, cloning=True):
    strategy = strategy_fn()
    initial_loss_scale = 2.
    batch_size = 4
    expected_gradient = backend.variable([initial_loss_scale / batch_size],
                                         dtype=dtypes.float16)
    # If this variable is set to True, the model below will have NaN gradients
    have_nan_gradients = backend.variable(False, dtype=dtypes.bool)
    with strategy.scope():
      with policy.policy_scope(policy.Policy('infer_float32_vars')):
        x = layers.Input(shape=(1,), batch_size=batch_size,
                         dtype=dtypes.float16)
        layer = AddLayer(assert_type=dtypes.float16)
        y = layer(x)
        identity_with_nan_grads = (
            mp_test_util.create_identity_with_nan_gradients_fn(
                have_nan_gradients))
        y = core.Lambda(identity_with_nan_grads)(y)
        identity_with_grad_check_fn = (
            mp_test_util.create_identity_with_grad_check_fn(
                expected_dtype=dtypes.float16,
                expected_gradient=expected_gradient))
        y = core.Lambda(identity_with_grad_check_fn)(y)
        y = math_ops.cast(y, dtypes.float32)
        model = models.Model(inputs=x, outputs=y)

        def loss_fn(y_true, y_pred):
          del y_true
          return math_ops.reduce_mean(y_pred)

        opt = gradient_descent.SGD(1.)
        loss_scale = loss_scale_module.DynamicLossScale(
            initial_loss_scale=initial_loss_scale, increment_period=2)
        opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale)
        model.compile(opt, loss=loss_fn, cloning=cloning)

    self.assertEqual(backend.eval(layer.v), 1)
    x = np.ones((batch_size, 1))
    y = np.ones((batch_size, 1))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(batch_size)
    model.fit(dataset)
    # The variable starts at 1 and has a gradient of 1, so it will go down by 1
    # each step.
    self.assertEqual(backend.eval(layer.v), 0)

    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -1)

    # There have been two steps without NaNs, so the loss scale will double
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient * 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -2)

    # Next test with NaN gradients.
    backend.set_value(have_nan_gradients, True)
    model.fit(dataset)
    # Variable should not be updated
    self.assertEqual(backend.eval(layer.v), -2)

    # Test with finite gradients again
    backend.set_value(have_nan_gradients, False)
    # The loss scale will be halved due to the NaNs, so the gradient will also
    # be halved
    backend.set_value(expected_gradient,
                      backend.get_value(expected_gradient / 2))
    model.fit(dataset)
    self.assertEqual(backend.eval(layer.v), -3)
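The test above uses the older experimental `DynamicLossScale` object; with the current public mixed-precision API the equivalent optimizer wrapping looks roughly like this, where `initial_scale` and `dynamic_growth_steps` mirror `initial_loss_scale` and `increment_period`:

from tensorflow import keras

opt = keras.optimizers.SGD(1.)
opt = keras.mixed_precision.LossScaleOptimizer(
    opt, dynamic=True, initial_scale=2., dynamic_growth_steps=2)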
예제 #47
0
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        logs['learning_rate'] = K.get_value(self.model.optimizer.lr)

        super().on_epoch_end(epoch, logs)
예제 #48
0
 def on_train_batch_end(self, batch, logs=None):
     logs = logs or {}
     logs.update(
         {'learning_rate': float(k.get_value(self.model.optimizer.lr))})
     index = tf.keras.backend.eval(self.model.optimizer.iterations)
     self._write_logs(logs, index)
예제 #49
0
 def on_epoch_end(self, epoch, logs=None):
     logs = logs or {}
     logs["lr"] = K.get_value(self.model.optimizer.lr)