def test_different_lr(self):
  """Test the usage of different learning rates for BERT and non-BERT variables."""
  with self.test_session() as sess:
    bert_w = tf.get_variable(
        "bert/w",
        shape=[3],
        initializer=tf.constant_initializer([0.1, 0.1, 0.1]))
    non_bert_w = tf.get_variable(
        "w",
        shape=[3],
        initializer=tf.constant_initializer([0.1, 0.1, 0.1]))
    x = tf.constant([1.0, 2.0, 3.0])
    loss = tf.reduce_mean(tf.square(x - bert_w - non_bert_w))
    hparams = tf.contrib.training.HParams(
        learning_rate=0.001,
        num_train_steps=100,
        num_warmup_steps=0,
        lr_bert=0.00001,
        optimizer="bert_adam",
        use_horovod=False)
    train_op, _, _ = optimization.create_optimizer(hparams, loss)
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    sess.run(train_op)
    bert_w_v, non_bert_w_v = sess.run((bert_w, non_bert_w))
    # The ratio of the weight updates reflects the learning rate difference
    # (lr_bert / learning_rate = 0.00001 / 0.001 = 0.01).
    self.assertAllClose((bert_w_v - 0.1) / (non_bert_w_v - 0.1),
                        [0.01, 0.01, 0.01],
                        rtol=1e-2,
                        atol=1e-2)
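# The test above relies on create_optimizer applying lr_bert to variables in
# the "bert" scope and learning_rate to everything else. A minimal sketch of
# how such a two-group update could be wired up in TF1 (a hypothetical helper,
# not the actual optimization.create_optimizer implementation):
def _sketch_two_lr_train_op(loss, lr, lr_bert):
  """Applies lr_bert to 'bert/*' variables and lr to all other variables."""
  trainable = tf.trainable_variables()
  bert_vars = [v for v in trainable if v.name.startswith('bert')]
  other_vars = [v for v in trainable if not v.name.startswith('bert')]
  grads = tf.gradients(loss, bert_vars + other_vars)
  # One optimizer per variable group, each with its own learning rate
  bert_op = tf.train.AdamOptimizer(lr_bert).apply_gradients(
      list(zip(grads[:len(bert_vars)], bert_vars)))
  other_op = tf.train.AdamOptimizer(lr).apply_gradients(
      list(zip(grads[len(bert_vars):], other_vars)))
  return tf.group(bert_op, other_op)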
def get_bert_optimizer_fn(hparams):
  """Returns a function that creates an optimizer for BERT parameters."""
  return lambda: create_optimizer(
      init_lr=hparams.lr_bert,
      num_train_steps=hparams.num_train_steps,
      num_warmup_steps=hparams.num_warmup_steps,
      optimizer=hparams.optimizer,
      use_lr_schedule=hparams.use_lr_schedule,
      use_bias_correction_for_adamw=hparams.use_bias_correction_for_adamw)
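# Example usage of get_bert_optimizer_fn; a SimpleNamespace stands in for the
# real hparams object, and the field values below are illustrative assumptions:
from types import SimpleNamespace

hparams = SimpleNamespace(
    lr_bert=2e-5,
    num_train_steps=10000,
    num_warmup_steps=1000,
    optimizer='adamw',
    use_lr_schedule=True,
    use_bias_correction_for_adamw=False)
bert_optimizer = get_bert_optimizer_fn(hparams)()  # optimizer is built lazily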
def testCreateOptimizer(self):
  """Tests create_optimizer()."""
  init_lr = 0.05
  num_train_steps = 10
  num_warmup_steps = 3
  num_bp_steps = 5
  x = tf.constant([[0.1, 0.2], [0.3, 0.1]], dtype=tf.float32)
  y_true = x[:, 0] + x[:, 1]

  for optimizer_type in ['sgd', 'adam', 'adamw', 'lamb']:
    optimizer = optimization.create_optimizer(
        init_lr=init_lr,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        optimizer=optimizer_type,
        use_lr_schedule=True,
        use_bias_correction_for_adamw=False)
    model = tf.keras.Sequential(
        tf.keras.layers.Dense(
            1,
            use_bias=False,
            kernel_initializer=tf.keras.initializers.zeros()))
    loss_obj = tf.keras.losses.MeanSquaredError()

    prev_loss = self._minimize(x, y_true, model, loss_obj, optimizer).numpy()
    prev_lr = optimizer._decayed_lr('float32').numpy()
    for step in range(1, num_bp_steps):
      loss = self._minimize(x, y_true, model, loss_obj, optimizer).numpy()
      # When num_warmup_steps > 0, the learning rate is 0 at the step that
      # computes prev_loss, so no update happens and the loss at step 1
      # equals prev_loss. Skip the loss check for that step.
      if step > 1:
        self.assertLess(
            loss, prev_loss,
            f"Loss should be declining at each step. Step: {step}")
      # Learning rate check
      lr = optimizer._decayed_lr('float32').numpy()
      if step < num_warmup_steps:
        self.assertGreater(
            lr, prev_lr,
            f"Learning rate should be increasing during warmup. Step: {step}")
      else:
        self.assertLess(
            lr, prev_lr,
            f"Learning rate should be decreasing after warmup. Step: {step}")
      prev_loss = loss
      prev_lr = lr
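# The assertions above encode the expected learning-rate shape: linear warmup
# followed by decay. A minimal standalone sketch of such a schedule (linear
# decay is an assumption here; the exact decay used by
# optimization.create_optimizer may differ):
def _sketch_lr(step, init_lr, num_train_steps, num_warmup_steps):
  if num_warmup_steps and step < num_warmup_steps:
    return init_lr * step / num_warmup_steps  # linear warmup
  return init_lr * (1.0 - step / num_train_steps)  # decay toward 0

# e.g. with init_lr=0.05, num_train_steps=10, num_warmup_steps=3:
# steps 1..2 warm up (0.0167, 0.0333), then steps 3, 4, ... decay (0.035, 0.03, ...)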
def _train_linear_model(self, x, y_true, init_lr, num_train_steps,
                        num_warmup_steps, process_grads_and_vars_fn):
  """Helper function to train a linear model."""
  optimizer = optimization.create_optimizer(
      init_lr=init_lr,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      optimizer='sgd',
      use_lr_schedule=True,
      use_bias_correction_for_adamw=False)
  model = tf.keras.Sequential(
      tf.keras.layers.Dense(
          1,
          use_bias=False,
          kernel_initializer=tf.keras.initializers.zeros()))
  loss_obj = tf.keras.losses.MeanSquaredError()
  for _ in range(2):
    loss, tape = self._get_loss(x, y_true, model, loss_obj)
    grads_and_vars = process_grads_and_vars_fn(tape, optimizer, loss,
                                               model.trainable_variables, [])
    optimizer.apply_gradients(grads_and_vars)
  return model
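# An example process_grads_and_vars_fn matching the signature used above: it
# computes gradients from the tape and clips them by global norm. The trailing
# list argument is ignored here; its real purpose depends on the tests that
# call this helper, so this is only a hedged sketch:
def clip_grads_and_vars_fn(tape, optimizer, loss, trainable_vars, _unused):
  grads = tape.gradient(loss, trainable_vars)
  clipped, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
  return list(zip(clipped, trainable_vars))

# model = self._train_linear_model(x, y_true, init_lr=0.05, num_train_steps=10,
#                                  num_warmup_steps=0,
#                                  process_grads_and_vars_fn=clip_grads_and_vars_fn)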
def model_fn(features, labels, mode, params):
  """Defines the model_fn to feed in to the estimator.

  :param features: dict containing the features in data
  :param labels: dict containing labels in data
  :param mode: running mode, one of TRAIN/EVAL/PREDICT
  :param params: hparams used
  :return: tf.estimator.EstimatorSpec
  """
  query_field = features.get('query', None)
  uid = features.get('uid', None)
  weight = features.get('weight', None)
  wide_ftrs = features.get('wide_ftrs', None)
  wide_ftrs_sp_idx = features.get('wide_ftrs_sp_idx', None)
  wide_ftrs_sp_val = features.get('wide_ftrs_sp_val', None)

  doc_fields = [features[ftr_name] for ftr_name in features
                if ftr_name.startswith('doc_')]
  if len(doc_fields) == 0:
    doc_fields = None
  usr_fields = [features[ftr_name] for ftr_name in features
                if ftr_name.startswith('usr_')]
  if len(usr_fields) == 0:
    usr_fields = None
  doc_id_fields = [features[ftr_name] for ftr_name in features
                   if ftr_name.startswith('docId_')]
  if len(doc_id_fields) == 0:
    doc_id_fields = None
  usr_id_fields = [features[ftr_name] for ftr_name in features
                   if ftr_name.startswith('usrId_')]
  if len(usr_id_fields) == 0:
    usr_id_fields = None

  label_field = labels['label'] if mode != tf.estimator.ModeKeys.PREDICT else None
  labels_passthrough = features['label']
  group_size_field = features['group_size'] if mode != tf.estimator.ModeKeys.PREDICT else None

  # build graph
  model = DeepMatch(query=query_field,
                    wide_ftrs=wide_ftrs,
                    doc_fields=doc_fields,
                    usr_fields=usr_fields,
                    doc_id_fields=doc_id_fields,
                    usr_id_fields=usr_id_fields,
                    hparams=params,
                    mode=mode,
                    wide_ftrs_sp_idx=wide_ftrs_sp_idx,
                    wide_ftrs_sp_val=wide_ftrs_sp_val)

  if mode == tf.estimator.ModeKeys.TRAIN:
    loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
    train_op, _, _ = optimization.create_optimizer(params, loss)
    global_step = tf.train.get_global_step()
    train_tensors_log = {'loss': loss, 'global_step': global_step}
    logging_hook = tf.train.LoggingTensorHook(train_tensors_log, every_n_iter=10)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[logging_hook])
  elif mode == tf.estimator.ModeKeys.EVAL:
    loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
    eval_metric_ops = {}
    for metric_name in params.all_metrics:
      metric_op_name = 'metric/{}'.format(metric_name)
      topk = int(metric_name.split('@')[1]) if '@' in metric_name else 10  # Default topk
      if metric_name.startswith('ndcg'):
        eval_metric_ops[metric_op_name] = metrics.compute_ndcg_tfr(
            model.scores, label_field, features, topk)
      elif metric_name.startswith('mrr'):
        eval_metric_ops[metric_op_name] = metrics.compute_mrr_tfr(
            model.scores, label_field, features)
      elif metric_name.startswith('precision'):
        eval_metric_ops[metric_op_name] = metrics.compute_precision_tfr(
            model.scores, label_field, features, topk)
      elif metric_name.startswith('traditional_ndcg'):
        eval_metric_ops[metric_op_name] = metrics.compute_ndcg(
            model.scores, label_field, group_size_field, topk)
      elif metric_name.startswith('li_mrr'):
        eval_metric_ops[metric_op_name] = metrics.compute_mrr(
            model.scores, labels['label'], features['group_size'], topk)
      elif metric_name == 'auc':
        eval_metric_ops[metric_op_name] = metrics.compute_auc(
            model.scores, label_field)
      elif metric_name == 'accuracy':
        eval_metric_ops[metric_op_name] = metrics.compute_accuracy(
            model.scores, label_field)
      elif metric_name == 'confusion_matrix':
        eval_metric_ops[metric_op_name] = metrics.compute_confusion_matrix(
            model.scores, label_field, params.num_classes)
      else:
        raise ValueError("Unsupported metric: %s" % metric_name)
    return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    # Prediction fields for scoring models
    predictions = {
        'uid': uid,
        'scores': model.original_scores,
        'weight': weight,
        'label': labels_passthrough
    }
    # Multiclass classification: export the probabilities across classes by applying softmax
    if params.num_classes > 1:
      predictions['multiclass_probabilities'] = tf.nn.softmax(model.scores)
    export_outputs = {
        'prediction': tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT` mode.
    return tf.estimator.EstimatorSpec(mode,
                                      predictions=predictions,
                                      export_outputs=export_outputs)
  else:
    raise ValueError("mode must be one of TRAIN/EVAL/PREDICT")
def model_fn(features, labels, mode, params):
  """Defines the model_fn to feed in to the estimator.

  :param features: dict containing the features in data
  :param labels: dict containing labels in data
  :param mode: running mode, one of TRAIN/EVAL/PREDICT
  :param params: hparams used
  :return: tf.estimator.EstimatorSpec
  """
  query_field = features.get('query', None)
  uid = features.get('uid', None)
  weight = features.get('weight', None)
  wide_ftrs = features.get('wide_ftrs', None)
  wide_ftrs_sp_idx = features.get('wide_ftrs_sp_idx', None)
  wide_ftrs_sp_val = features.get('wide_ftrs_sp_val', None)

  doc_fields = [features[ftr_name] for ftr_name in features
                if ftr_name.startswith('doc_')]
  if len(doc_fields) == 0:
    doc_fields = None
  usr_fields = [features[ftr_name] for ftr_name in features
                if ftr_name.startswith('usr_')]
  if len(usr_fields) == 0:
    usr_fields = None
  doc_id_fields = [features[ftr_name] for ftr_name in features
                   if ftr_name.startswith('docId_')]
  if len(doc_id_fields) == 0:
    doc_id_fields = None
  usr_id_fields = [features[ftr_name] for ftr_name in features
                   if ftr_name.startswith('usrId_')]
  if len(usr_id_fields) == 0:
    usr_id_fields = None

  label_field = labels['label'] if mode != tf.estimator.ModeKeys.PREDICT else None
  labels_passthrough = features['label']
  group_size_field = features['group_size'] if mode != tf.estimator.ModeKeys.PREDICT else None

  # For multitask training
  task_id_field = features.get('task_id', None)  # shape=[batch_size,]
  # Update the weight with each task's weight so that
  # weight per document = weight * task_weight
  if params.task_ids is not None:
    task_ids = params.task_ids  # e.g. [0, 1, 2]
    task_weights = params.task_weights  # e.g. [0.1, 0.3, 0.6]
    # Expand task_id_field to shape [batch_size, num_tasks]
    expanded_task_id_field = tf.transpose(
        tf.broadcast_to(task_id_field,
                        [len(task_ids), tf.shape(task_id_field)[0]]))
    task_mask = tf.cast(tf.equal(expanded_task_id_field, task_ids), dtype=tf.float32)
    weight *= tf.reduce_sum(task_mask * task_weights, 1)  # shape=[batch_size,]

  # build graph
  model = DeepMatch(query=query_field,
                    wide_ftrs=wide_ftrs,
                    doc_fields=doc_fields,
                    usr_fields=usr_fields,
                    doc_id_fields=doc_id_fields,
                    usr_id_fields=usr_id_fields,
                    hparams=params,
                    mode=mode,
                    wide_ftrs_sp_idx=wide_ftrs_sp_idx,
                    wide_ftrs_sp_val=wide_ftrs_sp_val,
                    task_id_field=task_id_field)

  if mode == tf.estimator.ModeKeys.TRAIN:
    loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
    train_op, _, _ = optimization.create_optimizer(params, loss)
    global_step = tf.train.get_global_step()
    train_tensors_log = {'loss': loss, 'global_step': global_step}
    logging_hook = tf.train.LoggingTensorHook(train_tensors_log, every_n_iter=10)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=[logging_hook])
  elif mode == tf.estimator.ModeKeys.EVAL:
    loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
    eval_metric_ops = {}
    for metric_name in params.all_metrics:
      metric_op_name = 'metric/{}'.format(metric_name)
      topk = int(metric_name.split('@')[1]) if '@' in metric_name else 10  # Default topk
      if metric_name.startswith('ndcg'):
        metric = metrics.compute_ndcg_tfr(model.scores, label_field, features, topk)
      elif metric_name.startswith('mrr'):
        metric = metrics.compute_mrr_tfr(model.scores, label_field, features)
      elif metric_name.startswith('precision'):
        metric = metrics.compute_precision_tfr(model.scores, label_field, features, topk)
      elif metric_name.startswith('traditional_ndcg'):
        metric = metrics.compute_ndcg(model.scores, label_field, group_size_field, topk)
      elif metric_name.startswith('li_mrr'):
        metric = metrics.compute_mrr(model.scores, labels['label'],
                                     features['group_size'], topk)
      elif metric_name == 'auc':
        metric = metrics.compute_auc(model.scores, label_field)
      elif metric_name == 'accuracy':
        metric = metrics.compute_accuracy(model.scores, label_field)
      elif metric_name == 'confusion_matrix':
        metric = metrics.compute_confusion_matrix(model.scores, label_field,
                                                  params.num_classes)
      else:
        raise ValueError(f"Unsupported metric: {metric_name}")
      eval_metric_ops[metric_op_name] = metric
    return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)
  elif mode == tf.estimator.ModeKeys.PREDICT:
    # Prediction fields for scoring models
    predictions = {
        'uid': uid,
        'scores': model.original_scores,
        'weight': weight,
        'label': labels_passthrough
    }
    # Multiclass classification: export the probabilities across classes by applying softmax
    if params.num_classes > 1:
      predictions['multiclass_probabilities'] = tf.nn.softmax(model.scores)
    export_outputs = {
        'prediction': tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT` mode.
    return tf.estimator.EstimatorSpec(mode,
                                      predictions=predictions,
                                      export_outputs=export_outputs)
  else:
    raise ValueError("mode must be one of TRAIN/EVAL/PREDICT")
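# Worked example of the multitask weighting in the model_fn above, assuming
# task_ids=[0, 1, 2], task_weights=[0.1, 0.3, 0.6] and a batch whose
# task_id_field is [2, 0, 1]:
#   expanded_task_id_field = [[2, 2, 2], [0, 0, 0], [1, 1, 1]]
#   task_mask              = [[0, 0, 1], [1, 0, 0], [0, 1, 0]]
#   reduce_sum(task_mask * task_weights, 1) = [0.6, 0.1, 0.3]
# so each document's weight is scaled by the weight of its own task.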