def _get_train_ops(self, features, targets):
  """Method that builds model graph and returns trainer ops.

  Expected to be overridden by sub-classes that require custom support.
  This implementation uses `model_fn` passed as a parameter to the
  constructor to build the model.

  Args:
    features: `Tensor` or `dict` of `Tensor` objects.
    targets: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    Tuple of train `Operation` and loss `Tensor`.
  """
  _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
  # TODO(ipolosukhin): Move this to TensorFlowEstimator when
  # moving out training.
  if isinstance(self.learning_rate, types.FunctionType):
    learning_rate = self.learning_rate(contrib_framework.get_global_step())
  else:
    learning_rate = self.learning_rate
  if isinstance(self.optimizer, types.FunctionType):
    optimizer = self.optimizer(learning_rate)
  else:
    optimizer = self.optimizer
  train_op = layers.optimize_loss(
      loss,
      contrib_framework.get_global_step(),
      learning_rate=learning_rate,
      optimizer=optimizer,
      clip_gradients=self.clip_gradients)
  # Add update ops.
  train_op = control_flow_ops.group(train_op,
                                    *ops.get_collection('update_ops'))
  return train_op, loss
def before_run(self, run_context):
  loss = (self.loss_op if self.loss_op is not None else
          run_context.session.graph.get_operation_by_name(
              LOSS_NAME).outputs[0])
  return session_run_hook.SessionRunArgs(
      {'global_step': contrib_framework.get_global_step(),
       'current_loss': loss})
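# The before_run above only requests fetches; the early-stopping behaviour
# provided by hooks such as TensorForestLossHook (used further below) lives
# in the matching after_run. A minimal sketch, assuming the same fetch names;
# the real hook's bookkeeping may differ.
class LossEarlyStoppingHook(session_run_hook.SessionRunHook):

  def __init__(self, early_stopping_rounds, loss_op=None):
    self.early_stopping_rounds = early_stopping_rounds
    self.loss_op = loss_op
    self.min_loss = None
    self.last_step = -1

  def before_run(self, run_context):
    loss = (self.loss_op if self.loss_op is not None else
            run_context.session.graph.get_operation_by_name(
                LOSS_NAME).outputs[0])
    return session_run_hook.SessionRunArgs(
        {'global_step': contrib_framework.get_global_step(),
         'current_loss': loss})

  def after_run(self, run_context, run_values):
    current_loss = run_values.results['current_loss']
    current_step = run_values.results['global_step']
    if self.min_loss is None or current_loss < self.min_loss:
      self.min_loss = current_loss
      self.last_step = current_step
    elif current_step - self.last_step >= self.early_stopping_rounds:
      # No improvement for early_stopping_rounds steps: stop training.
      run_context.request_stop()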
def lenet5_model(X, y, mode, image_size=(-1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 1), pool_size=(1, 2, 2, 1)): X = tf.pad(tf.reshape(X, image_size), [[0, 0], [2, 2], [2, 2], [0, 0]], mode="CONSTANT") print("x ", X.shape) print("y ", y.shape) layer1 = lenet5_layer(X, 6, [5, 5], pool_size) print("layer1 ", layer1.shape) layer2 = lenet5_layer(layer1, 16, [5, 5], pool_size) print("layer2 ", layer2.shape) layer3 = layers.conv2d(layer2, num_outputs=120, kernel_size=[5, 5], activation_fn=tf.nn.softmax, padding='VALID') print("layer3 ", layer3.shape) result = dense_layer(layer3, [84, 10], keep_prob=0.5) result = tf.reshape(result, [-1, 10]) print("result ", result.shape) prediction, loss = learn.models.logistic_regression_zero_init(result, y) train_op = layers.optimize_loss(loss, framework.get_global_step(), optimizer='Adagrad', learning_rate=0.1) return prediction, loss, train_op
def conv_model(X, Y_, mode):
  XX = tf.reshape(X, [-1, 28, 28, 1])
  biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
  Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6],
                     biases_initializer=biasInit)
  Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2,
                     biases_initializer=biasInit)
  Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2,
                     biases_initializer=biasInit)
  Y4 = layers.flatten(Y3)
  Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
  Ylogits = layers.linear(Y5, 10)
  predict = tf.nn.softmax(Ylogits)
  classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
  # Keyword arguments make the logits/labels order explicit.
  loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
          logits=Ylogits, labels=tf.one_hot(Y_, 10))) * 100
  train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                  0.001, "Adam")
  return {"predictions": predict, "classes": classes}, loss, train_op
def model_fn(features, labels, mode): """Builds generic graph for training or eval.""" # TODO logits = A tensor representing the pre-softmax likelyhood of # each digit. tensors = {} # Add to the Graph the Ops for loss calculation. if mode == ModeKeys.INFER: # TODO tensors['digit'] = Tensor representing the predicted digit for 'features' # Since 'labels' is None we can't calculate a loss loss_op = None else: # TODO loss_op = Operation to calculate loss tensors['loss'] = loss_op tf.scalar_summary('loss', loss_op) # Add to the Graph the Ops for accuracy calculation. if mode == ModeKeys.EVAL: # TODO accuracy_op = Calculate the accuracy of the inferred digits given 'labels' tensors['accuracy'] = accuracy_op tf.scalar_summary('training/hptuning/metric', accuracy_op) # Add to the Graph the Ops that calculate and apply gradients. if mode == ModeKeys.TRAIN: global_step = framework.get_global_step() # TODO train_op = the gradient descent optimizer with the given learning rate # that minimizes the loss else: train_op = None return tensors, loss_op, train_op
def softmax_model(X, Y_, mode):
  Ylogits = layers.linear(X, 10)
  predict = tf.nn.softmax(Ylogits)
  classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
  # Keyword arguments make the logits/labels order explicit.
  loss = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(
          logits=Ylogits, labels=tf.one_hot(Y_, 10))) * 100
  train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                  0.003, "Adam")
  return {"predictions": predict, "classes": classes}, loss, train_op
def _get_train_ops(self, features, targets): """Method that builds model graph and returns trainer ops. Args: features: `Tensor` or `dict` of `Tensor` objects. targets: `Tensor` or `dict` of `Tensor` objects. Returns: Tuple of train `Operation` and loss `Tensor`. """ features, _, spec = data_ops.ParseDataTensorOrDict(features) labels = data_ops.ParseLabelTensorOrDict(targets) _assert_float32(features) _assert_float32(labels) graph_builder = self.graph_builder_class( self.params, device_assigner=self.device_assigner, **self.construction_args) epoch = None if self.data_feeder: epoch = self.data_feeder.make_epoch_variable() train = control_flow_ops.group( graph_builder.training_graph( features, labels, data_spec=spec, epoch=epoch, **self.training_args), state_ops.assign_add(contrib_framework.get_global_step(), 1)) self.training_loss = graph_builder.training_loss(features, targets) return train, self.training_loss
def model_fn(features, labels, mode, params): scores = predict_scores(features) if mode == ModeKeys.INFER: return EstimatorSpec(mode, predictions=scores) positive_scores = lookup_positives(scores, labels['click_position']) logits = create_diffs(positive_scores, scores) lbls = create_label(labels['click_position']) ele_loss = elementwise_loss(lbls, logits, labels['normal_mask']) * lbls loss = reduce_sum(ele_loss) true_lbl = true_label(features, labels) if mode == ModeKeys.EVAL: return EstimatorSpec(mode, loss=loss, eval_metric_ops={ 'acc': mean( accuracy( argmax(noise_label(labels), axis=1), argmax(to_one_hot(scores), axis=1))) }) else: optimizer = AdamOptimizer(learning_rate=params['learning_rate']) train_op = optimizer.minimize(loss, global_step=get_global_step()) return EstimatorSpec(mode, loss=loss, train_op=train_op)
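# One way a model_fn like the one above is typically wired up; the input
# functions and the hyperparameter value below are placeholders for
# illustration, not taken from the original project.
ranker = tf.estimator.Estimator(
    model_fn=model_fn,
    params={'learning_rate': 1e-3})
ranker.train(input_fn=train_input_fn, steps=10000)
print(ranker.evaluate(input_fn=eval_input_fn, steps=100))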
def _get_train_ops(self, features, targets): """Method that builds model graph and returns trainer ops. Args: features: `Tensor` or `dict` of `Tensor` objects. targets: `Tensor` or `dict` of `Tensor` objects. Returns: Tuple of train `Operation` and loss `Tensor`. """ features, spec = data_ops.ParseDataTensorOrDict(features) labels = data_ops.ParseLabelTensorOrDict(targets) graph_builder = self.graph_builder_class( self.params, device_assigner=self.device_assigner, **self.construction_args) epoch = None if self.data_feeder: epoch = self.data_feeder.make_epoch_variable() train = control_flow_ops.group( graph_builder.training_graph( features, labels, data_spec=spec, epoch=epoch, **self.training_args), state_ops.assign_add(contrib_framework.get_global_step(), 1)) self.training_loss = graph_builder.training_loss() return train, self.training_loss
def auto_encoder(x_1, x_2, x_mask_1, x_mask_2, y, dropout, opt): x_1_emb, W_emb = embedding(x_1, opt) # batch L emb x_2_emb = tf.nn.embedding_lookup(W_emb, x_2) x_1_emb = tf.nn.dropout(x_1_emb, dropout) # batch L emb x_2_emb = tf.nn.dropout(x_2_emb, dropout) # batch L emb biasInit = tf.constant_initializer(0.001, dtype=tf.float32) x_1_emb = layers.fully_connected(tf.squeeze(x_1_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=None) # batch L emb x_2_emb = layers.fully_connected(tf.squeeze(x_2_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=True) x_1_emb = tf.expand_dims(x_1_emb, 3) # batch L emb 1 x_2_emb = tf.expand_dims(x_2_emb, 3) if opt.encoder == 'aver': H_enc_1 = aver_emb_encoder(x_1_emb, x_mask_1) H_enc_2 = aver_emb_encoder(x_2_emb, x_mask_2) elif opt.encoder == 'max': H_enc_1 = max_emb_encoder(x_1_emb, x_mask_1, opt) H_enc_2 = max_emb_encoder(x_2_emb, x_mask_2, opt) elif opt.encoder == 'concat': H_enc_1 = concat_emb_encoder(x_1_emb, x_mask_1, opt) H_enc_2 = concat_emb_encoder(x_2_emb, x_mask_2, opt) # discriminative loss term if opt.combine_enc == 'mult': H_enc = tf.multiply(H_enc_1, H_enc_2) # batch * n_gan if opt.combine_enc == 'concat': H_enc = tf.concat([H_enc_1, H_enc_2], 1) if opt.combine_enc == 'sub': H_enc = tf.subtract(H_enc_1, H_enc_2) if opt.combine_enc == 'mix': H_1 = tf.multiply(H_enc_1, H_enc_2) H_2 = tf.concat([H_enc_1, H_enc_2], 1) H_3 = tf.subtract(H_enc_1, H_enc_2) H_enc = tf.concat([H_1, H_2, H_3], 1) # calculate the accuracy logits = discriminator_2layer(H_enc, opt, dropout, prefix='classify_', num_outputs=opt.category, is_reuse=None) prob = tf.nn.softmax(logits) correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)) train_op = layers.optimize_loss( loss, framework.get_global_step(), optimizer='Adam', # variables=d_vars, learning_rate=opt.lr) return accuracy, loss, train_op, W_emb
def _model_fn(features, labels, mode): """Function that returns predictions, training loss, and training op.""" weights = None if weights_name and weights_name in features: weights = features.pop(weights_name) graph_builder = graph_builder_class(params, device_assigner=device_assigner) inference = {} if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.INFER): inference[eval_metrics.INFERENCE_PROB_NAME] = ( graph_builder.inference_graph(features)) if not params.regression: inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( inference[eval_metrics.INFERENCE_PROB_NAME], 1) # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). loss_deps = [] training_graph = None if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN: training_graph = control_flow_ops.group( graph_builder.training_graph(features, labels, input_weights=weights, num_trainers=num_trainers, trainer_id=trainer_id), state_ops.assign_add(contrib_framework.get_global_step(), 1)) loss_deps.append(training_graph) training_loss = None if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.TRAIN): with ops.control_dependencies(loss_deps): training_loss = graph_builder.training_loss(features, labels, name=LOSS_NAME) if report_feature_importances and mode == model_fn_lib.ModeKeys.EVAL: training_loss = logging_ops.Print( training_loss, [graph_builder.feature_importances()], summarize=1000) # Put weights back in if weights is not None: features[weights_name] = weights training_hooks = [] if early_stopping_rounds: training_hooks.append(TensorForestLossHook(early_stopping_rounds)) return model_fn_lib.ModelFnOps(mode=mode, predictions=inference, loss=training_loss, train_op=training_graph, training_hooks=training_hooks)
def conv_model_train_op(loss, mode):
  return layers.optimize_loss(
      loss,
      framework.get_global_step(),
      learning_rate=0.003,
      optimizer="Adam",
      # To remove learning rate decay, comment out the next line.
      learning_rate_decay_fn=lambda lr, step: 0.0001 + tf.train.exponential_decay(
          lr, step, -2000, math.e)
  ) if mode == learn.ModeKeys.TRAIN else None
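# The decay function above calls exponential_decay with decay_steps=-2000 and
# decay_rate=math.e, i.e. lr * e**(-step / 2000), so the effective rate decays
# smoothly from 0.003 + 0.0001 towards the 0.0001 floor. A quick check of that
# schedule, assuming the standard exponential_decay closed form:
import math

def decayed_lr(step, lr=0.003, floor=0.0001):
  # tf.train.exponential_decay computes lr * decay_rate ** (step / decay_steps).
  return floor + lr * math.e ** (step / -2000.0)

for step in [0, 1000, 2000, 5000, 10000]:
  print(step, decayed_lr(step))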
def my_model(features, target):
  target = tf.one_hot(target, 3, 1, 0)
  logits, loss = learn.models.logistic_regression(features, target)
  train_op = layers.optimize_loss(loss,
                                  framework.get_global_step(),
                                  optimizer='Adagrad',
                                  learning_rate=0.01)
  return tf.argmax(logits, 1), loss, train_op
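# Typical usage of a (predictions, loss, train_op) model_fn like my_model with
# the contrib.learn Estimator on 3-class data such as iris; loading the data
# via sklearn is an assumption made for illustration only.
from sklearn import datasets, model_selection

iris = datasets.load_iris()
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

classifier = learn.Estimator(model_fn=my_model)
classifier.fit(x_train, y_train, steps=1000)
predictions = list(classifier.predict(x_test, as_iterable=True))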
def _model_fn(features, targets, mode): ops.get_default_graph().add_to_collection('IS_TRAINING', mode == 'train') if self.class_weight is not None: constant_op.constant(self.class_weight, name='class_weight') predictions, loss = model_fn(features, targets) if isinstance(self.learning_rate, types.FunctionType): learning_rate = self.learning_rate(contrib_framework.get_global_step()) else: learning_rate = self.learning_rate if isinstance(self.optimizer, types.FunctionType): optimizer = self.optimizer(learning_rate) else: optimizer = self.optimizer train_op = layers.optimize_loss( loss, contrib_framework.get_global_step(), learning_rate=learning_rate, optimizer=optimizer, clip_gradients=self.clip_gradients) return predictions, loss, train_op
def _build_model(self, data, target): ids = tensorflow.split(1, self.n_ids, data) node_vectors = [ learn.ops.categorical_variable(ids[i], self.vocabulary_sizes[i], self.layer_size, str(i)) for i in range(self.n_ids) ] activation_in = tensorflow.squeeze(tensorflow.concat(2, node_vectors), [1]) activation_out = layers.stack(activation_in, layers.fully_connected, self.hidden_units_formation) prediction, loss = learn.models.linear_regression(activation_out, target) train_op = layers.optimize_loss(loss, framework.get_global_step(), self.learning_rate, "SGD") return prediction, loss, train_op
def _loss_to_train_op(self, loss):
  """Map `loss` to a training op."""
  with ops.name_scope('loss_to_train_op'):
    trainable_variables = ops.get_default_graph().get_collection(
        ops.GraphKeys.TRAINABLE_VARIABLES)
    global_step = contrib_framework.get_global_step()
    gradients = self._optimizer.compute_gradients(
        loss=loss, var_list=trainable_variables)
    processed_gradients = self._process_gradients(gradients)
    return self._optimizer.apply_gradients(processed_gradients,
                                           global_step=global_step)
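# _process_gradients is the hook point for gradient post-processing. A minimal
# sketch of one common choice, global-norm clipping; the clipping-norm
# attribute is hypothetical, clip_ops is assumed to come from
# tensorflow.python.ops, and the project's actual processing may differ.
def _process_gradients(self, gradients_vars):
  gradients, variables = zip(*gradients_vars)
  clipped_gradients, _ = clip_ops.clip_by_global_norm(
      gradients, self._gradient_clipping_norm)
  return list(zip(clipped_gradients, variables))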
def _model_fn(features, targets, mode): """Model function.""" ops.get_default_graph().add_to_collection('IS_TRAINING', mode == 'train') if self.class_weight is not None: constant_op.constant(self.class_weight, name='class_weight') predictions, loss = model_fn(features, targets) if isinstance(self.learning_rate, types.FunctionType): learning_rate = self.learning_rate(contrib_framework.get_global_step()) else: learning_rate = self.learning_rate if isinstance(self.optimizer, types.FunctionType): optimizer = self.optimizer(learning_rate) else: optimizer = self.optimizer train_op = layers.optimize_loss( loss, contrib_framework.get_global_step(), learning_rate=learning_rate, optimizer=optimizer, clip_gradients=self.clip_gradients) return predictions, loss, train_op
def before_run(self, run_context):
  return session_run_hook.SessionRunArgs({
      'global_step': contrib_framework.get_global_step(),
      'current_loss': run_context.session.graph.get_operation_by_name(
          'rf_training_loss').outputs[0],
      'confusion_matrix_print': run_context.session.graph.get_operation_by_name(
          'confusion_matrix_print').outputs[0],
      'regression_ornot': run_context.session.graph.get_operation_by_name(
          'regression_ornot').outputs[0],
  })
def _model_fn(features, labels, mode): """Function that returns predictions, training loss, and training op.""" weights = None keys = None if weights_name and weights_name in features: weights = features.pop(weights_name) if keys_name and keys_name in features: keys = features.pop(keys_name) graph_builder = graph_builder_class(params, device_assigner=device_assigner) inference = {} if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.INFER): inference[eval_metrics.INFERENCE_PROB_NAME] = ( graph_builder.inference_graph(features)) if not params.regression: inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( inference[eval_metrics.INFERENCE_PROB_NAME], 1) if keys: inference[KEYS_NAME] = keys # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). loss_deps = [] training_graph = None if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN: training_graph = control_flow_ops.group( graph_builder.training_graph(features, labels, input_weights=weights, num_trainers=num_trainers, trainer_id=trainer_id), state_ops.assign_add(contrib_framework.get_global_step(), 1)) loss_deps.append(training_graph) training_loss = None if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.TRAIN): with ops.control_dependencies(loss_deps): training_loss = graph_builder.training_loss(features, labels, name=LOSS_NAME) # Put weights back in if weights is not None: features[weights_name] = weights return (inference, training_loss, training_graph)
def _build_model(self, data, target): ids = tensorflow.split(1, self.n_ids, data) node_vectors = [ learn.ops.categorical_variable(ids[i], self.vocabulary_sizes[i], self.layer_size, str(i)) for i in range(self.n_ids) ] activation_in = tensorflow.squeeze(tensorflow.concat(2, node_vectors), [1]) activation_out = layers.stack(activation_in, layers.fully_connected, self.hidden_units_formation) prediction, loss = learn.models.linear_regression( activation_out, target) train_op = layers.optimize_loss(loss, framework.get_global_step(), self.learning_rate, 'SGD') return prediction, loss, train_op
def _model_fn(features, labels): """Function that returns predictions, training loss, and training op.""" weights = None keys = None if weights_name and weights_name in features: weights = features.pop(weights_name) if keys_name and keys_name in features: keys = features.pop(keys_name) processed_features, spec = data_ops.ParseDataTensorOrDict(features) _assert_float32(processed_features) if labels is not None: labels = data_ops.ParseLabelTensorOrDict(labels) _assert_float32(labels) graph_builder = graph_builder_class(params, device_assigner=device_assigner) inference = { eval_metrics.INFERENCE_PROB_NAME: graph_builder.inference_graph(processed_features, data_spec=spec) } if not params.regression: inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( inference[eval_metrics.INFERENCE_PROB_NAME], 1) if keys: inference[KEYS_NAME] = keys # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). training_loss = None training_graph = None if labels is not None: training_loss = graph_builder.training_loss(processed_features, labels, data_spec=spec, name=LOSS_NAME) training_graph = control_flow_ops.group( graph_builder.training_graph(processed_features, labels, data_spec=spec, input_weights=weights), state_ops.assign_add(contrib_framework.get_global_step(), 1)) # Put weights back in if weights is not None: features[weights_name] = weights return (inference, training_loss, training_graph)
def emb_classifier(x, x_mask, y, dropout, opt): # print x.get_shape() # batch L x_emb, W_emb = embedding(x, opt) # batch L emb x_emb = tf.expand_dims(x_emb, 3) # batch L emb 1 x_emb = tf.nn.dropout(x_emb, dropout) # batch L emb 1 x_mask = tf.expand_dims(x_mask, axis=-1) x_mask = tf.expand_dims(x_mask, axis=-1) # batch L 1 1 x_sum = tf.multiply(x_emb, x_mask) # batch L emb 1 H_enc = tf.reduce_sum(x_sum, axis=1, keep_dims=True) # batch 1 emb 1 H_enc = tf.squeeze(H_enc) # batch emb x_mask_sum = tf.reduce_sum(x_mask, axis=1, keep_dims=True) # batch 1 1 1 x_mask_sum = tf.squeeze(x_mask_sum, [2, 3]) # batch 1 H_enc_1 = H_enc / x_mask_sum # batch emb H_enc_2 = tf.nn.max_pool(x_emb, [1, opt.maxlen, 1, 1], [1, 1, 1, 1], 'VALID') H_enc_2 = tf.squeeze(H_enc_2) H_enc = tf.concat([H_enc_1, H_enc_2], 1) H_enc = tf.squeeze(H_enc) logits = discriminator_2layer(H_enc, opt, dropout, prefix='classify_', num_outputs=10, is_reuse=None) # batch * 10 prob = tf.nn.softmax(logits) correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)) train_op = layers.optimize_loss(loss, framework.get_global_step(), optimizer='Adam', learning_rate=opt.lr) return accuracy, loss, train_op, W_emb
def cons_disc(x_1, x_2, y, opt, l_temp = 1): # print x.get_shape() # batch L res = {} logits, H_1, H_2, H_1_1, H_2_1 = pair_discriminator(x_1, x_2, opt, l_temp) corr1 = correlation_cost(H_1_1) corr2 = correlation_cost(H_2_1) res['logits'] = logits res['y_pred'] = (logits > 0) # res['H_1'] = H_1 # res['H_2'] = H_2 res['H_1'] = H_1_1 res['H_2'] = H_2_1 res['corr'] = tf.sqrt((corr1 + corr2)/2) if opt.model == 'D': y_pred = logits loss = tf.reduce_mean(y * tf.log(y_pred)) else: loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = logits)) # encourage binary and disentangle loss = loss \ + opt.reg* tf.reduce_mean( tf.square(tf.ones_like(H_1_1)-H_1_1) * tf.square(H_1_1) ) \ + opt.reg* tf.reduce_mean( tf.square(tf.ones_like(H_2_1)-H_2_1) * tf.square(H_2_1) ) if opt.reg_corr != 0: loss += opt.reg_corr* (corr1 + corr2) tf.summary.scalar('loss', loss) train_op = layers.optimize_loss( loss, framework.get_global_step(), optimizer='Adam', learning_rate=opt.lr) return res, loss, train_op
def _get_train_ops(self, features, targets):
  """Method that builds model graph and returns trainer ops.

  Expected to be overridden by sub-classes that require custom support.
  This implementation uses `model_fn` passed as a parameter to the
  constructor to build the model.

  Args:
    features: `Tensor` or `dict` of `Tensor` objects.
    targets: `Tensor` or `dict` of `Tensor` objects.

  Returns:
    Tuple of train `Operation` and loss `Tensor`.
  """
  _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
  train_op = layers.optimize_loss(
      loss,
      contrib_framework.get_global_step(),
      learning_rate=self.learning_rate,
      optimizer=self.optimizer,
      clip_gradients=self.clip_gradients)
  return train_op, loss
def conditional_s2s(src, tgt, z, opt, opt_t=None, is_reuse_generator = None): if not opt_t: opt_t = opt res = {} if opt.use_tgt_z: W_norm_d = embedding_only(opt, prefix = 'd_', is_reuse = None) z, _ = encoder(tgt, W_norm_d, opt, l_temp = 1, prefix = 'd_' , is_reuse = None, is_prob=None, is_padded= False) syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(z, src, tgt, opt, is_reuse = is_reuse_generator, prefix ='g_') if opt.global_feature: z_hat, _ = encoder(syn_one_hot, W_norm_d, opt, l_temp = 1, prefix = 'd_' , is_reuse = True, is_prob=True, is_padded= False) z_loss = tf.reduce_sum(tf.square(z - z_hat))/opt.batch_size/opt.n_hid res['z'] = z res['z_hat'] = z_hat res['z_loss'] = z_loss res['syn_sent'] = syn_sent g_cost = sup_loss + (z_loss*opt.lambda_z if opt.global_feature else 0) tf.summary.scalar('sup_loss', sup_loss) if opt.global_feature: tf.summary.scalar('z_loss', z_loss) summaries = [ "learning_rate", "loss", ] t_vars = tf.trainable_variables() g_vars = [var for var in t_vars if 'g_' in var.name] train_op_g = layers.optimize_loss( g_cost, framework.get_global_step(), optimizer=opt.optimizer, clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad)) if opt.clip_grad else None, variables=g_vars, learning_rate=opt.lr_g, summaries=summaries) return res, g_cost, train_op_g
def _get_train_ops(self, features, targets): """Method that builds model graph and returns trainer ops. Args: features: `Tensor` or `dict` of `Tensor` objects. targets: `Tensor` or `dict` of `Tensor` objects. Returns: Tuple of train `Operation` and loss `Tensor`. """ features, _, weights, spec = data_ops.ParseDataTensorOrDict(features) labels = data_ops.ParseLabelTensorOrDict(targets) features, labels = self._feature_engineering_fn(features, labels) _assert_float32(features) _assert_float32(labels) if weights is not None: if 'input_weights' in self.training_args: logging.warning('Replacing input_weights in training_args.') self.training_args['input_weights'] = weights graph_builder = self.graph_builder_class( self.params, device_assigner=self.device_assigner, **self.construction_args) epoch = None if self.data_feeder: epoch = self.data_feeder.make_epoch_variable() train = control_flow_ops.group( graph_builder.training_graph( features, labels, data_spec=spec, epoch=epoch, **self.training_args), state_ops.assign_add(contrib_framework.get_global_step(), 1)) self.training_loss = graph_builder.training_loss(features, targets) return train, self.training_loss
def model_fn(features, labels, mode): """Builds generic graph for training or eval.""" # Build a Graph that computes predictions from the inference model. logits = inference(features, args.hidden1, args.hidden2) tensors = {} # Add to the Graph the Ops for loss calculation. if mode == ModeKeys.INFER: softmax = tf.nn.softmax(logits) tensors['digit'] = tf.argmax(softmax, 1) loss_op = None else: loss_op = loss(logits, labels) tensors['loss'] = loss_op tf.scalar_summary('loss', loss_op) if mode == ModeKeys.EVAL: # Add to the Graph the Ops for accuracy calculation. accuracy_op = evaluation(logits, labels) tensors['accuracy'] = accuracy_op tf.scalar_summary('training/hptuning/metric', accuracy_op) # Add to the Graph the Ops that calculate and apply gradients. if mode == ModeKeys.TRAIN: global_step = framework.get_global_step() # Create the gradient descent optimizer with the given learning rate. optimizer = tf.train.GradientDescentOptimizer(args.learning_rate) # Create a variable to track the global step. # Use the optimizer to apply the gradients that minimize the loss # (and also increment the global step counter) as a single training step. train_op = optimizer.minimize(loss_op, global_step=global_step) # Add streaming means. else: train_op = None return tensors, loss_op, train_op
def model_fn(features, labels, mode): """Builds generic graph for training or eval.""" # Build a Graph that computes predictions from the inference model. logits = inference(features, args.hidden1, args.hidden2) tensors = {} # Add to the Graph the Ops for loss calculation. if mode == ModeKeys.INFER: tensors['digit'] = tf.argmax(logits, 1) loss_op = None else: loss_op = loss(logits, labels) tensors['loss'] = loss_op tf.scalar_summary('loss', loss_op) if mode == ModeKeys.EVAL: # Add to the Graph the Ops for accuracy calculation. accuracy_op = evaluation(logits, labels) tensors['accuracy'] = accuracy_op tf.scalar_summary('training/hptuning/metric', accuracy_op) # Add to the Graph the Ops that calculate and apply gradients. if mode == ModeKeys.TRAIN: global_step = framework.get_global_step() # Create the gradient descent optimizer with the given learning rate. optimizer = tf.train.GradientDescentOptimizer(args.learning_rate) # Create a variable to track the global step. # Use the optimizer to apply the gradients that minimize the loss # (and also increment the global step counter) as a single training step. train_op = optimizer.minimize(loss_op, global_step=global_step) # Add streaming means. else: train_op = None return tensors, loss_op, train_op
def model_fn(features, labels, mode):
  """BaselineModel model_fn.

  Args:
    features: `Tensor` or `dict` of `Tensor`.
    labels: A `dict` of `Tensor` objects. Expects to have a key/value pair
      for the key self.label_column_name, "IPS_example_weights_with_label",
      and "IPS_example_weights_without_label".
      IPS stands for inverse propensity score, wherein each example is
      assigned a weight inversely proportional to its propensity of
      appearing in the training distribution. Concretely,
      ips-weight = 1/p(x), where p(x) is the probability of x in the
      training distribution.
      In "IPS_without_label", each example is given a weight equal to the
      inverse propensity score of its subgroup. For example,
      1/p("Black Female").
      In "IPS_with_label", each example is assigned a weight equal to the
      inverse propensity score of its subgroup and class membership. For
      example, 1/p("Black Female", "class 0").
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Currently PREDICT mode is not implemented.

  Returns:
    An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
    `mode`, `predictions`, `loss` and the `train_op`. Note that here
    `predictions` is either a `Tensor` or a `dict` of `Tensor` objects,
    representing the prediction of the binary classification model.
    `loss` is a scalar containing the loss of the step and `train_op` is the
    op for training.
  """

  # Instantiates a tensor with true class labels
  class_labels = labels[self._label_column_name]
  ips_example_weights_with_label = labels[IPS_WITH_LABEL_TARGET_COLUMN_NAME]
  ips_example_weights_without_label = labels[
      IPS_WITHOUT_LABEL_TARGET_COLUMN_NAME]

  tf.logging.info('model_fn for mode: {}'.format(mode))

  with tf.name_scope('model'):
    input_layer = tf.feature_column.input_layer(features,
                                                self._feature_columns)
    layer = input_layer
    for unit in self._hidden_units:
      layer = tf.layers.Dense(unit, activation=self._activation)(layer)
    logits = tf.layers.Dense(1)(layer)
    sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
    class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)
    tf.summary.histogram('class_predictions', class_predictions)

  if self._reweighting_type == 'IPS_with_label':
    example_weights = ips_example_weights_with_label
  elif self._reweighting_type == 'IPS_without_label':
    example_weights = ips_example_weights_without_label

  # Initializes Loss Functions
  loss = self._loss(class_labels, logits, example_weights)

  # Sets up dictionaries used for computing performance metrics
  predictions = {
      (self._label_column_name, 'class_ids'):
          tf.reshape(class_predictions, [-1]),
      (self._label_column_name, 'logistic'):
          tf.reshape(sigmoid_output, [-1])
  }

  class_id_kwargs = {
      'labels': class_labels,
      'predictions': class_predictions
  }
  logistics_kwargs = {
      'labels': class_labels,
      'predictions': sigmoid_output
  }

  # EVAL Mode
  if mode == tf_estimator.ModeKeys.EVAL:
    with tf.name_scope('eval_metrics'):
      eval_metric_ops = {
          'accuracy': tf.metrics.accuracy(**class_id_kwargs),
          'precision': tf.metrics.precision(**class_id_kwargs),
          'recall': tf.metrics.recall(**class_id_kwargs),
          'fp': tf.metrics.false_positives(**class_id_kwargs),
          'fn': tf.metrics.false_negatives(**class_id_kwargs),
          'tp': tf.metrics.true_positives(**class_id_kwargs),
          'tn': tf.metrics.true_negatives(**class_id_kwargs),
          'fpr': contrib_metrics.streaming_false_positive_rate(
              **class_id_kwargs),
          'fnr': contrib_metrics.streaming_false_negative_rate(
              **class_id_kwargs),
          'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
          'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
      }

      # EstimatorSpec object for evaluation
      estimator_spec = tf_estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          eval_metric_ops=eval_metric_ops)

  # TRAIN Mode
  if mode == tf_estimator.ModeKeys.TRAIN:
    train_op_primary = contrib_layers.optimize_loss(
        loss=loss,
        learning_rate=self._learning_rate,
        global_step=contrib_framework.get_global_step(),
        optimizer=self._optimizer)

    estimator_spec = tf_estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op_primary)

  return estimator_spec
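# The IPS label columns above are precomputed 1/p(x) weights, as described in
# the docstring. A small sketch of how they can be derived offline with
# pandas; the dataframe and the column names here are hypothetical.
import pandas as pd

def inverse_propensity_weights(df, subgroup_cols, label_col=None):
  cols = subgroup_cols + ([label_col] if label_col else [])
  probs = df.groupby(cols).size() / len(df)  # empirical p(x)

  def weight(row):
    key = tuple(row) if len(cols) > 1 else row.iloc[0]
    return 1.0 / probs[key]

  return df[cols].apply(weight, axis=1)

# df['IPS_example_weights_without_label'] = inverse_propensity_weights(
#     df, ['race', 'sex'])
# df['IPS_example_weights_with_label'] = inverse_propensity_weights(
#     df, ['race', 'sex'], label_col='label')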
def optimize_loss(loss,
                  global_step,
                  learning_rate,
                  optimizer,
                  gradient_noise_scale=None,
                  gradient_multipliers=None,
                  clip_gradients=None,
                  learning_rate_decay_fn=None,
                  update_ops=None,
                  variables=None,
                  name=None,
                  summaries=None,
                  colocate_gradients_with_ops=False):
  """Given loss and parameters for optimizer, returns a training op.

  Various ways of passing optimizers include:

  - string, name of the optimizer like 'SGD', 'Adam'; see OPTIMIZER_CLS_NAMES
    for the full list. E.g. `optimize_loss(..., optimizer='Adam')`.
  - function, takes the learning rate `Tensor` as argument and returns an
    `Optimizer` instance. E.g.
    `optimize_loss(..., optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`.
    Alternatively, if `learning_rate` is `None`, the function takes no
    arguments. E.g.
    `optimize_loss(..., learning_rate=None, optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`.
  - class, subclass of `Optimizer` that takes only one required argument, the
    learning rate, such as AdamOptimizer or AdagradOptimizer. E.g.
    `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`.
  - object, instance of a subclass of `Optimizer`. E.g.
    `optimize_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`.

  Args:
    loss: Scalar `Tensor`.
    global_step: Scalar int `Tensor`, step counter for each update. If not
      supplied, it will be fetched from the default graph (see
      `tf.contrib.framework.get_global_step` for details). If it has not been
      created, no step will be incremented with each weight update.
      `learning_rate_decay_fn` requires `global_step`.
    learning_rate: float or `Tensor`, magnitude of the update per training
      step. Can be `None`.
    optimizer: string, class or optimizer instance, used as trainer. A string
      should be the name of an optimizer, like 'SGD', 'Adam' or 'Adagrad'
      (the full list is in the OPTIMIZER_CLS_NAMES constant). A class should
      be a subclass of `tf.Optimizer` that implements `compute_gradients` and
      `apply_gradients`. An optimizer instance should be an instantiation of
      a `tf.Optimizer` subclass and have `compute_gradients` and
      `apply_gradients` functions.
    gradient_noise_scale: float or None, adds 0-mean normal noise scaled by
      this value.
    gradient_multipliers: dict of variables or variable names to floats. If
      present, gradients for the specified variables will be multiplied by
      the given constant.
    clip_gradients: float, callable or `None`. If a float is provided, global
      clipping is applied to prevent the norm of the gradients from exceeding
      this value. Alternatively, a callable can be provided, e.g.
      `adaptive_clipping`. This callable takes a `list` of
      `(gradients, variables)` `tuple`s and returns the same thing with the
      gradients modified.
    learning_rate_decay_fn: function, takes `learning_rate` and `global_step`
      `Tensor`s and returns a `Tensor`. Can be used to implement any learning
      rate decay function, for example `tf.train.exponential_decay`. Ignored
      if `learning_rate` is not supplied.
    update_ops: list of update `Operation`s to execute at each step. If
      `None`, uses the elements of the UPDATE_OPS collection. The order of
      execution between `update_ops` and `loss` is non-deterministic.
    variables: list of variables to optimize, or `None` to use all trainable
      variables.
    name: The name for this operation, used to scope operations and
      summaries.
    summaries: List of internal quantities to visualize on tensorboard. If
      not set, only the loss and the learning rate will be reported. The
      complete list is in OPTIMIZER_SUMMARIES.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.

  Returns:
    Training op.

  Raises:
    ValueError: if:
      * `loss` is an invalid type or shape.
      * `global_step` is an invalid type or shape.
      * `learning_rate` is an invalid type or value.
      * `optimizer` has the wrong type.
      * `clip_gradients` is neither float nor callable.
      * `learning_rate` and `learning_rate_decay_fn` are supplied, but no
        `global_step` is available.
  """
  loss = ops.convert_to_tensor(loss)
  contrib_framework.assert_scalar(loss)
  if global_step is None:
    global_step = contrib_framework.get_global_step()
  else:
    contrib_framework.assert_global_step(global_step)
  with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
    # Update ops take UPDATE_OPS collection if not provided.
    if update_ops is None:
      update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
    # Make sure update ops are run before computing loss.
    if update_ops:
      loss = control_flow_ops.with_dependencies(list(update_ops), loss)

    # Learning rate variable, with possible decay.
    lr = None
    if learning_rate is not None:
      if (isinstance(learning_rate, ops.Tensor) and
          learning_rate.get_shape().ndims == 0):
        lr = learning_rate
      elif isinstance(learning_rate, float):
        if learning_rate < 0.0:
          raise ValueError("Invalid learning_rate %s.", learning_rate)
        lr = vs.get_variable(
            "learning_rate", [],
            trainable=False,
            initializer=init_ops.constant_initializer(learning_rate))
      else:
        raise ValueError("Learning rate should be 0d Tensor or float. "
                         "Got %s of type %s" % (str(learning_rate),
                                                str(type(learning_rate))))
    if summaries is None:
      summaries = ["loss", "learning_rate"]
    else:
      for summ in summaries:
        if summ not in OPTIMIZER_SUMMARIES:
          raise ValueError(
              "Summaries should be one of [%s], you provided %s." %
              (", ".join(OPTIMIZER_SUMMARIES), summ))
    if learning_rate is not None and learning_rate_decay_fn is not None:
      if global_step is None:
        raise ValueError("global_step is required for learning_rate_decay_fn.")
      lr = learning_rate_decay_fn(lr, global_step)
      if "learning_rate" in summaries:
        summary.scalar("learning_rate", lr)

    # Create optimizer, given specified parameters.
    if isinstance(optimizer, six.string_types):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is string (%s)." % optimizer)
      if optimizer not in OPTIMIZER_CLS_NAMES:
        raise ValueError(
            "Optimizer name should be one of [%s], you provided %s." %
            (", ".join(OPTIMIZER_CLS_NAMES), optimizer))
      opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr)
    elif (isinstance(optimizer, type) and
          issubclass(optimizer, optimizer_.Optimizer)):
      if lr is None:
        raise ValueError("Learning rate is None, but should be specified if "
                         "optimizer is class (%s)." % optimizer)
      opt = optimizer(learning_rate=lr)
    elif isinstance(optimizer, optimizer_.Optimizer):
      opt = optimizer
    elif callable(optimizer):
      if learning_rate is not None:
        opt = optimizer(lr)
      else:
        opt = optimizer()
      if not isinstance(opt, optimizer_.Optimizer):
        raise ValueError("Unrecognized optimizer: function should return "
                         "subclass of Optimizer. Got %s." % str(opt))
    else:
      raise ValueError("Unrecognized optimizer: should be string, "
                       "subclass of Optimizer, instance of "
                       "subclass of Optimizer or function with one argument. "
                       "Got %s." % str(optimizer))

    # All trainable variables, if specific variables are not specified.
    if variables is None:
      variables = vars_.trainable_variables()

    # Compute gradients.
    gradients = opt.compute_gradients(
        loss,
        variables,
        colocate_gradients_with_ops=colocate_gradients_with_ops)

    # Optionally add gradient noise.
    if gradient_noise_scale is not None:
      gradients = _add_scaled_noise_to_gradients(gradients,
                                                 gradient_noise_scale)

    # Multiply some gradients.
    if gradient_multipliers is not None:
      gradients = _multiply_gradients(gradients, gradient_multipliers)

    if "gradient_norm" in summaries:
      summary.scalar("global_norm/gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Optionally clip gradients by global norm.
    if isinstance(clip_gradients, float):
      gradients = _clip_gradients_by_norm(gradients, clip_gradients)
    elif callable(clip_gradients):
      gradients = clip_gradients(gradients)
    elif clip_gradients is not None:
      raise ValueError("Unknown type %s for clip_gradients" %
                       type(clip_gradients))

    # Add scalar summary for loss.
    if "loss" in summaries:
      summary.scalar("loss", loss)

    # Add histograms for variables, gradients and gradient norms.
    for gradient, variable in gradients:
      if isinstance(gradient, ops.IndexedSlices):
        grad_values = gradient.values
      else:
        grad_values = gradient
      if grad_values is not None:
        var_name = variable.name.replace(":", "_")
        if "gradients" in summaries:
          summary.histogram("gradients/%s" % var_name, grad_values)
        if "gradient_norm" in summaries:
          summary.scalar("gradient_norm/%s" % var_name,
                         clip_ops.global_norm([grad_values]))

    if clip_gradients is not None and "gradient_norm" in summaries:
      summary.scalar("global_norm/clipped_gradient_norm",
                     clip_ops.global_norm(list(zip(*gradients))[0]))

    # Create gradient updates.
    grad_updates = opt.apply_gradients(gradients,
                                       global_step=global_step,
                                       name="train")

    # Ensure the train_tensor computes grad_updates.
    train_tensor = control_flow_ops.with_dependencies([grad_updates], loss)

    return train_tensor
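# The four ways of passing `optimizer` described in the docstring above,
# assuming a scalar `loss` and a `global_step` tensor already exist and
# `layers` is tf.contrib.layers:
train_op = layers.optimize_loss(loss, global_step, learning_rate=0.1,
                                optimizer='Adam')                      # by name

train_op = layers.optimize_loss(loss, global_step, learning_rate=0.1,
                                optimizer=tf.train.AdagradOptimizer)   # class

train_op = layers.optimize_loss(loss, global_step, learning_rate=None,
                                optimizer=tf.train.AdagradOptimizer(0.5))  # instance

train_op = layers.optimize_loss(
    loss, global_step, learning_rate=0.1,
    optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5),  # callable
    clip_gradients=5.0,
    learning_rate_decay_fn=lambda lr, step: tf.train.exponential_decay(
        lr, step, decay_steps=10000, decay_rate=0.96))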
def model_fn(features, labels, mode):
  """BaselineModel model_fn.

  Args:
    features: `Tensor` or `dict` of `Tensor`.
    labels: A `dict` of `Tensor` objects. Expects to have a key/value pair
      for the key self.label_column_name.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Currently PREDICT mode is not implemented.

  Returns:
    An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
    `mode`, `predictions`, `loss` and the `train_op`. Note that here
    `predictions` is either a `Tensor` or a `dict` of `Tensor` objects,
    representing the prediction of the binary classification model.
    `loss` is a scalar containing the loss of the step and `train_op` is the
    op for training.
  """

  # Instantiates a tensor with true class labels
  class_labels = labels[self._label_column_name]

  tf.logging.info('model_fn for mode: {}'.format(mode))

  with tf.name_scope('model'):
    input_layer = tf.feature_column.input_layer(features,
                                                self._feature_columns)
    layer = input_layer
    for unit in self._hidden_units:
      layer = tf.layers.Dense(unit, activation=self._activation)(layer)
    logits = tf.layers.Dense(1)(layer)
    sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
    class_predictions = tf.cast(tf.greater(sigmoid_output, 0.5), tf.float32)
    tf.summary.histogram('class_predictions', class_predictions)

  # Initializes Loss Functions
  loss = self._loss(class_labels, logits)

  # Sets up dictionaries used for computing performance metrics
  predictions = {
      (self._label_column_name, 'class_ids'):
          tf.reshape(class_predictions, [-1]),
      (self._label_column_name, 'logistic'):
          tf.reshape(sigmoid_output, [-1])
  }

  class_id_kwargs = {'labels': class_labels, 'predictions': class_predictions}
  logistics_kwargs = {'labels': class_labels, 'predictions': sigmoid_output}

  # EVAL Mode
  if mode == tf.estimator.ModeKeys.EVAL:
    with tf.name_scope('eval_metrics'):
      eval_metric_ops = {
          'accuracy': tf.metrics.accuracy(**class_id_kwargs),
          'precision': tf.metrics.precision(**class_id_kwargs),
          'recall': tf.metrics.recall(**class_id_kwargs),
          'fp': tf.metrics.false_positives(**class_id_kwargs),
          'fn': tf.metrics.false_negatives(**class_id_kwargs),
          'tp': tf.metrics.true_positives(**class_id_kwargs),
          'tn': tf.metrics.true_negatives(**class_id_kwargs),
          'fpr': contrib_metrics.streaming_false_positive_rate(
              **class_id_kwargs),
          'fnr': contrib_metrics.streaming_false_negative_rate(
              **class_id_kwargs),
          'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
          'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
      }

      # EstimatorSpec object for evaluation
      estimator_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=loss,
          eval_metric_ops=eval_metric_ops)

  # TRAIN Mode
  if mode == tf.estimator.ModeKeys.TRAIN:
    train_op_primary = contrib_layers.optimize_loss(
        loss=loss,
        learning_rate=self._learning_rate,
        global_step=contrib_framework.get_global_step(),
        optimizer=self._optimizer)

    estimator_spec = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op_primary)

  return estimator_spec
def model_fn(features, labels, mode):
  """robustModel model_fn.

  Args:
    features: `dict` of `Tensor`.
    labels: A `dict` of `Tensor` objects. Expects to have a key/value pair
      for the key self.label_column_name.
    mode: Defines whether this is training, evaluation or prediction. See
      `ModeKeys`. Currently PREDICT mode is not implemented.

  Returns:
    An instance of `tf.estimator.EstimatorSpec`, which encapsulates the
    `mode`, `predictions`, `loss` and the `train_op`. Note that here
    `predictions` is either a `Tensor` or a `dict` of `Tensor` objects,
    representing the prediction of the binary classification model.
    `loss` is a scalar containing the loss of the step and `train_op` is the
    op for training.

  Raises:
    ValueError: if any name in protected_column_names is missing from
      features.
  """
  for col in self._protected_column_names:
    if col not in features.keys():
      raise ValueError(
          'Protected column <{}> should be in features.'.format(col))

  # Instantiates a tensor with true class labels
  class_labels = labels[self._label_column_name]

  # Initialize a global step variable used for alternate training
  current_step = self._get_or_create_global_step_var()

  tf.logging.info('model_fn for mode: {}'.format(mode))

  with tf.name_scope('primary_NN'):
    with tf.variable_scope('primary'):
      input_layer = tf.feature_column.input_layer(features,
                                                  self._feature_columns)
      layer = input_layer
      for unit in self._primary_hidden_units:
        layer = tf.layers.Dense(unit, activation=self._activation)(layer)
      logits = tf.layers.Dense(1)(layer)
      sigmoid_output = tf.nn.sigmoid(logits, name='sigmoid')
      class_predictions = tf.cast(
          tf.greater(sigmoid_output, 0.5), tf.float32)
      tf.summary.histogram('class_predictions', class_predictions)

  with tf.name_scope('adversary_NN'):
    with tf.variable_scope('adversary'):
      # Filters and keeps only protected features and feature columns.
      adversarial_features, adversary_feature_columns = (
          self._get_adversary_features_and_feature_columns(features))
      adv_input_layer = tf.feature_column.input_layer(
          adversarial_features, adversary_feature_columns)
      adv_layer = adv_input_layer
      for adv_unit in self._adversary_hidden_units:
        adv_layer = tf.layers.Dense(adv_unit)(adv_layer)
      adv_output_layer = tf.layers.Dense(1, use_bias=True)(adv_layer)

      example_weights = tf.cond(
          tf.greater(current_step, self._pretrain_steps),
          true_fn=lambda: self._compute_example_weights(adv_output_layer),
          false_fn=lambda: tf.ones_like(class_labels))

  # Initializes Loss Functions
  primary_loss = self._primary_loss(class_labels, logits, example_weights)
  adversary_loss = self._adversary_loss(class_labels, logits,
                                        example_weights)

  # Sets up dictionaries used for computing performance metrics
  predictions = {
      (self._label_column_name, 'class_ids'):
          tf.reshape(class_predictions, [-1]),
      (self._label_column_name, 'logistic'):
          tf.reshape(sigmoid_output, [-1]),
      ('example_weights'):
          tf.reshape(example_weights, [-1])
  }

  class_id_kwargs = {'labels': class_labels, 'predictions': class_predictions}
  logistics_kwargs = {'labels': class_labels, 'predictions': sigmoid_output}

  # EVAL Mode
  if mode == tf.estimator.ModeKeys.EVAL:
    with tf.name_scope('eval_metrics'):
      eval_metric_ops = {
          'accuracy': tf.metrics.accuracy(**class_id_kwargs),
          'precision': tf.metrics.precision(**class_id_kwargs),
          'recall': tf.metrics.recall(**class_id_kwargs),
          'fp': tf.metrics.false_positives(**class_id_kwargs),
          'fn': tf.metrics.false_negatives(**class_id_kwargs),
          'tp': tf.metrics.true_positives(**class_id_kwargs),
          'tn': tf.metrics.true_negatives(**class_id_kwargs),
          'fpr': contrib_metrics.streaming_false_positive_rate(
              **class_id_kwargs),
          'fnr': contrib_metrics.streaming_false_negative_rate(
              **class_id_kwargs),
          'auc': tf.metrics.auc(curve='ROC', **logistics_kwargs),
          'aucpr': tf.metrics.auc(curve='PR', **logistics_kwargs)
      }

      # EstimatorSpec object for evaluation
      estimator_spec = tf.estimator.EstimatorSpec(
          mode=mode,
          predictions=predictions,
          loss=primary_loss,
          eval_metric_ops=eval_metric_ops)

  # TRAIN Mode
  if mode == tf.estimator.ModeKeys.TRAIN:
    # Filters trainable variables for each task
    all_trainable_vars = tf.trainable_variables()
    primary_trainable_vars = [
        v for v in all_trainable_vars if 'primary' in v.op.name
    ]
    adversary_trainable_vars = [
        v for v in all_trainable_vars if 'adversary' in v.op.name
    ]

    # TRAIN_OP for adversary DNN
    train_op_adversary = contrib_layers.optimize_loss(
        loss=adversary_loss,
        variables=adversary_trainable_vars,
        global_step=contrib_framework.get_global_step(),
        learning_rate=self._adversary_learning_rate,
        optimizer=self._optimizer)

    # TRAIN_OP for primary DNN
    train_op_primary = contrib_layers.optimize_loss(
        loss=primary_loss,
        variables=primary_trainable_vars,
        global_step=contrib_framework.get_global_step(),
        learning_rate=self._primary_learning_rate,
        optimizer=self._optimizer)

    # Up to `pretrain_steps`, train the primary model only. Beyond
    # `pretrain_steps`, alternate between the primary and the adversary.
    estimator_spec = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=primary_loss + adversary_loss,
        train_op=tf.cond(
            tf.greater(current_step, self._pretrain_steps),
            true_fn=lambda: tf.group([train_op_primary, train_op_adversary]),
            false_fn=lambda: tf.group([train_op_primary])))

  return estimator_spec
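# _compute_example_weights is not shown above; one plausible implementation,
# purely illustrative and not taken from the original project: squash the
# adversary head with a sigmoid and normalize so the weights have mean 1,
# which leaves the effective learning rate of the primary loss unchanged.
def _compute_example_weights(self, adv_output_layer):
  example_weights = tf.nn.sigmoid(adv_output_layer)
  mean_weight = tf.reduce_mean(example_weights)
  return example_weights / tf.maximum(mean_weight, 1e-4)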
def random_forest_model_fn(features, labels, mode, params, config): """Function that returns predictions, training loss, and training op.""" labels_tensor = labels if isinstance(labels, dict) and len(labels) == 1: labels_tensor = labels.values()[0] weights_name = params["weights_name"] keys_name = params["keys_name"] num_classes = tf.identity(params['num_classes'], name='num_classes') params_toGraphs = tensor_forest.ForestHParams( num_classes=params['num_classes'], num_features=params['num_features'], num_trees=params['num_trees'], max_nodes=params['max_nodes'], regression=params['regression'], split_after_samples=params['split_after_samples']) # 注意第90行 fill() # https://github.com/tensorflow/tensorflow/blob/r1.2/tensorflow/contrib # /tensor_forest/python/tensor_forest.py params_toGraphs = params_toGraphs.fill() graph_builder_class = tensor_forest.RandomForestGraphs early_stopping_rounds = params["early_stopping_rounds"] num_trainers = 1 trainer_id = 0 report_feature_importances = False model_dir = None local_eval = False device_assigner = None weights = None if weights_name and weights_name in features: weights = features.pop(weights_name) keys = None if keys_name and keys_name in features: keys = features.pop(keys_name) # If we're doing eval, optionally ignore device_assigner. # Also ignore device assigner if we're exporting (mode == INFER) dev_assn = device_assigner if (mode == model_fn_lib.ModeKeys.INFER or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)): dev_assn = None graph_builder = graph_builder_class(params_toGraphs, device_assigner=dev_assn) inference = {} predictions = {} output_alternatives = None # if (mode == model_fn_lib.ModeKeys.EVAL or # mode == model_fn_lib.ModeKeys.INFER): if True: inference[eval_metrics.INFERENCE_PROB_NAME] = ( graph_builder.inference_graph(features)) if params_toGraphs.regression: predictions = {None: inference[eval_metrics.INFERENCE_PROB_NAME]} output_alternatives = { None: (constants.ProblemType.LINEAR_REGRESSION, predictions) } else: inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( inference[eval_metrics.INFERENCE_PROB_NAME], 1) predictions = { prediction_key.PredictionKey.PROBABILITIES: inference[eval_metrics.INFERENCE_PROB_NAME], prediction_key.PredictionKey.CLASSES: inference[eval_metrics.INFERENCE_PRED_NAME] } output_alternatives = { None: (constants.ProblemType.CLASSIFICATION, predictions) } if report_feature_importances: inference[eval_metrics.FEATURE_IMPORTANCE_NAME] = ( graph_builder.feature_importances()) if keys is not None: inference[keys_name] = keys # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). 
  loss_deps = []
  training_graph = None
  training_hooks = []
  scaffold = None
  if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
    training_graph = control_flow_ops.group(
        graph_builder.training_graph(features, labels,
                                     input_weights=weights,
                                     num_trainers=num_trainers,
                                     trainer_id=trainer_id),
        state_ops.assign_add(contrib_framework.get_global_step(), 1))
    loss_deps.append(training_graph)
    if hasattr(graph_builder, 'finalize_training'):
      finalize_listener = EveryCheckpointPreSaveListener(
          graph_builder.finalize_training())
      scaffold = monitored_session.Scaffold()
      training_hooks.append(
          basic_session_run_hooks.CheckpointSaverHook(
              model_dir, save_secs=600, save_steps=None,
              scaffold=scaffold,
              listeners=[finalize_listener]))

  training_loss = None
  if (mode == model_fn_lib.ModeKeys.EVAL or
      mode == model_fn_lib.ModeKeys.TRAIN):
    with ops.control_dependencies(loss_deps):
      training_loss = graph_builder.training_loss(
          features, labels, name='rf_training_loss')

  # Give these tensors explicit names so a hook can fetch them later.
  if not params['regression']:
    confusion_matrix_print = confusion_matrix(
        labels=labels_tensor,
        predictions=predictions['classes'],
        num_classes=num_classes,
    )
    confusion_matrix_print = tf.identity(confusion_matrix_print,
                                         name='confusion_matrix_print')
  else:
    confusion_matrix_print = tf.identity(0, name='confusion_matrix_print')

  regression_ornot = tf.identity(params['regression'], name='regression_ornot')

  # Put weights back in.
  if weights is not None:
    features[weights_name] = weights

  if early_stopping_rounds:
    training_hooks.append(TensorForestLossHook(early_stopping_rounds))

  metrics = {}
  # metrics[metric_key.MetricKey.AUC] = metrics_lib.streaming_auc(
  #     labels=labels_tensor,
  #     predictions=inference[eval_metrics.INFERENCE_PRED_NAME]
  # )
  if not params_toGraphs.regression:
    metrics['eval_confusion_matrix'] = confusion_matrix(
        labels=labels_tensor,
        predictions=predictions['classes'],
        num_classes=params['num_classes'],
    )

  return model_fn_lib.ModelFnOps(mode=mode,
                                 predictions=inference,
                                 loss=training_loss,
                                 train_op=training_graph,
                                 training_hooks=training_hooks,
                                 scaffold=scaffold,
                                 eval_metric_ops=metrics,
                                 output_alternatives=output_alternatives)
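# --- Illustrative sketch (assumed usage, not from the original source) ---
# random_forest_model_fn reads every hyperparameter from `params`. A minimal
# dict covering each key the function accesses might look like the one below;
# the numeric values and the commented-out Estimator wiring are placeholders.
from tensorflow.contrib import learn as contrib_learn

example_params = {
    'num_classes': 3,
    'num_features': 4,
    'num_trees': 50,
    'max_nodes': 1000,
    'regression': False,
    'split_after_samples': 250,
    'weights_name': None,          # or the name of a weight column in `features`
    'keys_name': None,             # or the name of an example-id column
    'early_stopping_rounds': 100,  # a falsy value disables TensorForestLossHook
}

# estimator = contrib_learn.Estimator(
#     model_fn=random_forest_model_fn,
#     params=example_params,
#     model_dir='/tmp/random_forest_model')  # hypothetical path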
def dnn_sampled_softmax_classifier_model_fn(features, target_indices, mode, params): """model_fn that uses candidate sampling. Args: features: Single Tensor or dict of Tensor (depends on data passed to `fit`) target_indices: A single Tensor of shape [batch_size, n_labels] containing the target indices. mode: Represents if this training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters that are listed below. hidden_units- List of hidden units per layer. All layers are fully connected. Ex. `[64, 32]` means first layer has 64 nodes and second one has 32. feature_columns- An iterable containing all the feature columns used by the model. All items in the set should be instances of classes derived from `FeatureColumn`. n_classes- number of target classes. It must be greater than 2. n_samples- number of sample target classes. Needs to be tuned - A good starting point could be 2% of n_classes. n_labels- number of labels in each example. top_k- The number of classes to predict. optimizer- An instance of `tf.Optimizer` used to train the model. If `None`, will use an Adagrad optimizer. dropout- When not `None`, the probability we will drop out a given coordinate. gradient_clip_norm- A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. See tf.clip_by_global_norm for more details. num_ps_replicas- The number of parameter server replicas. Returns: predictions: A single Tensor or a dict of Tensors. loss: A scalar containing the loss of the step. train_op: The op for training. """ hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] n_classes = params["n_classes"] n_samples = params["n_samples"] n_labels = params["n_labels"] top_k = params["top_k"] optimizer = params["optimizer"] dropout = params["dropout"] gradient_clip_norm = params["gradient_clip_norm"] num_ps_replicas = params["num_ps_replicas"] parent_scope = "dnn_ss" # Setup the input layer partitioner. input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20)) # Create the input layer. with variable_scope.variable_scope( parent_scope + "/input_from_feature_columns", features.values(), partitioner=input_layer_partitioner) as scope: net = layers.input_from_feature_columns( features, feature_columns, weight_collections=[parent_scope], scope=scope) # Setup the hidden layer partitioner. hidden_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) final_hidden_layer_dim = None # Create hidden layers using fully_connected. for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( parent_scope + "/hiddenlayer_%d" % layer_id, [net], partitioner=hidden_layer_partitioner) as scope: net = layers.fully_connected(net, num_hidden_units, variables_collections=[parent_scope], scope=scope) final_hidden_layer_dim = num_hidden_units # Add dropout if it is enabled. if dropout is not None and mode == estimator.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) # Create the weights and biases for the logit layer. 
with variable_scope.variable_scope( parent_scope + "/logits", [net], partitioner=hidden_layer_partitioner) as scope: dtype = net.dtype.base_dtype weights_shape = [n_classes, final_hidden_layer_dim] weights = variables.model_variable( "weights", shape=weights_shape, dtype=dtype, initializer=initializers.xavier_initializer(), trainable=True, collections=[parent_scope]) biases = variables.model_variable( "biases", shape=[n_classes,], dtype=dtype, initializer=init_ops.zeros_initializer, trainable=True, collections=[parent_scope]) if mode == estimator.ModeKeys.TRAIN: # Call the candidate sampling APIs and calculate the loss. sampled_values = nn.learned_unigram_candidate_sampler( true_classes=math_ops.to_int64(target_indices), num_true=n_labels, num_sampled=n_samples, unique=True, range_max=n_classes) sampled_softmax_loss = nn.sampled_softmax_loss( weights=weights, biases=biases, inputs=net, labels=math_ops.to_int64(target_indices), num_sampled=n_samples, num_classes=n_classes, num_true=n_labels, sampled_values=sampled_values) loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss") train_op = optimizers.optimize_loss( loss=loss, global_step=contrib_framework.get_global_step(), learning_rate=_DEFAULT_LEARNING_RATE, optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, name=parent_scope) return None, loss, train_op elif mode == estimator.ModeKeys.EVAL: logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) # Since the targets have multiple labels, setup the target probabilities # as 1.0/n_labels for each of the labels. target_one_hot = array_ops.one_hot( indices=target_indices, depth=n_classes, on_value=1.0 / n_labels) target_one_hot = math_ops.reduce_sum( input_tensor=target_one_hot, reduction_indices=[1]) loss = math_ops.reduce_mean( nn.softmax_cross_entropy_with_logits(logits, target_one_hot)) return predictions, loss, None elif mode == estimator.ModeKeys.INFER: logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), biases) predictions = {} predictions[_PROBABILITIES] = nn.softmax(logits) predictions[_CLASSES] = math_ops.argmax(logits, 1) _, predictions[_TOP_K] = nn.top_k(logits, top_k) return predictions, None, None
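# --- Illustrative sketch (not from the original source) ---
# The model_fn above trains with tf.nn.sampled_softmax_loss (only `n_samples`
# negative classes are scored per step) but evaluates with the full softmax
# over all `n_classes`. The standalone snippet below shows the same
# train/eval asymmetry on made-up shapes; every size here is a placeholder.
import tensorflow as tf

batch_size, hidden_dim, n_classes, n_samples = 32, 128, 50000, 64

inputs = tf.placeholder(tf.float32, [batch_size, hidden_dim])
labels = tf.placeholder(tf.int64, [batch_size, 1])   # one true class per row
softmax_weights = tf.get_variable("softmax_weights", [n_classes, hidden_dim])
softmax_biases = tf.get_variable("softmax_biases", [n_classes],
                                 initializer=tf.zeros_initializer())

# Training: cheap, sampled approximation of the softmax cross-entropy.
train_loss = tf.reduce_mean(
    tf.nn.sampled_softmax_loss(
        weights=softmax_weights,
        biases=softmax_biases,
        labels=labels,
        inputs=inputs,
        num_sampled=n_samples,
        num_classes=n_classes))

# Evaluation: exact logits over every class, as in the EVAL branch above.
logits = tf.nn.bias_add(
    tf.matmul(inputs, softmax_weights, transpose_b=True), softmax_biases)
eval_loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.squeeze(labels, axis=1), logits=logits))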
def _model_fn(features, labels, mode): """Function that returns predictions, training loss, and training op.""" weights = None if weights_name and weights_name in features: weights = features.pop(weights_name) keys = None if keys_name and keys_name in features: keys = features.pop(keys_name) # If we're doing eval, optionally ignore device_assigner. # Also ignore device assigner if we're exporting (mode == INFER) dev_assn = device_assigner if (mode == model_fn_lib.ModeKeys.INFER or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)): dev_assn = None graph_builder = graph_builder_class(params, device_assigner=dev_assn) inference = {} output_alternatives = None if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.INFER): inference[eval_metrics.INFERENCE_PROB_NAME] = ( graph_builder.inference_graph(features)) if params.regression: predictions = { None: inference[eval_metrics.INFERENCE_PROB_NAME]} output_alternatives = { None: (constants.ProblemType.LINEAR_REGRESSION, predictions)} else: inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( inference[eval_metrics.INFERENCE_PROB_NAME], 1) predictions = { prediction_key.PredictionKey.PROBABILITIES: inference[eval_metrics.INFERENCE_PROB_NAME], prediction_key.PredictionKey.CLASSES: inference[eval_metrics.INFERENCE_PRED_NAME]} output_alternatives = { None: (constants.ProblemType.CLASSIFICATION, predictions)} if keys is not None: inference[keys_name] = keys # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). loss_deps = [] training_graph = None training_hooks = [] scaffold = None if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN: training_graph = control_flow_ops.group( graph_builder.training_graph( features, labels, input_weights=weights, num_trainers=num_trainers, trainer_id=trainer_id), state_ops.assign_add(contrib_framework.get_global_step(), 1)) loss_deps.append(training_graph) training_loss = None if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.TRAIN): with ops.control_dependencies(loss_deps): training_loss = graph_builder.training_loss( features, labels, name=LOSS_NAME) # Put weights back in if weights is not None: features[weights_name] = weights if early_stopping_rounds: training_hooks.append(TensorForestLossHook(early_stopping_rounds, loss_op=training_loss)) if report_feature_importances: training_hooks.append(TensorForestRunOpAtEndHook( {'feature_importances': graph_builder.feature_importances()})) return model_fn_lib.ModelFnOps( mode=mode, predictions=inference, loss=training_loss, train_op=training_graph, training_hooks=training_hooks, scaffold=scaffold, output_alternatives=output_alternatives)
def _model_fn(features, labels, mode): """Function that returns predictions, training loss, and training op.""" if (isinstance(features, ops.Tensor) or isinstance(features, sparse_tensor.SparseTensor)): features = {'features': features} if feature_columns: features = features.copy() features.update( layers.transform_features(features, feature_columns)) weights = None if weights_name and weights_name in features: weights = features.pop(weights_name) keys = None if keys_name and keys_name in features: keys = features.pop(keys_name) # If we're doing eval, optionally ignore device_assigner. # Also ignore device assigner if we're exporting (mode == INFER) dev_assn = device_assigner if (mode == model_fn_lib.ModeKeys.INFER or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)): dev_assn = None graph_builder = graph_builder_class(params, device_assigner=dev_assn) logits, tree_paths, regression_variance = graph_builder.inference_graph( features) summary.scalar('average_tree_size', graph_builder.average_size()) # For binary classification problems, convert probabilities to logits. # Includes hack to get around the fact that a probability might be 0 or 1. if not params.regression and params.num_classes == 2: class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1]) logits = math_ops.log( math_ops.maximum( class_1_probs / math_ops.maximum(1.0 - class_1_probs, EPSILON), EPSILON)) # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). training_graph = None training_hooks = [] if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN: with ops.control_dependencies([logits.op]): training_graph = control_flow_ops.group( graph_builder.training_graph(features, labels, input_weights=weights, num_trainers=num_trainers, trainer_id=trainer_id), state_ops.assign_add(contrib_framework.get_global_step(), 1)) # Put weights back in if weights is not None: features[weights_name] = weights # TensorForest's training graph isn't calculated directly from the loss # like many other models. def _train_fn(unused_loss): return training_graph model_ops = model_head.create_model_fn_ops(features=features, labels=labels, mode=mode, train_op_fn=_train_fn, logits=logits, scope=head_scope) # Ops are run in lexigraphical order of their keys. Run the resource # clean-up op last. all_handles = graph_builder.get_all_resource_handles() ops_at_end = { '9: clean up resources': control_flow_ops.group(*[ resource_variable_ops.destroy_resource_op(handle) for handle in all_handles ]) } if report_feature_importances: ops_at_end['1: feature_importances'] = ( graph_builder.feature_importances()) training_hooks.append(TensorForestRunOpAtEndHook(ops_at_end)) if early_stopping_rounds: training_hooks.append( TensorForestLossHook( early_stopping_rounds, early_stopping_loss_threshold=early_stopping_loss_threshold, loss_op=model_ops.loss)) model_ops.training_hooks.extend(training_hooks) if keys is not None: model_ops.predictions[keys_name] = keys if params.inference_tree_paths: model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths if params.regression: model_ops.predictions[ VARIANCE_PREDICTION_KEY] = regression_variance return model_ops
def _model_fn(features, labels, mode): """Function that returns predictions, training loss, and training op.""" weights = None if weights_name and weights_name in features: weights = features.pop(weights_name) keys = None if keys_name and keys_name in features: keys = features.pop(keys_name) # If we're doing eval, optionally ignore device_assigner. # Also ignore device assigner if we're exporting (mode == INFER) dev_assn = device_assigner if (mode == model_fn_lib.ModeKeys.INFER or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)): dev_assn = None graph_builder = graph_builder_class(params, device_assigner=dev_assn) inference = {} output_alternatives = None if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.INFER): inference[eval_metrics.INFERENCE_PROB_NAME] = ( graph_builder.inference_graph(features)) if params.regression: predictions = { None: inference[eval_metrics.INFERENCE_PROB_NAME] } output_alternatives = { None: (constants.ProblemType.LINEAR_REGRESSION, predictions) } else: inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( inference[eval_metrics.INFERENCE_PROB_NAME], 1) predictions = { prediction_key.PredictionKey.PROBABILITIES: inference[eval_metrics.INFERENCE_PROB_NAME], prediction_key.PredictionKey.CLASSES: inference[eval_metrics.INFERENCE_PRED_NAME] } output_alternatives = { None: (constants.ProblemType.CLASSIFICATION, predictions) } if keys is not None: inference[keys_name] = keys # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). loss_deps = [] training_graph = None training_hooks = [] scaffold = None if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN: training_graph = control_flow_ops.group( graph_builder.training_graph(features, labels, input_weights=weights, num_trainers=num_trainers, trainer_id=trainer_id), state_ops.assign_add(contrib_framework.get_global_step(), 1)) loss_deps.append(training_graph) if hasattr(graph_builder, 'finalize_training'): finalize_listener = EveryCheckpointPreSaveListener( graph_builder.finalize_training()) scaffold = monitored_session.Scaffold() training_hooks.append( basic_session_run_hooks.CheckpointSaverHook( model_dir, save_secs=600, save_steps=None, scaffold=scaffold, listeners=[finalize_listener])) training_loss = None if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.TRAIN): with ops.control_dependencies(loss_deps): training_loss = graph_builder.training_loss(features, labels, name=LOSS_NAME) # Put weights back in if weights is not None: features[weights_name] = weights if early_stopping_rounds: training_hooks.append(TensorForestLossHook(early_stopping_rounds)) if report_feature_importances: training_hooks.append( TensorForestRunOpAtEndHook({ 'feature_importances': graph_builder.feature_importances() })) return model_fn_lib.ModelFnOps(mode=mode, predictions=inference, loss=training_loss, train_op=training_graph, training_hooks=training_hooks, scaffold=scaffold, output_alternatives=output_alternatives)
def _model_fn(features, labels, mode): """Function that returns predictions, training loss, and training op.""" weights = None if weights_name and weights_name in features: weights = features.pop(weights_name) # If we're doing eval, optionally ignore device_assigner. # Also ignore device assigner if we're exporting (mode == INFER) dev_assn = device_assigner if (mode == model_fn_lib.ModeKeys.INFER or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)): dev_assn = None graph_builder = graph_builder_class(params, device_assigner=dev_assn) inference = {} if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.INFER): inference[eval_metrics.INFERENCE_PROB_NAME] = ( graph_builder.inference_graph(features)) if not params.regression: inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( inference[eval_metrics.INFERENCE_PROB_NAME], 1) if report_feature_importances: inference[eval_metrics.FEATURE_IMPORTANCE_NAME] = ( graph_builder.feature_importances()) # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). loss_deps = [] training_graph = None training_hooks = [] scaffold = None if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN: training_graph = control_flow_ops.group( graph_builder.training_graph( features, labels, input_weights=weights, num_trainers=num_trainers, trainer_id=trainer_id), state_ops.assign_add(contrib_framework.get_global_step(), 1)) loss_deps.append(training_graph) if hasattr(graph_builder, 'finalize_training'): finalize_listener = EveryCheckpointPreSaveListener( graph_builder.finalize_training()) scaffold = monitored_session.Scaffold() training_hooks.append( basic_session_run_hooks.CheckpointSaverHook( model_dir, save_secs=600, save_steps=None, scaffold=scaffold, listeners=[finalize_listener])) training_loss = None if (mode == model_fn_lib.ModeKeys.EVAL or mode == model_fn_lib.ModeKeys.TRAIN): with ops.control_dependencies(loss_deps): training_loss = graph_builder.training_loss( features, labels, name=LOSS_NAME) # Put weights back in if weights is not None: features[weights_name] = weights if early_stopping_rounds: training_hooks.append(TensorForestLossHook(early_stopping_rounds)) return model_fn_lib.ModelFnOps( mode=mode, predictions=inference, loss=training_loss, train_op=training_graph, training_hooks=training_hooks, scaffold=scaffold)
def _model_fn(features, labels, mode): """Function that returns predictions, training loss, and training op.""" if (isinstance(features, ops.Tensor) or isinstance(features, sparse_tensor.SparseTensor)): features = {'features': features} weights = None if weights_name and weights_name in features: weights = features.pop(weights_name) keys = None if keys_name and keys_name in features: keys = features.pop(keys_name) # If we're doing eval, optionally ignore device_assigner. # Also ignore device assigner if we're exporting (mode == INFER) dev_assn = device_assigner if (mode == model_fn_lib.ModeKeys.INFER or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)): dev_assn = None graph_builder = graph_builder_class(params, device_assigner=dev_assn) logits, tree_paths, regression_variance = graph_builder.inference_graph( features) summary.scalar('average_tree_size', graph_builder.average_size()) # For binary classification problems, convert probabilities to logits. # Includes hack to get around the fact that a probability might be 0 or 1. if not params.regression and params.num_classes == 2: class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1]) logits = math_ops.log( math_ops.maximum(class_1_probs / math_ops.maximum( 1.0 - class_1_probs, EPSILON), EPSILON)) # labels might be None if we're doing prediction (which brings up the # question of why we force everything to adhere to a single model_fn). training_graph = None training_hooks = [] if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN: with ops.control_dependencies([logits.op]): training_graph = control_flow_ops.group( graph_builder.training_graph( features, labels, input_weights=weights, num_trainers=num_trainers, trainer_id=trainer_id), state_ops.assign_add(contrib_framework.get_global_step(), 1)) # Put weights back in if weights is not None: features[weights_name] = weights # TensorForest's training graph isn't calculated directly from the loss # like many other models. def _train_fn(unused_loss): return training_graph model_ops = model_head.create_model_fn_ops( features=features, labels=labels, mode=mode, train_op_fn=_train_fn, logits=logits, scope=head_scope) # Ops are run in lexigraphical order of their keys. Run the resource # clean-up op last. all_handles = graph_builder.get_all_resource_handles() ops_at_end = { '9: clean up resources': control_flow_ops.group( *[resource_variable_ops.destroy_resource_op(handle) for handle in all_handles])} if report_feature_importances: ops_at_end['1: feature_importances'] = ( graph_builder.feature_importances()) training_hooks.append(TensorForestRunOpAtEndHook(ops_at_end)) if early_stopping_rounds: training_hooks.append( TensorForestLossHook( early_stopping_rounds, early_stopping_loss_threshold=early_stopping_loss_threshold, loss_op=model_ops.loss)) model_ops.training_hooks.extend(training_hooks) if keys is not None: model_ops.predictions[keys_name] = keys if params.inference_tree_paths: model_ops.predictions[TREE_PATHS_PREDICTION_KEY] = tree_paths if params.regression: model_ops.predictions[VARIANCE_PREDICTION_KEY] = regression_variance return model_ops
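# --- Illustrative sketch (not from the original source) ---
# For binary problems the forest emits class probabilities, but the head used
# above expects a single logit, so the model_fn converts the class-1
# probability to a log-odds value, clamping with EPSILON so probabilities of
# exactly 0 or 1 do not produce infinities. The same conversion in isolation:
import tensorflow as tf

EPSILON = 1e-8  # placeholder; the original module defines its own constant


def probability_to_logit(probs):
  """Maps [batch, 2] class probabilities to [batch, 1] class-1 logits."""
  class_1_probs = tf.slice(probs, [0, 1], [-1, 1])
  return tf.log(
      tf.maximum(class_1_probs / tf.maximum(1.0 - class_1_probs, EPSILON),
                 EPSILON))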
def optimize_loss(loss, global_step, learning_rate, optimizer, gradient_noise_scale=None, gradient_multipliers=None, clip_gradients=None, learning_rate_decay_fn=None, update_ops=None, variables=None, name=None, summaries=None, colocate_gradients_with_ops=False, increment_global_step=True): """Given loss and parameters for optimizer, returns a training op. Various ways of passing optimizers, include: - string, name of the optimizer like 'SGD', 'Adam', see OPTIMIZER_CLS_NAMES for full list. E.g. `optimize_loss(..., optimizer='Adam')`. - function, takes learning rate `Tensor` as argument and must return `Optimizer` instance. E.g. `optimize_loss(..., optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.5))`. Alternatively, if `learning_rate` is `None`, the function takes no arguments. E.g. `optimize_loss(..., learning_rate=None, optimizer=lambda: tf.train.MomentumOptimizer(0.5, momentum=0.5))`. - class, subclass of `Optimizer` that takes only one required argument - learning rate, such as AdamOptimizer, AdagradOptimizer. E.g. `optimize_loss(..., optimizer=tf.train.AdagradOptimizer)`. - object, instance of subclass of `Optimizer`. E.g., `optimizer_loss(..., optimizer=tf.train.AdagradOptimizer(0.5))`. Args: loss: Scalar `Tensor`. global_step: Scalar int `Tensor`, step counter to update on each step unless `increment_global_step` is `False`. If not supplied, it will be fetched from the default graph (see `tf.train.get_global_step` for details). If it's not been created, no step will be incremented with each weight update. `learning_rate_decay_fn` requires `global_step`. learning_rate: float or `Tensor`, magnitude of update per each training step. Can be `None`. optimizer: string, class or optimizer instance, used as trainer. string should be name of optimizer, like 'SGD', 'Adam', 'Adagrad'. Full list in OPTIMIZER_CLS_NAMES constant. class should be sub-class of `tf.Optimizer` that implements `compute_gradients` and `apply_gradients` functions. optimizer instance should be instantiation of `tf.Optimizer` sub-class and have `compute_gradients` and `apply_gradients` functions. gradient_noise_scale: float or None, adds 0-mean normal noise scaled by this value. gradient_multipliers: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. clip_gradients: float, callable or `None`. If float, is provided, a global clipping is applied to prevent the norm of the gradient to exceed this value. Alternatively, a callable can be provided e.g.: adaptive_clipping. This callable takes a `list` of `(gradients, variables)` `tuple`s and returns the same thing with the gradients modified. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay functions. For example: `tf.train.exponential_decay`. Ignored if `learning_rate` is not supplied. update_ops: list of update `Operation`s to execute at each step. If `None`, uses elements of UPDATE_OPS collection. The order of execution between `update_ops` and `loss` is non-deterministic. variables: list of variables to optimize or `None` to use all trainable variables. name: The name for this operation is used to scope operations and summaries. summaries: List of internal quantities to visualize on tensorboard. If not set only the loss and the learning rate will be reported. The complete list is in OPTIMIZER_SUMMARIES. 
colocate_gradients_with_ops: If True, try colocating gradients with the corresponding op. increment_global_step: Whether to increment `global_step`. If your model calls `optimize_loss` multiple times per training step (e.g. to optimize different parts of the model), use this arg to avoid incrementing `global_step` more times than necessary. Returns: Training op. Raises: ValueError: if: * `loss` is an invalid type or shape. * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` is wrong type. * `clip_gradients` is not float or callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. * `gradients` is empty """ loss = ops.convert_to_tensor(loss) contrib_framework.assert_scalar(loss) if global_step is None: global_step = contrib_framework.get_global_step() else: contrib_framework.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: update_ops = set(ops.get_collection(ops.GraphKeys.UPDATE_OPS)) # Make sure update ops are ran before computing loss. if update_ops: loss = control_flow_ops.with_dependencies(list(update_ops), loss) # Learning rate variable, with possible decay. lr = None if learning_rate is not None: if (isinstance(learning_rate, ops.Tensor) and learning_rate.get_shape().ndims == 0): lr = learning_rate elif isinstance(learning_rate, float): if learning_rate < 0.0: raise ValueError("Invalid learning_rate %s.", learning_rate) lr = vs.get_variable( "learning_rate", [], trainable=False, initializer=init_ops.constant_initializer(learning_rate)) else: raise ValueError("Learning rate should be 0d Tensor or float. " "Got %s of type %s" % (str(learning_rate), str(type(learning_rate)))) if summaries is None: summaries = ["loss", "learning_rate"] else: for summ in summaries: if summ not in OPTIMIZER_SUMMARIES: raise ValueError("Summaries should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_SUMMARIES), summ)) if learning_rate is not None and learning_rate_decay_fn is not None: if global_step is None: raise ValueError("global_step is required for learning_rate_decay_fn.") lr = learning_rate_decay_fn(lr, global_step) if "learning_rate" in summaries: summary.scalar("learning_rate", lr) # Create optimizer, given specified parameters. if isinstance(optimizer, six.string_types): if lr is None: raise ValueError("Learning rate is None, but should be specified if " "optimizer is string (%s)." % optimizer) if optimizer not in OPTIMIZER_CLS_NAMES: raise ValueError( "Optimizer name should be one of [%s], you provided %s." % (", ".join(OPTIMIZER_CLS_NAMES), optimizer)) opt = OPTIMIZER_CLS_NAMES[optimizer](learning_rate=lr) elif (isinstance(optimizer, type) and issubclass(optimizer, optimizer_.Optimizer)): if lr is None: raise ValueError("Learning rate is None, but should be specified if " "optimizer is class (%s)." % optimizer) opt = optimizer(learning_rate=lr) elif isinstance(optimizer, optimizer_.Optimizer): opt = optimizer elif callable(optimizer): if learning_rate is not None: opt = optimizer(lr) else: opt = optimizer() if not isinstance(opt, optimizer_.Optimizer): raise ValueError("Unrecognized optimizer: function should return " "subclass of Optimizer. Got %s." % str(opt)) else: raise ValueError("Unrecognized optimizer: should be string, " "subclass of Optimizer, instance of " "subclass of Optimizer or function with one argument. " "Got %s." 
% str(optimizer)) # All trainable variables, if specific variables are not specified. if variables is None: variables = vars_.trainable_variables() # Compute gradients. gradients = opt.compute_gradients( loss, variables, colocate_gradients_with_ops=colocate_gradients_with_ops) # Optionally add gradient noise. if gradient_noise_scale is not None: gradients = _add_scaled_noise_to_gradients(gradients, gradient_noise_scale) # Multiply some gradients. if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) if not gradients: raise ValueError( "Empty list of (gradient, var) pairs encountered. This is most " "likely to be caused by an improper value of gradient_multipliers.") if "gradient_norm" in summaries: summary.scalar("global_norm/gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Optionally clip gradients by global norm. if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) elif callable(clip_gradients): gradients = clip_gradients(gradients) elif clip_gradients is not None: raise ValueError( "Unknown type %s for clip_gradients" % type(clip_gradients)) # Add scalar summary for loss. if "loss" in summaries: summary.scalar("loss", loss) # Add histograms for variables, gradients and gradient norms. for gradient, variable in gradients: if isinstance(gradient, ops.IndexedSlices): grad_values = gradient.values else: grad_values = gradient if grad_values is not None: var_name = variable.name.replace(":", "_") if "gradients" in summaries: summary.histogram("gradients/%s" % var_name, grad_values) if "gradient_norm" in summaries: summary.scalar("gradient_norm/%s" % var_name, clip_ops.global_norm([grad_values])) if clip_gradients is not None and "gradient_norm" in summaries: summary.scalar("global_norm/clipped_gradient_norm", clip_ops.global_norm(list(zip(*gradients))[0])) # Create gradient updates. grad_updates = opt.apply_gradients( gradients, global_step=global_step if increment_global_step else None, name="train") # Ensure the train_tensor computes grad_updates. train_tensor = control_flow_ops.with_dependencies([grad_updates], loss) return train_tensor
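# --- Illustrative usage sketch (not from the original source) ---
# As described in the docstring above, optimize_loss accepts the optimizer as
# a string name, an Optimizer class, an Optimizer instance, or a callable
# taking the learning rate. A minimal TF 1.x example with a toy loss follows;
# the alternative `optimizer=` spellings are left as comments so only one
# train op is actually built.
import tensorflow as tf
from tensorflow.contrib import layers as contrib_layers

toy_weights = tf.get_variable("toy_weights", shape=[10],
                              initializer=tf.ones_initializer())
loss = tf.reduce_mean(tf.square(toy_weights))          # scalar loss
global_step = tf.train.get_or_create_global_step()

train_op = contrib_layers.optimize_loss(
    loss,
    global_step,
    learning_rate=0.01,
    optimizer='Adam',                                   # by name
    # optimizer=tf.train.AdagradOptimizer,              # by class
    # optimizer=tf.train.AdagradOptimizer(0.01),        # by instance
    # optimizer=lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.9),
    clip_gradients=5.0,
    learning_rate_decay_fn=lambda lr, step: tf.train.exponential_decay(
        lr, step, decay_steps=10000, decay_rate=0.96),
    summaries=["loss", "learning_rate", "gradient_norm"])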
def conv_model_train_op(loss, mode):
  return layers.optimize_loss(
      loss,
      framework.get_global_step(),
      learning_rate=0.003,
      optimizer="Adam",
      # to remove learning rate decay, comment the next line
      learning_rate_decay_fn=lambda lr, step: 0.0001 + tf.train.exponential_decay(lr, step, -2000, math.e)
  ) if mode == learn.ModeKeys.TRAIN else None