def get_predict_ops(encoder_predict_input, decoder_predict_input, params, reuse=False):
    """Build the PREDICT graph: score architectures and decode new ones.

    Args:
        encoder_predict_input: encoder input tensor (architecture sequence).
        decoder_predict_input: decoder input tensor (may be None at predict).
        params: dict of hyper-parameters; reads 'decoder_hidden_size' and
            'decoder_num_layers' here.
        reuse: whether the first encoder/decoder reuse existing variables.

    Returns:
        Tuple (predict_value, sample_id, new_sample_id): predicted
        performance, the decoding of the original architecture embedding,
        and the decoding of the gradient-ascent-updated embedding.
    """
    def _decoder_state(emb):
        # Decoder initial state: duplicate the embedding as (c, h) and tile
        # it across all decoder layers.  (This construction was duplicated
        # verbatim twice in the original body.)
        emb.set_shape([None, params['decoder_hidden_size']])
        state = tf.contrib.rnn.LSTMStateTuple(emb, emb)
        return (state,) * params['decoder_num_layers']

    encoder_predict_target = None
    decoder_predict_target = None
    my_encoder = encoder.Model(encoder_predict_input, encoder_predict_target,
                               params, tf.estimator.ModeKeys.PREDICT,
                               'Encoder', reuse)
    encoder_outputs = my_encoder.encoder_outputs
    encoder_state = _decoder_state(my_encoder.arch_emb)
    my_decoder = decoder.Model(encoder_outputs, encoder_state,
                               decoder_predict_input, decoder_predict_target,
                               params, tf.estimator.ModeKeys.PREDICT,
                               'Decoder', reuse)
    # infer() also returns the raw arch embedding, which is unused here.
    # NOTE(review): infer()/decode() are unpacked as tuples here but indexed
    # as dicts in model_fn — confirm which encoder/decoder API is current.
    _, predict_value, new_arch_emb, new_arch_outputs = my_encoder.infer()
    sample_id = my_decoder.decode()

    # Decode a second time from the embedding moved by gradient ascent,
    # sharing the decoder variables created above.
    encoder_state = _decoder_state(new_arch_emb)
    tf.get_variable_scope().reuse_variables()
    my_decoder = decoder.Model(new_arch_outputs, encoder_state,
                               decoder_predict_input, decoder_predict_target,
                               params, tf.estimator.ModeKeys.PREDICT, 'Decoder')
    new_sample_id = my_decoder.decode()
    return predict_value, sample_id, new_sample_id
def get_test_ops(encoder_test_input, encoder_test_target, decoder_test_input,
                 decoder_test_target, params, reuse=False):
    """Build the EVAL graph and return its loss and prediction tensors.

    Returns:
        Tuple (cross_entropy, total_loss, predict_value, encoder_test_target)
        where total_loss blends the encoder and decoder losses by
        params['trade_off'] plus an L2 weight-decay term.
    """
    eval_mode = tf.estimator.ModeKeys.EVAL
    my_encoder = encoder.Model(encoder_test_input, encoder_test_target,
                               params, eval_mode, 'Encoder', reuse)
    # Decoder initial state: the architecture embedding duplicated as (c, h)
    # and tiled over every decoder layer.
    arch_emb = my_encoder.arch_emb
    arch_emb.set_shape([None, params['decoder_hidden_size']])
    layer_state = tf.contrib.rnn.LSTMStateTuple(arch_emb, arch_emb)
    init_state = (layer_state,) * params['decoder_num_layers']
    my_decoder = decoder.Model(my_encoder.encoder_outputs, init_state,
                               decoder_test_input, decoder_test_target,
                               params, eval_mode, 'Decoder', reuse)
    cross_entropy = my_decoder.loss
    trade_off = params['trade_off']
    l2_term = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    total_loss = (trade_off * my_encoder.loss
                  + (1 - trade_off) * cross_entropy
                  + params['weight_decay'] * l2_term)
    return cross_entropy, total_loss, my_encoder.predict_value, encoder_test_target
def get_train_ops(encoder_train_input, encoder_train_target, decoder_train_input,
                  decoder_train_target, params, reuse=False):
    """Build the TRAIN graph: joint encoder/decoder loss and the train op.

    Args:
        encoder_train_input/encoder_train_target: encoder inputs and targets.
        decoder_train_input/decoder_train_target: decoder inputs and targets.
        params: hyper-parameter dict (lr, optimizer, trade_off, weight_decay,
            max_gradient_norm, decoder_hidden_size, decoder_num_layers, and
            the sgd decay settings).
        reuse: whether encoder/decoder reuse existing variables.

    Returns:
        Tuple (mse, cross_entropy, total_loss, learning_rate, train_op,
        global_step, grad_norm).

    Raises:
        ValueError: if params['optimizer'] is not 'sgd'/'adam'/'adadelta'.
    """
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.constant(params['lr'])
    if params['optimizer'] == "sgd":
        # Hold the base lr until start_decay_step, then decay exponentially.
        learning_rate = tf.cond(
            global_step < params['start_decay_step'],
            lambda: learning_rate,
            lambda: tf.train.exponential_decay(
                learning_rate, (global_step - params['start_decay_step']),
                params['decay_steps'], params['decay_factor'], staircase=True),
            name="calc_learning_rate")
        opt = tf.train.GradientDescentOptimizer(learning_rate)
    elif params['optimizer'] == "adam":
        assert float(params['lr']) <= 0.001, "! High Adam learning rate %g" % params['lr']
        opt = tf.train.AdamOptimizer(learning_rate)
    elif params['optimizer'] == 'adadelta':
        opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
    else:
        # Previously an unrecognized optimizer fell through and crashed later
        # with an opaque NameError on `opt`; fail fast instead.
        raise ValueError("Unknown optimizer: %s" % params['optimizer'])
    tf.summary.scalar("learning_rate", learning_rate)

    my_encoder = encoder.Model(encoder_train_input, encoder_train_target,
                               params, tf.estimator.ModeKeys.TRAIN,
                               'Encoder', reuse)
    encoder_outputs = my_encoder.encoder_outputs
    # Decoder initial state: the architecture embedding duplicated as (c, h)
    # and tiled over every decoder layer.
    encoder_state = my_encoder.arch_emb
    encoder_state.set_shape([None, params['decoder_hidden_size']])
    encoder_state = tf.contrib.rnn.LSTMStateTuple(encoder_state, encoder_state)
    encoder_state = (encoder_state,) * params['decoder_num_layers']
    my_decoder = decoder.Model(encoder_outputs, encoder_state,
                               decoder_train_input, decoder_train_target,
                               params, tf.estimator.ModeKeys.TRAIN,
                               'Decoder', reuse)
    encoder_loss = my_encoder.loss
    decoder_loss = my_decoder.loss
    mse = encoder_loss
    cross_entropy = decoder_loss
    total_loss = (params['trade_off'] * encoder_loss
                  + (1 - params['trade_off']) * decoder_loss
                  + params['weight_decay'] * tf.add_n(
                      [tf.nn.l2_loss(v) for v in tf.trainable_variables()]))
    tf.summary.scalar('training_loss', total_loss)

    # Run update ops (e.g. batch-norm moving averages) before the gradient
    # step; clip gradients by global norm before applying.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        gradients, variables = zip(*opt.compute_gradients(total_loss))
        grad_norm = tf.global_norm(gradients)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, params['max_gradient_norm'])
        train_op = opt.apply_gradients(
            zip(clipped_gradients, variables), global_step=global_step)
    return mse, cross_entropy, total_loss, learning_rate, train_op, global_step, grad_norm
def _make_decoder_state(arch_emb, params):
    """Tile an architecture embedding into the decoder's initial LSTM state.

    The embedding is duplicated as (c, h) and repeated for every decoder
    layer.  This construction appeared four times verbatim in model_fn.
    """
    arch_emb.set_shape([None, params['decoder_hidden_size']])
    state = tf.contrib.rnn.LSTMStateTuple(arch_emb, arch_emb)
    return (state,) * params['decoder_num_layers']


def _build_optimizer(params, global_step):
    """Create (learning_rate, optimizer) from params['optimizer'].

    Raises:
        ValueError: if params['optimizer'] is not 'sgd'/'adam'/'adadelta'
            (previously an unknown value fell through and crashed later with
            a NameError on the undefined optimizer).
    """
    learning_rate = tf.constant(params['lr'])
    if params['optimizer'] == "sgd":
        # Hold the base lr until start_decay_step, then decay exponentially.
        learning_rate = tf.cond(
            global_step < params['start_decay_step'],
            lambda: learning_rate,
            lambda: tf.train.exponential_decay(
                learning_rate, (global_step - params['start_decay_step']),
                params['decay_steps'], params['decay_factor'], staircase=True),
            name="calc_learning_rate")
        opt = tf.train.GradientDescentOptimizer(learning_rate)
    elif params['optimizer'] == "adam":
        assert float(params['lr']) <= 0.001, "! High Adam learning rate %g" % params['lr']
        opt = tf.train.AdamOptimizer(learning_rate)
    elif params['optimizer'] == 'adadelta':
        opt = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
    else:
        raise ValueError("Unknown optimizer: %s" % params['optimizer'])
    return learning_rate, opt


def _joint_loss(my_encoder, my_decoder, params):
    """Blend encoder/decoder losses by trade_off plus L2 weight decay."""
    l2_term = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    return (params['trade_off'] * my_encoder.loss
            + (1 - params['trade_off']) * my_decoder.loss
            + params['weight_decay'] * l2_term)


def model_fn(features, labels, mode, params):
    """tf.estimator model_fn for the encoder-decoder architecture model.

    TRAIN/EVAL expect features to carry 'encoder_input', 'encoder_target',
    'decoder_input', 'decoder_target'; PREDICT needs only 'encoder_input'
    (the rest default to None).  `labels` is unused — targets travel inside
    `features`.

    Note: the original TRAIN branch built the learning-rate/optimizer graph
    twice back-to-back; it is built once here via _build_optimizer.
    """
    if mode == tf.estimator.ModeKeys.TRAIN:
        encoder_input = features['encoder_input']
        encoder_target = features['encoder_target']
        decoder_input = features['decoder_input']
        decoder_target = features['decoder_target']
        my_encoder = encoder.Model(encoder_input, encoder_target, params,
                                   mode, 'Encoder')
        encoder_state = _make_decoder_state(my_encoder.arch_emb, params)
        my_decoder = decoder.Model(my_encoder.encoder_outputs, encoder_state,
                                   decoder_input, decoder_target, params,
                                   mode, 'Decoder')
        total_loss = _joint_loss(my_encoder, my_decoder, params)

        global_step = tf.train.get_or_create_global_step()
        learning_rate, opt = _build_optimizer(params, global_step)

        # Run update ops (e.g. batch-norm moving averages) before the
        # gradient step; clip gradients by global norm before applying.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            gradients, variables = zip(*opt.compute_gradients(total_loss))
            clipped_gradients, _ = tf.clip_by_global_norm(
                gradients, params['max_gradient_norm'])
            train_op = opt.apply_gradients(zip(clipped_gradients, variables),
                                           global_step=global_step)

        tf.identity(learning_rate, 'learning_rate')
        tf.summary.scalar("learning_rate", learning_rate)
        tf.summary.scalar("total_loss", total_loss)
        return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss,
                                          train_op=train_op)

    elif mode == tf.estimator.ModeKeys.EVAL:
        encoder_input = features['encoder_input']
        encoder_target = features['encoder_target']
        decoder_input = features['decoder_input']
        decoder_target = features['decoder_target']
        my_encoder = encoder.Model(encoder_input, encoder_target, params,
                                   mode, 'Encoder')
        encoder_state = _make_decoder_state(my_encoder.arch_emb, params)
        my_decoder = decoder.Model(my_encoder.encoder_outputs, encoder_state,
                                   decoder_input, decoder_target, params,
                                   mode, 'Decoder')
        total_loss = _joint_loss(my_encoder, my_decoder, params)
        return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss)

    elif mode == tf.estimator.ModeKeys.PREDICT:
        encoder_input = features['encoder_input']
        # Targets are optional at predict time.
        encoder_target = features.get('encoder_target', None)
        decoder_input = features.get('decoder_input', None)
        decoder_target = features.get('decoder_target', None)
        my_encoder = encoder.Model(encoder_input, encoder_target, params,
                                   mode, 'Encoder')
        encoder_state = _make_decoder_state(my_encoder.arch_emb, params)
        my_decoder = decoder.Model(my_encoder.encoder_outputs, encoder_state,
                                   decoder_input, decoder_target, params,
                                   mode, 'Decoder')
        res = my_encoder.infer()
        predict_value = res['predict_value']
        new_arch_emb = res['new_arch_emb']
        new_arch_outputs = res['new_arch_outputs']
        sample_id = my_decoder.decode()['sample_id']

        # Decode again from the embedding updated by gradient ascent,
        # sharing the decoder variables created above.
        encoder_state = _make_decoder_state(new_arch_emb, params)
        tf.get_variable_scope().reuse_variables()
        my_decoder = decoder.Model(new_arch_outputs, encoder_state,
                                   decoder_input, decoder_target, params,
                                   mode, 'Decoder')
        new_sample_id = my_decoder.decode()['sample_id']

        predictions = {
            'arch': decoder_target,
            'ground_truth_value': encoder_target,
            'predict_value': predict_value,
            'sample_id': sample_id,
            'new_sample_id': new_sample_id,
        }
        # Drop entries whose optional inputs were absent from `features`.
        _del_dict_nones(predictions)
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)