def train_model(model_name):
    """Train a model and save it to the folder 'trained_model/model_name'."""
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

    model = CNNModel(image_size=[28, 28], char_number=10, channel=1)
    model.addLayer(Convolution2D(size=[5, 5], features=32))
    model.addLayer(ReLU())
    model.addLayer(MaxPool(size=[2, 2]))
    model.addLayer(Convolution2D(size=[3, 3], features=64))
    model.addLayer(ReLU())
    model.addLayer(MaxPool(size=[2, 2]))
    model.addLayer(FullyConnected(features=512))
    model.addLayer(ReLU())
    # model.addLayer(FullyConnected(features=512))
    # model.addLayer(ReLU())
    model.addLayer(FullyConnected(features=10))
    model.addOutputLayer(Softmax())

    model.train(dataset=mnist, eval_every=5, epochs=1000, evaluation_size=500,
                batch_size=100, optimizer=train.AdamOptimizer(0.005))

    model_path = "trained_model/" + model_name + "/" + model_name
    model.save(model_path)
def train(X):
    r"""Optimization over the variational lower bound.

    Args:
        X: A pixel matrix.

    Returns:
        KL_divergence: Distribution distance between the posterior and the prior,
            which can be computed analytically.
        generated_X: X generated by decoding z.
        marginal_likelihood: Distribution similarity of X and the generated X,
            computed in the form of a cross entropy.
        VLB: Variational lower bound.
        train_step: Optimization step.
    """
    if FLAGS.decoder == 'Bernoulli':
        decoding_network = Bernoulli_decoding_network
    elif FLAGS.decoder == 'Gaussian':
        decoding_network = Gaussian_decoding_network

    KL_divergence, sampled_z = encoding_network(
        X, FLAGS.hidden_layer_neurons, FLAGS.z_dim, FLAGS.reg_coef)
    generated_X, marginal_likelihood = decoding_network(
        X, sampled_z, FLAGS.hidden_layer_neurons, FLAGS.reg_coef)

    VLB = KL_divergence + marginal_likelihood
    train_step = tft.AdamOptimizer(FLAGS.learning_rate).minimize(-VLB)

    return KL_divergence, generated_X, marginal_likelihood, VLB, train_step
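A minimal driver loop for the ops returned above might look like the following sketch; the placeholder shape, batch size, and the `mnist` dataset handle are assumptions for illustration, not part of the original code.

# Hypothetical driver for train(); the names X and mnist below are assumptions.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data")
X = tf.placeholder(tf.float32, [None, 784], name='X')
KL_divergence, generated_X, marginal_likelihood, VLB, train_step = train(X)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        batch_x, _ = mnist.train.next_batch(100)
        _, vlb = sess.run([train_step, VLB], feed_dict={X: batch_x})
        if step % 100 == 0:
            print('step', step, 'VLB', vlb)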
class MyTfOptimizer(train.Optimizer):
    wrapping_optimizer = train.AdamOptimizer()

    def compute_gradients(self, loss, **kwargs):
        return super(MyTfOptimizer, self).compute_gradients(loss, **kwargs)

    def apply_gradients(self, grads_and_vars, **kwargs):
        return self.wrapping_optimizer.apply_gradients(grads_and_vars, **kwargs)
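A quick way to exercise the wrapper is on a toy loss (the quadratic below is an assumption made only for illustration): the base `tf.train.Optimizer` constructor requires `use_locking` and a name, and `minimize()` then routes gradient application to the wrapped `AdamOptimizer`.

# Minimal sketch, not from the original code: drive MyTfOptimizer on a toy loss.
import tensorflow as tf
from tensorflow import train

w = tf.Variable(3.0)
loss = tf.square(w - 1.0)

opt = MyTfOptimizer(use_locking=False, name='MyTfOptimizer')
train_op = opt.minimize(loss)  # compute_gradients + delegated apply_gradients

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(100):
        sess.run(train_op)
    print(sess.run(w))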
def test_tfoptimizer():
    from keras import constraints
    from tensorflow import train
    optimizer = optimizers.TFOptimizer(train.AdamOptimizer())
    model = Sequential()
    model.add(Dense(num_classes, input_shape=(3,),
                    kernel_constraint=constraints.MaxNorm(1)))
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    model.fit(np.random.random((5, 3)), np.random.random((5, num_classes)),
              epochs=1, batch_size=5, verbose=0)
    # not supported
    with pytest.raises(NotImplementedError):
        optimizer.weights
    with pytest.raises(NotImplementedError):
        optimizer.get_config()
    with pytest.raises(NotImplementedError):
        optimizer.from_config(None)
def model_fn(features, labels, mode):
    if 'images/encoded' in features:
        inputs = tf.map_fn(preprocess_image, features['images/encoded'],
                           dtype=tf.float32)
    else:
        inputs = features['images']
        inputs = tf.image.convert_image_dtype(inputs, dtype=tf.float32)
        inputs = (inputs - 0.5) * 2.0

    model = getattr(sys.modules[__name__], 'model_' + flags.model)
    logits = model(inputs, mode == tf.estimator.ModeKeys.TRAIN, len(CATEGORIES))
    predictions = tf.nn.softmax(logits)

    loss, train_op, metrics = None, None, None
    export_outputs = {
        'classified': tf.estimator.export.ClassificationOutput(
            scores=tf.identity(predictions, name="scores"),
            classes=tf.constant(CATEGORIES, dtype=tf.string, name='classes')
        )
    }

    if mode != tf.estimator.ModeKeys.PREDICT:
        labels = tf.cast(labels, tf.int64)
        loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels, logits))

        if mode == tf.estimator.ModeKeys.TRAIN:
            tf.summary.scalar('loss', loss)
            # The input batch is an image tensor, so it needs an image summary.
            tf.summary.image('image', inputs)
            for i, category in enumerate(CATEGORIES):
                tf.summary.image('image/' + category,
                                 tf.boolean_mask(inputs, tf.equal(labels, i)))
            batch_accuracy = tf.reduce_mean(
                tf.cast(tf.equal(tf.argmax(predictions, 1), labels), tf.float32),
                name='batch_accuracy')
            tf.summary.scalar('batch_accuracy', batch_accuracy)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = train.AdamOptimizer(learning_rate=flags.lr).minimize(
                    loss, train.get_global_step())

        if mode == tf.estimator.ModeKeys.EVAL:
            metrics = {'accuracy': tf.metrics.accuracy(labels, tf.argmax(predictions, 1))}

    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss,
                                      train_op=train_op, eval_metric_ops=metrics,
                                      export_outputs=export_outputs)
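A hedged sketch of wiring the `model_fn` above into an `Estimator`; the toy `input_fn`, image size, and `model_dir` below are assumptions made only to show the expected `{'images': ...}` feature structure.

# Sketch only: random data standing in for a real input pipeline.
def toy_input_fn():
    images = tf.random_uniform([32, 64, 64, 3])  # image size assumed
    labels = tf.random_uniform([32], maxval=len(CATEGORIES), dtype=tf.int64)
    return {'images': images}, labels

estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/example_model')
estimator.train(input_fn=toy_input_fn, steps=100)
print(estimator.evaluate(input_fn=toy_input_fn, steps=10))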
def __init__(self, learning_rate=0.1, scope="value_estimator"):
    with tf.variable_scope(scope):
        self.state = tf.placeholder(tf.int32, [], name='state')
        self.target = tf.placeholder(tf.float32, name='target')

        # Table lookup estimator
        state_one_hot = tf.one_hot(self.state, int(OBSERVATION_SPACE))
        self.output_layer = layers.fully_connected(
            inputs=tf.expand_dims(state_one_hot, 0),
            num_outputs=1,
            activation_fn=None,
            weights_initializer=tf.zeros_initializer
        )

        self.value_estimate = tf.squeeze(self.output_layer)
        self.loss = tf.squared_difference(self.value_estimate, self.target)

        self.optimizer = train.AdamOptimizer(learning_rate)
        self.train_op = self.optimizer.minimize(
            self.loss, global_step=train.get_global_step()
        )
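A hedged usage sketch for the value estimator above; the class name `ValueEstimator` and the regression target are assumptions for illustration, and `OBSERVATION_SPACE` must be defined at module level as in the original code.

# Sketch only: query and update the tabular value estimate for state 0.
value_estimator = ValueEstimator(learning_rate=0.1)  # class name assumed
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    v = sess.run(value_estimator.value_estimate, {value_estimator.state: 0})
    print('value of state 0:', v)
    # Regress the estimate toward a (made-up) TD target for state 0.
    sess.run(value_estimator.train_op,
             {value_estimator.state: 0, value_estimator.target: 1.5})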
def get_optimizer(cfg_parser, loss_op, var_list, global_step):
    required_params = ["OPTIMIZER_TYPE"]
    optim_cfg = cfg_parser.parse_and_return_dictionary("OPTIMIZER", required_params)

    gradient_clipping = None
    if "GRADIENT_CLIPPING" in optim_cfg:
        print("Found Gradient Clipping, will use",
              optim_cfg["GRADIENT_CLIPPING"], "for clipping norm.")
        gradient_clipping = optim_cfg["GRADIENT_CLIPPING"]

    if optim_cfg["OPTIMIZER_TYPE"] == "ADAM":
        required_params = ["LEARNING_RATE", "EPSILON"]
        adam_cfg = cfg_parser.parse_and_return_dictionary(
            "OPTIMIZER", required_params)

        from tensorflow import train
        optimizer = train.AdamOptimizer(
            learning_rate=adam_cfg["LEARNING_RATE"],
            epsilon=adam_cfg["EPSILON"])
    else:
        raise NotImplementedError

    if gradient_clipping is None:
        return optimizer.minimize(loss_op, var_list=var_list,
                                  global_step=global_step)
    else:
        import tensorflow as tf
        gradients, variables = zip(
            *optimizer.compute_gradients(loss_op, var_list))
        gradients, _ = tf.clip_by_global_norm(gradients, gradient_clipping)
        return optimizer.apply_gradients(zip(gradients, variables),
                                         global_step=global_step)
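For reference, the clipped branch above boils down to the following standalone pattern; the toy loss, learning rate, and clip norm are assumptions standing in for the config values.

# Sketch of the gradient-clipping pattern without the config machinery.
import tensorflow as tf
from tensorflow import train

w = tf.Variable([1.0, -2.0])
loss_op = tf.reduce_sum(tf.square(w))

optimizer = train.AdamOptimizer(learning_rate=1e-3, epsilon=1e-8)
gradients, variables = zip(*optimizer.compute_gradients(loss_op, var_list=[w]))
gradients, _ = tf.clip_by_global_norm(gradients, 5.0)  # assumed clipping norm
train_op = optimizer.apply_gradients(zip(gradients, variables),
                                     global_step=tf.train.get_or_create_global_step())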
def __init__(self, learning_rate=0.01, scope='policy_estimator'):
    with tf.variable_scope(scope):
        self.state = tf.placeholder(tf.int32, [], 'state')
        self.action = tf.placeholder(dtype=tf.int32, name='action')
        self.target = tf.placeholder(dtype=tf.float32, name='target')

        # Table lookup estimator
        state_one_hot = tf.one_hot(self.state, int(OBSERVATION_SPACE))
        self.output_layer = layers.fully_connected(
            inputs=tf.expand_dims(state_one_hot, 0),
            num_outputs=ACTION_SPACE,
            activation_fn=None,
            weights_initializer=tf.zeros_initializer
        )

        self.action_probs = tf.squeeze(nn.softmax(self.output_layer))
        self.picked_action_probs = tf.gather(self.action_probs, self.action)

        # Loss and train op
        self.loss = -tf.log(self.picked_action_probs) * self.target

        self.optimizer = train.AdamOptimizer(learning_rate=learning_rate)
        self.train_op = self.optimizer.minimize(
            self.loss, global_step=train.get_global_step())
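A hedged usage sketch for the policy estimator above (the class name `PolicyEstimator` is assumed); the `target` fed into the update would normally be an advantage estimate, for example the return minus the baseline from the value estimator shown earlier.

# Sketch only: sample an action from the current policy and apply one update.
import numpy as np

policy = PolicyEstimator(learning_rate=0.01)  # class name assumed
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(policy.action_probs, {policy.state: 0})
    action = np.random.choice(len(probs), p=probs)
    advantage = 1.0  # placeholder for return minus baseline
    sess.run(policy.train_op,
             {policy.state: 0, policy.action: action, policy.target: advantage})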
def __init__(
        self, name, coder, dataset,
        z_dim=300, supervised_weight=1.0, distance_weight=1.0,
        learning_rate=1e-3, cw_weight=1.0, init=1.0):
    tf.reset_default_graph()
    self.name = name
    self.init = init
    self.optimizer = tft.AdamOptimizer(learning_rate)
    self.cw_weight = cw_weight
    self.z_dim = z_dim
    x_dim = dataset.x_dim

    # Prepare placeholders
    tensor_x = tf.placeholder(
        shape=[None, x_dim], dtype=tf.float32, name='input_x')
    tensor_labels = tf.placeholder(
        shape=[None, dataset.classes_num], dtype=tf.float32, name='target_y')
    train_labeled = tf.placeholder_with_default(True, shape=[])
    tensor_cw_weight = tf.placeholder_with_default(cw_weight, shape=[])
    tensor_training = tf.placeholder_with_default(False, shape=[])

    labeled_mask = get_labels_mask(tensor_labels)

    tensor_z = coder.encode(tensor_x, z_dim, tensor_training)
    tensor_y = coder.decode(tensor_z, x_dim, tensor_training)

    # Unsupervised examples are treated differently than supervised:
    unsupervised_tensor_z = tf.cond(
        train_labeled,
        lambda: tensor_z,
        lambda: tf.boolean_mask(tensor_z, tf.logical_not(labeled_mask)))

    N0 = tf.shape(unsupervised_tensor_z)[0]

    means, variances, probs = get_gaussians(
        z_dim, init, dataset, dataset.classes_num)

    gamma = tf.pow(4 / (3 * N0 / dataset.classes_num), 0.4)
    gamma = tf.cast(gamma, tf.float32)

    class_logits = calculate_logits(
        tensor_z, means, variances, probs)
    class_probs = tf.nn.softmax(class_logits)
    class_cost = calculate_logits_cost(
        class_logits, tensor_labels, labeled_mask)

    cw_cost = cramer_wold_distance(
        unsupervised_tensor_z, means, variances, probs, gamma)
    log_cw_cost = tf.log(cw_cost)
    log_cw_cost *= tensor_cw_weight

    # MSE
    rec_cost = norm_squared(tensor_x - tensor_y, axis=-1)
    rec_cost = tf.cond(
        train_labeled,
        lambda: tf.reduce_mean(rec_cost),
        lambda: tf.reduce_mean(
            tf.boolean_mask(rec_cost, tf.logical_not(labeled_mask)))
    )

    distance_cost = linear_distance_penalty(
        z_dim, means, variances, probs, dataset.classes_num)

    unsupervised_cost = tf.reduce_mean(
        rec_cost + log_cw_cost + distance_weight * distance_cost)
    full_cost = tf.reduce_mean(
        rec_cost + log_cw_cost
        + supervised_weight * class_cost
        + distance_weight * distance_cost
    )

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # Prepare various train ops
        grads, gvars = zip(*self.optimizer.compute_gradients(full_cost))
        grads, _ = tf.clip_by_global_norm(grads, 5.0)
        capped_gvs = [
            (tf.clip_by_value(grad, -1., 1.), var)
            for grad, var in zip(grads, gvars)
        ]
        train_op = self.optimizer.apply_gradients(capped_gvs)
        class_train_op = self.optimizer.minimize(class_cost)
        rec_train_op = self.optimizer.minimize(rec_cost)
        cw_train_op = self.optimizer.minimize(log_cw_cost)
        supervised_train_op = self.optimizer.minimize(class_cost)

    # Prepare variables for outside use
    self.z_dim = z_dim
    self.x_dim = x_dim
    self.saver = tf.train.Saver(max_to_keep=10000)

    self.placeholders = {
        "X": tensor_x,
        "y": tensor_labels,
        "train_labeled": train_labeled,
        "cw_weight": tensor_cw_weight,
        "training": tensor_training,
    }
    self.out = {
        "logits": class_logits,
        "probs": class_probs,
        "z": tensor_z,
        "y": tensor_y,
    }
    self.gausses = {
        "means": means,
        "variations": variances,
        "probs": probs}
    self.costs = {
        "class": class_cost,
        "cw": log_cw_cost,
        "reconstruction": rec_cost,
        "distance": distance_cost,
        "full": full_cost,
        "unsupervised": unsupervised_cost,
    }
    self.train_ops = {
        "full": train_op,
        "supervised": supervised_train_op,
        "rec": rec_train_op,
        "class": class_train_op,
        "cw": cw_train_op,
    }
    self.train_op = train_op
    self.supervised_train_op = supervised_train_op
    self.preds = class_logits
def loss(target_y, predicted_y):
    return tf.reduce_mean(tf.square(target_y - predicted_y))

# Loss for the embedder network
loss = tf.losses.mean_squared_error(df, X_tilde)
E_loss0 = 10 * tf.sqrt(loss)

from tensorflow.train import GradientDescentOptimizer
opt = GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss, e_vars)
opt.apply_gradients(grads_and_vars)

from tensorflow import train
# Define the optimizer and the list of variables it should update
E0_solver = train.AdamOptimizer().minimize(E_loss0, var_list=e_vars + r_vars)

# Start the TensorFlow session
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Start the embedding learning
print('Start Embedding Network Training')
for itt in range(10):
    sess.run([E0_solver], feed_dict={df: df})

# Build an RNN generator network
def generator(Z):
    generator = Sequential(name='generator')
    generator.add(LSTM(units=15, return_sequences=True, input_shape=(20, 5)))
def main(x, y, training_fraction=0.80, learning_rate=0.001, epochs=1000,
         batch_size=1000, update_summary_at=100):
    """
    :param x: shape = m * 786
    :param y: shape = m * 10
    :param training_fraction:
    :param epochs:
    :param batch_size:
    :param update_summary_at:
    :return:
    """
    training_size = int(len(x) * training_fraction)

    # If the last batch size is less than half of the desired batch size, throw an exception.
    # In the future, instead of throwing an exception we may avoid using this last batch.
    assert training_size % batch_size == 0 or training_size % batch_size > batch_size / 2
    last_batch_size = training_size % batch_size

    _data = train_test_split(x, y, train_size=training_fraction,
                             stratify=y.argmax(1), random_state=0)
    # training_data_x, training_data_y = x[:training_size], y[:training_size]
    # testing_data_x, testing_data_y = x[training_size:], y[training_size:]
    training_data_x, training_data_y = _data[0], _data[2]
    testing_data_x, testing_data_y = _data[1], _data[3]

    feature_size = training_data_x.shape[1]
    hidden_nu = 20
    output_size = training_data_y.shape[1]

    x = placeholder(float32, [None, feature_size], name='x')
    y = placeholder(float32, [None, output_size], name='y')

    # also check xavier_initializer
    W1 = Variable(random_normal([feature_size, hidden_nu], seed=1, dtype=float32), name='W1')
    b1 = Variable(random_normal([hidden_nu], dtype=float32, seed=2), name='b1')  # use zeros also
    W2 = Variable(random_normal([hidden_nu, output_size], seed=3, dtype=float32), name='W2')
    b2 = Variable(random_normal([output_size], dtype=float32, seed=4), name='b2')

    L0_L1 = x @ W1 + b1
    L1_L1 = nn.relu(L0_L1)
    L1_L2 = L1_L1 @ W2 + b2
    L2_L2 = nn.softmax(L1_L2)

    # softmax_cross_entropy_with_logits_v2 expects unnormalized logits,
    # so the pre-softmax activations L1_L2 are passed here.
    cost = reduce_mean(nn.softmax_cross_entropy_with_logits_v2(logits=L1_L2, labels=y),
                       name='cost')
    optimization = train.AdamOptimizer(learning_rate=learning_rate).minimize(
        cost, name='optimization')
    init = global_variables_initializer()

    current_predictions = equal(argmax(L2_L2, axis=1), argmax(y, axis=1))
    accuracy = tf.round(
        10000 * reduce_mean(cast(current_predictions, float32))) / 100

    with Session() as sess:
        writer = summary.FileWriter('mnist/visualize', graph=sess.graph)
        cost_summary = summary.scalar('cost', cost)
        training_accuracy_summary = summary.scalar('training accuracy', accuracy)
        testing_accuracy_summary = summary.scalar('testing accuracy', accuracy)

        sess.run(init)
        # ---------------------------------------------------------------------------------
        for e in range(epochs):
            # Check how much it matters to add uniformity of data in each batch.
            _idx = RandomState(e).permutation(training_size)
            total_cost = 0

            def mini_batch(start_idx, end_idx):
                curr_idx = _idx[start_idx:end_idx]
                _x = training_data_x[curr_idx]
                _y = training_data_y[curr_idx]
                _, c = sess.run([optimization, cost], feed_dict={x: _x, y: _y})
                return (end_idx - start_idx) * c

            for i in range(0, training_size, batch_size):
                total_cost += mini_batch(i, min(i + batch_size, training_size))
            if last_batch_size != 0:
                total_cost += mini_batch(training_size - last_batch_size, training_size)

            # Check how this 'total_cost' can be fed into the summary.
            print('epoch:', e, 'total cost:', round(total_cost, 3))

            if e % update_summary_at == 0:
                _total_cost, training_accuracy = sess.run(
                    [cost_summary, training_accuracy_summary],
                    feed_dict={x: training_data_x, y: training_data_y})
                writer.add_summary(_total_cost, e)
                writer.add_summary(training_accuracy, e)

                testing_accuracy = sess.run(testing_accuracy_summary,
                                            feed_dict={x: testing_data_x,
                                                       y: testing_data_y})
                writer.add_summary(testing_accuracy, e)

        writer.close()