def train_model(model_name):
    """
        train a model and save it to folder '/trained_model/model_name'
    """
    from tensorflow import train
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
    model = CNNModel(image_size=[28, 28], char_number=10, channel=1)

    model.addLayer(Convolution2D(size=[5, 5], features=32))
    model.addLayer(ReLU())
    model.addLayer(MaxPool(size=[2, 2]))

    model.addLayer(Convolution2D(size=[3, 3], features=64))
    model.addLayer(ReLU())
    model.addLayer(MaxPool(size=[2, 2]))

    model.addLayer(FullyConnected(features=512))
    model.addLayer(ReLU())

    # model.addLayer(FullyConnected(features=512))
    # model.addLayer(ReLU())

    model.addLayer(FullyConnected(features=10))
    model.addOutputLayer(Softmax())

    model.train(dataset=mnist,
                eval_every=5,
                epochs=1000,
                evaluation_size=500,
                batch_size=100,
                optimizer=train.AdamOptimizer(0.005))

    model_path = "trained_model/" + model_name + "/" + model_name
    model.save(model_path)
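For reference, a minimal self-contained sketch of the same TF 1.x optimizer call outside the CNNModel wrapper; the toy variable and loss below are made up for illustration only:

import tensorflow as tf
from tensorflow import train

# Toy problem: minimize (w - 3)^2 with the same AdamOptimizer settings as above.
w = tf.Variable(0.0)
toy_loss = tf.square(w - 3.0)
toy_train_op = train.AdamOptimizer(0.005).minimize(toy_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1000):
        sess.run(toy_train_op)
    print(sess.run(w))  # converges towards 3.0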
Example #2
def train(X):
    r"""Optimization over variational lower bound.

    Args:
      X: A pixel matrix.

    Returns:
      KL_divergence: Distribution distance between the posterior and the prior, 
                     which can be analytically computed.
      generated_X: Generated X by decoding z.
      marginal_likelihood: Distribution similarity of X and Generated X, 
                           which is computed in a form of cross entropy.
      VLB: Variational lower bound.
      train_step: Optimization step.
    """
    if FLAGS.decoder == 'Bernoulli':
        decoding_network = Bernoulli_decoding_network
    elif FLAGS.decoder == 'Gaussian':
        decoding_network = Gaussian_decoding_network
    else:
        raise ValueError('Unsupported decoder: %s' % FLAGS.decoder)
    KL_divergence, sampled_z = encoding_network(X, FLAGS.hidden_layer_neurons,
                                                FLAGS.z_dim, FLAGS.reg_coef)
    generated_X, marginal_likelihood = decoding_network(
        X, sampled_z, FLAGS.hidden_layer_neurons, FLAGS.reg_coef)
    VLB = KL_divergence + marginal_likelihood
    train_step = tft.AdamOptimizer(FLAGS.learning_rate).minimize(-VLB)
    return KL_divergence, generated_X, marginal_likelihood, VLB, train_step
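A hedged sketch of driving the returned ops in a session; the X placeholder, the mnist data helper, and the batch size are assumptions, not part of the snippet above:

# Hypothetical training loop for the VAE ops returned by train().
KL, generated, ml, vlb, step = train(X)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(1000):
        batch_x, _ = mnist.train.next_batch(128)   # assumed data source
        _, vlb_value = sess.run([step, vlb], feed_dict={X: batch_x})
        if i % 100 == 0:
            print('step', i, 'VLB', vlb_value)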
Example #3
    class MyTfOptimizer(train.Optimizer):
        wrapping_optimizer = train.AdamOptimizer()

        def compute_gradients(self, loss, **kwargs):
            return super(MyTfOptimizer, self).compute_gradients(loss, **kwargs)

        def apply_gradients(self, grads_and_vars, **kwargs):
            return self.wrapping_optimizer.apply_gradients(grads_and_vars,
                                                           **kwargs)
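Since tf.train.Optimizer.__init__ requires use_locking and name arguments, constructing the wrapper looks roughly like this; the variable and loss are placeholders for illustration:

# Hypothetical usage of MyTfOptimizer (TF 1.x).
w = tf.Variable(1.0)
loss = tf.square(w)

opt = MyTfOptimizer(use_locking=False, name='MyTfOptimizer')
# Base-class minimize() calls our compute_gradients(), then the overridden
# apply_gradients(), which delegates to the wrapped AdamOptimizer.
train_op = opt.minimize(loss)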
Example #4
def test_tfoptimizer():
    from keras import constraints
    from tensorflow import train
    optimizer = optimizers.TFOptimizer(train.AdamOptimizer())
    model = Sequential()
    model.add(Dense(num_classes, input_shape=(3,), kernel_constraint=constraints.MaxNorm(1)))
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    model.fit(np.random.random((5, 3)), np.random.random((5, num_classes)),
              epochs=1, batch_size=5, verbose=0)
    # not supported
    with pytest.raises(NotImplementedError):
        optimizer.weights
    with pytest.raises(NotImplementedError):
        optimizer.get_config()
    with pytest.raises(NotImplementedError):
        optimizer.from_config(None)
Example #5
def model_fn(features, labels, mode):
    if 'images/encoded' in features:
        inputs = tf.map_fn(preprocess_image, features['images/encoded'], dtype=tf.float32)
    else:
        inputs = features['images']
        inputs = tf.image.convert_image_dtype(inputs, dtype=tf.float32)
        inputs = (inputs - 0.5) * 2.0
    model = getattr(sys.modules[__name__], 'model_' + flags.model)

    logits = model(inputs, mode == tf.estimator.ModeKeys.TRAIN, len(CATEGORIES))

    predictions = tf.nn.softmax(logits)
    loss, train_op, metrics = None, None, None
    export_outputs = {
        'classified': tf.estimator.export.ClassificationOutput(
            scores=tf.identity(predictions, name="scores"),
            classes=tf.constant(CATEGORIES, dtype=tf.string, name='classes')
        )
    }

    if mode != tf.estimator.ModeKeys.PREDICT:
        labels = tf.cast(labels, tf.int64)
        loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(labels, logits))

    if mode == tf.estimator.ModeKeys.TRAIN:
        tf.summary.scalar('loss', loss)
        tf.summary.image('image', inputs)
        for i, category in enumerate(CATEGORIES):
            tf.summary.image('image/' + category, tf.boolean_mask(inputs, tf.equal(labels, i)))

        batch_accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(predictions, 1), labels), tf.float32),
            name='batch_accuracy')
        tf.summary.scalar('batch_accuracy', batch_accuracy)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = train.AdamOptimizer(learning_rate=flags.lr).minimize(loss, train.get_global_step())

    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {'accuracy': tf.metrics.accuracy(labels, tf.argmax(predictions, 1))}

    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, loss=loss, train_op=train_op, eval_metric_ops=metrics,
                                      export_outputs=export_outputs)
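A minimal sketch of wiring this model_fn into an Estimator; model_dir and the input functions are assumptions:

# Hypothetical driver for the model_fn above.
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/image_model')
estimator.train(input_fn=train_input_fn, steps=10000)      # train_input_fn assumed
eval_metrics = estimator.evaluate(input_fn=eval_input_fn)  # eval_input_fn assumed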
Example #6
    def __init__(self, learning_rate=0.1, scope="value_estimator"):
        with tf.variable_scope(scope):
            self.state = tf.placeholder(tf.int32, [], name='state')
            self.target = tf.placeholder(tf.float32, name='target')

            # Table lookup estimator
            state_one_hot = tf.one_hot(self.state, int(OBSERVATION_SPACE))
            self.output_layer = layers.fully_connected(
                inputs=tf.expand_dims(state_one_hot, 0),
                num_outputs=1,
                activation_fn=None,
                weights_initializer=tf.zeros_initializer
            )

            self.value_estimate = tf.squeeze(self.output_layer)
            self.loss = tf.squared_difference(self.value_estimate, self.target)

            self.optimizer = train.AdamOptimizer(learning_rate)
            self.train_op = self.optimizer.minimize(
                self.loss, global_step=train.get_global_step()
            )
Example #7
def get_optimizer(cfg_parser, loss_op, var_list, global_step):
    required_params = ["OPTIMIZER_TYPE"]
    optim_cfg = cfg_parser.parse_and_return_dictionary("OPTIMIZER",
                                                       required_params)

    gradient_clipping = None

    if "GRADIENT_CLIPPING" in optim_cfg:
        print("Found Gradient Clipping, will use",
              optim_cfg["GRADIENT_CLIPPING"], " for clipping norm.")
        gradient_clipping = optim_cfg["GRADIENT_CLIPPING"]

    if optim_cfg["OPTIMIZER_TYPE"] == "ADAM":
        required_params = ["LEARNING_RATE", "EPSILON"]
        adam_cfg = cfg_parser.parse_and_return_dictionary(
            "OPTIMIZER", required_params)

        from tensorflow import train
        optimizer = train.AdamOptimizer(
            learning_rate=adam_cfg["LEARNING_RATE"],
            epsilon=adam_cfg["EPSILON"])

    else:
        raise NotImplementedError

    if gradient_clipping is None:
        return optimizer.minimize(loss_op,
                                  var_list=var_list,
                                  global_step=global_step)

    else:
        import tensorflow as tf

        gradients, variables = zip(
            *optimizer.compute_gradients(loss_op, var_list))
        gradients, _ = tf.clip_by_global_norm(gradients, gradient_clipping)
        return optimizer.apply_gradients(zip(gradients, variables),
                                         global_step=global_step)
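A hedged call-site sketch; cfg_parser and its parse_and_return_dictionary contract come from the surrounding project, so everything named below is illustrative only:

# Hypothetical call site: loss_op and the trainable variables already exist.
global_step = tf.train.get_or_create_global_step()
train_op = get_optimizer(cfg_parser,
                         loss_op=loss_op,
                         var_list=tf.trainable_variables(),
                         global_step=global_step)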
Example #8
    def __init__(self, learning_rate=0.01, scope='policy_estimator'):
        with tf.variable_scope(scope):
            self.state = tf.placeholder(tf.int32, [], 'state')
            self.action = tf.placeholder(dtype=tf.int32, name='action')
            self.target = tf.placeholder(dtype=tf.float32, name='target')

            # Table look up estimator
            state_one_hot = tf.one_hot(self.state, int(OBSERVATION_SPACE))
            self.output_layer = layers.fully_connected(
                inputs=tf.expand_dims(state_one_hot, 0),
                num_outputs=ACTION_SPACE,
                activation_fn=None,
                weights_initializer=tf.zeros_initializer
            )

            self.action_probs = tf.squeeze(nn.softmax(self.output_layer))
            self.picked_action_probs = tf.gather(self.action_probs, self.action)

            # Loss and train op
            self.loss = -tf.log(self.picked_action_probs) * self.target

            self.optimizer = train.AdamOptimizer(learning_rate=learning_rate)
            self.train_op = self.optimizer.minimize(self.loss,
                                                    global_step=train.get_global_step())
Example #9
    def __init__(
            self, name, coder, dataset,
            z_dim=300, supervised_weight=1.0, distance_weight=1.0,
            learning_rate=1e-3, cw_weight=1.0, init=1.0):

        tf.reset_default_graph()
        self.name = name
        self.init = init
        self.optimizer = tft.AdamOptimizer(learning_rate)
        self.cw_weight = cw_weight

        self.z_dim = z_dim
        x_dim = dataset.x_dim

        # Prepare placeholders
        tensor_x = tf.placeholder(
                shape=[None, x_dim],
                dtype=tf.float32, name='input_x')
        tensor_labels = tf.placeholder(
                shape=[None, dataset.classes_num],
                dtype=tf.float32, name='target_y')

        train_labeled = tf.placeholder_with_default(True, shape=[])
        tensor_cw_weight = tf.placeholder_with_default(cw_weight, shape=[])
        tensor_training = tf.placeholder_with_default(False, shape=[])

        labeled_mask = get_labels_mask(tensor_labels)
        tensor_z = coder.encode(tensor_x, z_dim, tensor_training)
        tensor_y = coder.decode(tensor_z, x_dim, tensor_training)

        # Unsupervised examples are treated differently than supervised:
        unsupervised_tensor_z = tf.cond(
            train_labeled,
            lambda: tensor_z,
            lambda: tf.boolean_mask(tensor_z, tf.logical_not(labeled_mask)))
        N0 = tf.shape(unsupervised_tensor_z)[0]

        means, variances, probs = get_gaussians(
                z_dim, init, dataset, dataset.classes_num)

        gamma = tf.pow(4 / (3 * N0 / dataset.classes_num), 0.4)
        gamma = tf.cast(gamma, tf.float32)

        class_logits = calculate_logits(
            tensor_z, means, variances, probs)
        class_probs = tf.nn.softmax(class_logits)
        class_cost = calculate_logits_cost(
                class_logits, tensor_labels, labeled_mask)

        cw_cost = cramer_wold_distance(
                unsupervised_tensor_z, means, variances, probs, gamma)
        log_cw_cost = tf.log(cw_cost)
        log_cw_cost *= tensor_cw_weight

        # MSE
        rec_cost = norm_squared(tensor_x - tensor_y, axis=-1)
        rec_cost = tf.cond(
            train_labeled,
            lambda: tf.reduce_mean(rec_cost),
            lambda: tf.reduce_mean(
                tf.boolean_mask(rec_cost, tf.logical_not(labeled_mask)))
        )

        distance_cost = linear_distance_penalty(
                z_dim, means, variances, probs, dataset.classes_num)

        unsupervised_cost = tf.reduce_mean(
                rec_cost
                + log_cw_cost
                + distance_weight * distance_cost)

        full_cost = tf.reduce_mean(
                rec_cost
                + log_cw_cost
                + supervised_weight * class_cost
                + distance_weight * distance_cost
                )

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Prepare various train ops
            grads, gvars = zip(*self.optimizer.compute_gradients(full_cost))
            grads, _ = tf.clip_by_global_norm(grads, 5.0)
            capped_gvs = [
                    (tf.clip_by_value(grad, -1., 1.), var)
                    for grad, var in zip(grads, gvars)
                ]
            train_op = self.optimizer.apply_gradients(capped_gvs)

            class_train_op = self.optimizer.minimize(class_cost)
            rec_train_op = self.optimizer.minimize(rec_cost)
            cw_train_op = self.optimizer.minimize(log_cw_cost)
            supervised_train_op = self.optimizer.minimize(class_cost)

        # Prepare variables for outside use
        self.z_dim = z_dim
        self.x_dim = x_dim
        self.saver = tf.train.Saver(max_to_keep=10000)
        self.placeholders = {
            "X": tensor_x,
            "y": tensor_labels,
            "train_labeled": train_labeled,
            "cw_weight": tensor_cw_weight,
            "training": tensor_training,
        }

        self.out = {
            "logits": class_logits,
            "probs": class_probs,
            "z": tensor_z,
            "y": tensor_y,
        }

        self.gausses = {
            "means": means,
            "variations": variances,
            "probs": probs}

        self.costs = {
            "class": class_cost,
            "cw": log_cw_cost,
            "reconstruction": rec_cost,
            "distance": distance_cost,
            "full": full_cost,
            "unsupervised": unsupervised_cost,
        }

        self.train_ops = {
            "full": train_op,
            "supervised": supervised_train_op,
            "rec": rec_train_op,
            "class": class_train_op,
            "cw": cw_train_op,
        }

        self.train_op = train_op
        self.supervised_train_op = supervised_train_op
        self.preds = class_logits
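The constructor above only builds the graph; a session loop roughly like the following would drive the exposed train_ops. The model's class name, the batch iterator, and the coder/dataset objects are not shown in the snippet, so they are placeholders here:

# Hypothetical training loop; `model` is an instance of the class whose __init__
# is shown above, and `batches` yields (x, y) arrays matching the placeholders.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch_x, batch_y in batches:
        sess.run(model.train_ops['full'],
                 feed_dict={model.placeholders['X']: batch_x,
                            model.placeholders['y']: batch_y,
                            model.placeholders['training']: True})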
Example #10
def loss(target_y, predicted_y):
  return tf.reduce_mean(tf.square(target_y - predicted_y))

# Loss for the embedder network
loss = tf.losses.mean_squared_error(df, X_tilde)
E_loss0 = 10 * tf.sqrt(loss)

from tensorflow.train import GradientDescentOptimizer

opt = GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss, e_vars)
opt.apply_gradients(grads_and_vars)

from tensorflow import train
# Define the optimizer and the list of variables it should update
E0_solver = train.AdamOptimizer().minimize(E_loss0, var_list= e_vars + r_vars)

# Start the tensorflow sessions
sess = tf.Session()

sess.run(tf.global_variables_initializer())

# Start the embedding learning
print('Start Embedding Network Training')
for itt in range(10):
    # feed_dict should map the `df` placeholder to the corresponding data array
    sess.run([E0_solver], feed_dict={df: df})

# Build an RNN generator network
from keras.models import Sequential
from keras.layers import LSTM

def generator(Z):
    generator = Sequential(name='generator')
    generator.add(LSTM(units=15, return_sequences=True, input_shape=(20, 5)))
    return generator
Example #11
import tensorflow as tf
from tensorflow import (placeholder, float32, Variable, random_normal, nn, train,
                        reduce_mean, cast, equal, argmax,
                        global_variables_initializer, Session, summary)
from numpy.random import RandomState
from sklearn.model_selection import train_test_split


def main(x,
         y,
         training_fraction=0.80,
         learning_rate=0.001,
         epochs=1000,
         batch_size=1000,
         update_summary_at=100):
    """
    :param x: shape = m * 786
    :param y: shape = m * 10
    :param training_fraction:
    :param epochs:
    :param batch_size:
    :param update_summary_at:
    :return:
    """
    training_size = int(len(x) * training_fraction)

    # If the last batch would be smaller than half of the desired batch size, raise.
    # In the future, instead of raising, we may simply drop that last batch.

    assert training_size % batch_size == 0 or training_size % batch_size > batch_size / 2
    last_batch_size = training_size % batch_size

    _data = train_test_split(x,
                             y,
                             train_size=training_fraction,
                             stratify=y.argmax(1),
                             random_state=0)

    # training_data_x, training_data_y = x[:training_size], y[:training_size]
    # testing_data_x, testing_data_y = x[training_size:], y[training_size:]

    training_data_x, training_data_y = _data[0], _data[2]
    testing_data_x, testing_data_y = _data[1], _data[3]

    feature_size = training_data_x.shape[1]
    hidden_nu = 20
    output_size = training_data_y.shape[1]

    x = placeholder(float32, [None, feature_size], name='x')
    y = placeholder(float32, [None, output_size], name='y')

    # also check xavier_initializer
    W1 = Variable(random_normal([feature_size, hidden_nu],
                                seed=1,
                                dtype=float32),
                  name='W1')
    b1 = Variable(random_normal([hidden_nu], dtype=float32, seed=2),
                  name='b1')  # use zeros also

    W2 = Variable(random_normal([hidden_nu, output_size],
                                seed=3,
                                dtype=float32),
                  name='W2')
    b2 = Variable(random_normal([output_size], dtype=float32, seed=4),
                  name='b2')

    L0_L1 = x @ W1 + b1
    L1_L1 = nn.relu(L0_L1)

    L1_L2 = L1_L1 @ W2 + b2
    L2_L2 = nn.softmax(L1_L2)

    # softmax_cross_entropy_with_logits_v2 expects pre-softmax activations, so pass
    # L1_L2 here; L2_L2 (the softmax output) is only used for predictions below.
    cost = reduce_mean(nn.softmax_cross_entropy_with_logits_v2(logits=L1_L2,
                                                               labels=y),
                       name='cost')

    optimization = train.AdamOptimizer(learning_rate=learning_rate).minimize(
        cost, name='optimization')

    init = global_variables_initializer()

    current_predictions = equal(argmax(L2_L2, axis=1), argmax(y, axis=1))

    accuracy = tf.round(
        10000 * reduce_mean(cast(current_predictions, float32))) / 100

    with Session() as sess:
        writer = summary.FileWriter('mnist/visualize', graph=sess.graph)

        cost_summary = summary.scalar('cost', cost)
        training_accuracy_summary = summary.scalar('training_accuracy',
                                                   accuracy)
        testing_accuracy_summary = summary.scalar('testing_accuracy', accuracy)

        sess.run(init)

        # ---------------------------------------------------------------------------------

        for e in range(epochs):

            # Shuffle with a per-epoch seed; it is worth checking how much the
            # uniformity of data within each batch actually matters.
            _idx = RandomState(e).permutation(training_size)

            total_cost = 0

            def mini_batch(start_idx, end_idx):
                curr_idx = _idx[start_idx:end_idx]

                _x = training_data_x[curr_idx]
                _y = training_data_y[curr_idx]

                _, c = sess.run([optimization, cost], feed_dict={x: _x, y: _y})

                return (end_idx - start_idx) * c

            for i in range(0, training_size, batch_size):
                total_cost += mini_batch(i, min(i + batch_size, training_size))

            if last_batch_size != 0:
                total_cost += mini_batch(training_size - last_batch_size,
                                         training_size)

            # TODO: check how this 'total_cost' could be fed into a summary.
            print('epoch:', e, 'total cost:', round(total_cost, 3))

            if e % update_summary_at == 0:
                _total_cost, training_accuracy = sess.run(
                    [cost_summary, training_accuracy_summary],
                    feed_dict={
                        x: training_data_x,
                        y: training_data_y
                    })
                writer.add_summary(_total_cost, e)
                writer.add_summary(training_accuracy, e)

                testing_accuracy = sess.run(testing_accuracy_summary,
                                            feed_dict={
                                                x: testing_data_x,
                                                y: testing_data_y
                                            })
                writer.add_summary(testing_accuracy, e)

        writer.close()
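One possible way to call main, sketched with Keras's MNIST loader purely for illustration (the original data pipeline is not shown):

# Hypothetical driver: flatten MNIST to (m, 784) and one-hot encode labels to (m, 10).
from keras.datasets import mnist as keras_mnist
from keras.utils import to_categorical

(train_images, train_labels), _ = keras_mnist.load_data()
x_all = train_images.reshape(-1, 784).astype('float32') / 255.0
y_all = to_categorical(train_labels, 10)

main(x_all, y_all, epochs=200, batch_size=1000)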