Code Example #1
def get_optimizer(learning_rate, hparams):
    """Get the tf.train.Optimizer for this optimizer string.

    Args:
      learning_rate: The learning_rate tensor.
      hparams: tf.contrib.training.HParams object with the optimizer and
        momentum values.

    Returns:
      optimizer: The tf.train.Optimizer based on the optimizer string.
    """
    return {
        "rmsprop":
            tf.train.RMSPropOptimizer(learning_rate,
                                      decay=0.95,
                                      momentum=hparams.momentum,
                                      epsilon=1e-4),
        "adam":
            tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
                                   epsilon=1e-8),
        "adagrad":
            tf.train.AdagradOptimizer(learning_rate,
                                      initial_accumulator_value=1.0),
        "mom":
            tf.train.MomentumOptimizer(learning_rate, momentum=hparams.momentum),
        "sgd":
            tf.train.GradientDescentOptimizer(learning_rate)
    }.get(hparams.optimizer)
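
A minimal usage sketch for the helper above; the HParams values and the `loss` tensor are assumptions for illustration, not part of the original snippet:

# Hypothetical usage; `loss` is assumed to be an existing scalar tensor.
hparams = tf.contrib.training.HParams(optimizer="mom", momentum=0.9)
optimizer = get_optimizer(0.01, hparams)
train_op = optimizer.minimize(loss)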
Code Example #2
File: item2vec.py Project: xxqcheers/board-yet
def build_optimize_graph(self, loss):
    items_to_train = float(self.items_per_epoch * self.num_epochs)
    global_step = tf.Variable(0, name="global_step")
    self.global_step = global_step
    learning_rate = 0.001 * self.learning_rate
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train = optimizer.minimize(loss, global_step=self.global_step,
                               gate_gradients=optimizer.GATE_NONE)
    return train
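
A hedged sketch of how the returned op might be driven; `model`, `sess`, and `num_steps` are placeholders for illustration, not names taken from item2vec.py:

# Hypothetical driver loop for the training op built above.
train_op = model.build_optimize_graph(loss)
for _ in range(num_steps):
    sess.run(train_op)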
Code Example #3
def word2vec(batch_gen):
    """ Build the graph for word2vec model and train it """
    # Step 1: define the placeholders for input and output
    # center_words have to be int to work on embedding lookup

    # TO DO
    with tf.name_scope('data'):
        center_words = tf.placeholder(tf.int32, [BATCH_SIZE],
                                      name='center_words')
        target_words = tf.placeholder(tf.int32, [BATCH_SIZE, 1],
                                      name='target_words')

    # Step 2: define weights. In word2vec, it's actually the weights that we care about
    # vocab size x embed size
    # initialized to random uniform -1 to 1

    # TO DO
    with tf.name_scope('embedding_matrix'):
        embed_matrix = tf.Variable(tf.random_uniform([VOCAB_SIZE, EMBED_SIZE],
                                                     -1.0, 1.0),
                                   name='embed_matrix')

    # Step 3: define the inference
    # get the embed of input words using tf.nn.embedding_lookup
    # embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embed')

    # TO DO
    with tf.name_scope('loss'):
        embed = tf.nn.embedding_lookup(embed_matrix,
                                       center_words,
                                       name='embed')

        # Step 4: construct variables for NCE loss
        # tf.nn.nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, ...)
        # nce_weight (vocab size x embed size), initialized to truncated_normal stddev=1.0 / (EMBED_SIZE ** 0.5)
        # bias: vocab size, initialized to 0

        # TO DO
        nce_weights = tf.Variable(tf.truncated_normal([VOCAB_SIZE, EMBED_SIZE],
                                                      stddev=1.0 /
                                                      (EMBED_SIZE**0.5)),
                                  name='nce_weights')
        nce_biases = tf.Variable(tf.zeros(VOCAB_SIZE), name='nce_biases')

        # define loss function to be NCE loss function
        # tf.nn.nce_loss(weights, biases, labels, inputs, num_sampled, num_classes, ...)
        # need to get the mean across the batch
        # note: you should use embedding of center words for inputs, not center words themselves

        # TO DO
        nce_loss = tf.nn.nce_loss(weights=nce_weights,
                                  biases=nce_biases,
                                  labels=target_words,
                                  inputs=embed,
                                  num_sampled=NUM_SAMPLED,
                                  num_classes=VOCAB_SIZE,
                                  name='loss')
        loss = tf.reduce_mean(nce_loss)

    # Step 5: define optimizer

    # TO DO
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    with tf.Session() as sess:
        # TO DO: initialize variables
        sess.run(tf.global_variables_initializer())

        total_loss = 0.0  # we use this to calculate the average loss in the last SKIP_STEP steps
        writer = tf.summary.FileWriter('./graphs/no_frills/', sess.graph)
        for index in range(NUM_TRAIN_STEPS):
            centers, targets = next(batch_gen)
            # TO DO: create feed_dict, run optimizer, fetch loss_batch
            _, loss_batch = sess.run([optimizer, loss],
                                     feed_dict={
                                         center_words: centers,
                                         target_words: targets
                                     })

            total_loss += loss_batch
            if (index + 1) % SKIP_STEP == 0:
                print('Average loss at step {}: {:5.1f}'.format(
                    index, total_loss / SKIP_STEP))
                total_loss = 0.0
        writer.close()
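
The function above expects `batch_gen` to yield (centers, targets) pairs shaped [BATCH_SIZE] and [BATCH_SIZE, 1]. The generator below is a hypothetical stand-in with those shapes, not the course's actual data pipeline:

import numpy as np

def dummy_batch_gen(batch_size, vocab_size):
    # Hypothetical generator yielding the shapes word2vec() expects:
    # centers -> [batch_size], targets -> [batch_size, 1], integer word ids.
    while True:
        centers = np.random.randint(0, vocab_size, size=batch_size)
        targets = np.random.randint(0, vocab_size, size=(batch_size, 1))
        yield centers, targets

# word2vec(dummy_batch_gen(BATCH_SIZE, VOCAB_SIZE))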
Code Example #4
def kfac_optimizer(model_creator):
    stats_batch_size = 10000
    main_batch_size = 10000

    stats_model, loss, labels = model_creator(stats_batch_size)
    # replace labels_node with synthetic labels

    main_model, _, _ = model_creator(main_batch_size)

    opt = tf.train.GradientDescentOptimizer(0.2)
    grads_and_vars = opt.compute_gradients(loss)

    trainable_vars = tf.trainable_variables()

    # create SVD and preconditioning variables for matmul vars
    for var in trainable_vars:
        if var not in matmul_registry:
            continue
        dW = u.extract_grad(grads_and_vars, var)
        A[var] = get_activations(var)
        B[var] = get_backprops(var)
        B2[var] = get_backprops2(var)  # get backprops with synthetic labels
        dW[var] = B[var] @ t(A[var])  # todo: sort out dsize division
        cov_A[var] = init_var(A[var] @ t(A[var]) / dsize,
                              "cov_A_%s" % (var.name, ))
        cov_B2[var] = init_var(B2[var] @ t(B2[var]) / dsize,
                               "cov_B2_%s" % (var.name, ))

        vars_svd_A[var] = SvdWrapper(cov_A[var], "svd_A_%s" % (var.name,))
        vars_svd_B2[var] = SvdWrapper(cov_B2[var], "svd_B2_%s" % (var.name,))
        whitened_A = u.pseudo_inverse2(vars_svd_A[var]) @ A[var]
        whitened_B2 = u.pseudo_inverse2(vars_svd_B2[var]) @ B[var]
        whitened_A_stable = u.pseudo_inverse_sqrt2(vars_svd_A[var]) @ A[var]
        whitened_B2_stable = u.pseudo_inverse_sqrt2(vars_svd_B2[var]) @ B[var]

        pre_dW[var] = (whitened_B2 @ t(whitened_A)) / dsize
        pre_dW_stable[var] = (
            whitened_B2_stable @ t(whitened_A_stable)) / dsize
        dW[var] = (B[var] @ t(A[var])) / dsize

    # create update params ops

    # new_grads_and_vars = []
    # for grad, var in grads_and_vars:
    #   if var in kfac_registry:
    #     pre_A, pre_B = kfac_registry[var]
    #     new_grad_live = pre_B @ grad @ t(pre_A)
    #     new_grads_and_vars.append((new_grad, var))
    #     print("Preconditioning %s"%(var.name))
    #   else:
    #     new_grads_and_vars.append((grad, var))
    # train_op = opt.apply_gradients(new_grads_and_vars)

    # Each variable has an associated gradient, pre_gradient, variable save op
    def update_grad():
        ops = [grad_update_ops[var] for var in trainable_vars]
        sess.run(ops)

    def update_pre_grad():
        ops = [pre_grad_update_ops[var] for var in trainable_vars]
        sess.run(ops)

    def update_pre_grad2():
        ops = [pre_grad2_update_ops[var] for var in trainable_vars]
        sess.run(ops)

    def save_params():
        ops = [var_save_ops[var] for var in trainable_vars]
        sess.run(ops)

    for step in range(num_steps):
        update_covariances()
        if step % whitened_every_n_steps == 0:
            update_svds()

        update_grad()
        update_pre_grad()  # perf todo: update one of these
        update_pre_grad2()  # stable alternative

        lr0, loss0 = sess.run([lr, loss])
        save_params()

        # when grad norm<1, Fisher is unstable, switch to Sqrt(Fisher)
        # TODO: switch to per-matrix normalization
        stabilized_mode = grad_norm.eval() < 1

        if stabilized_mode:
            update_params2()
        else:
            update_params()

        loss1 = loss.eval()
        advance_batch()

        # line search stuff
        target_slope = (-pre_grad_dot_grad.eval() if stabilized_mode else
                        -pre_grad_stable_dot_grad.eval())
        target_delta = lr0 * target_slope
        actual_delta = loss1 - loss0
        actual_slope = actual_delta / lr0
        slope_ratio = actual_slope / target_slope  # between 0 and 1.01

        losses.append(loss0)
        step_lengths.append(lr0)
        ratios.append(slope_ratio)

        if step % report_frequency == 0:
            print(
                "Step %d loss %.2f, target decrease %.3f, actual decrease, %.3f ratio %.2f grad norm: %.2f pregrad norm: %.2f"
                % (step, loss0, target_delta, actual_delta, slope_ratio,
                   grad_norm.eval(), pre_grad_norm.eval()))

        u.record_time()
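
The commented-out block in the middle of the function applies a Kronecker-factored preconditioner to each gradient (`pre_B @ grad @ t(pre_A)`). A minimal NumPy sketch of that single step, with all names hypothetical:

import numpy as np

def precondition(grad, pre_A, pre_B):
    # Left-multiply by the output-side factor and right-multiply by the
    # transposed input-side factor, as in the commented-out block above.
    # Shapes: grad is (m, n), pre_B is (m, m), pre_A is (n, n).
    return pre_B @ grad @ pre_A.T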
Code Example #5
# x_data ([N, 16] floats), y_data ([N, 1] integer labels) and nb_classes are
# assumed to be defined elsewhere in the original script.
X = tf.placeholder(tf.float32, [None, 16])  # 16 features, inferred from the weight shape below
Y = tf.placeholder(tf.int32, [None, 1])

Y_one_hot = tf.one_hot(Y, nb_classes)
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])  # -1 lets TF infer the batch dimension

W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')

logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

#Cross_Entropy
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                 labels=Y_one_hot)
cost = tf.reduce_mean(cost_i)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

prediction = tf.argmax(hypothesis, 1)  # index of the highest-probability class
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(2001):
        sess.run(optimizer, feed_dict={X: x_data, Y: y_data})

        if step % 100 == 0:
            loss, acc = sess.run([cost, accuracy],
                                 feed_dict={X: x_data, Y: y_data})
            print("Step: {:5}\tLoss: {:.3f}\tAcc: {:.2%}".format(step, loss, acc))
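
A short aside on the reshape near the top of this example: tf.one_hot adds a trailing axis, so a [None, 1] integer label tensor becomes [None, 1, nb_classes] and must be flattened back before the cross-entropy op. A minimal check (the depth of 7 is an arbitrary stand-in for nb_classes):

labels = tf.placeholder(tf.int32, [None, 1])
one_hot = tf.one_hot(labels, 7)      # shape: (?, 1, 7)
flat = tf.reshape(one_hot, [-1, 7])  # shape: (?, 7)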
Code Example #6
import tensorflow as tf
import numpy as np

data = np.array([[1, 1], [2, 2], [3, 3], [4, 4]])
# defining variables and constants
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
m = tf.Variable(0.0)  # slope, arbitrary starting value
b = tf.Variable(0.0)  # intercept

# prediction, loss and others
prediction = m * x + b
loss = tf.reduce_sum(tf.square(y - prediction))  # use TF ops, not NumPy, so gradients flow
# the constructor takes a learning rate (0.01 here), then minimize(loss) builds the update op
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
session = tf.Session()

#initialize variables
# iterate over all data points to minimize loss
# find the new value of slope and things

#for i in data.shape[0]:
#    iterate

# do for actual dataset, house prices
# push to github in a tf repo
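
A minimal sketch of the training loop that the trailing comments describe, assuming each row of `data` is an (x, y) pair; everything here is illustrative, not the author's finished code:

# Hypothetical completion of the steps listed in the comments above.
session.run(tf.global_variables_initializer())
for epoch in range(100):
    for row in data:
        session.run(optimizer, feed_dict={x: row[0], y: row[1]})
print(session.run([m, b]))  # learned slope and intercept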
Code Example #7
import tensorflow as tf

# model: parameters, input and output
W = tf.Variable([0.3], dtype=tf.float32)
b = tf.Variable([-0.3], dtype=tf.float32)
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
linear_model = W * x + b

# loss & optimizer
loss = tf.reduce_sum(tf.square(linear_model - y))
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

x_train = [1, 2, 3, 4]
y_train = [0, -1, -2, -3]
# train the linear regression model
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
for i in range(1000):
    sess.run(train, feed_dict={x: x_train, y: y_train})

# Evaluate training accuracy
print(sess.run([W, b, loss], feed_dict={x: x_train, y: y_train}))
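
For this toy dataset the fit should approach W ≈ -1 and b ≈ 1. A quick way to inspect the learned predictions, added as a hedged follow-up rather than part of the original snippet:

# Predictions on the training inputs should approach y_train.
print(sess.run(linear_model, feed_dict={x: x_train}))
sess.close()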