def benchmark(batch_size, iters, seed=1, cuda=True, history=100, verbose=False):
    """Run L-BFGS on a tied-weight sigmoid autoencoder reconstruction loss.

    Seeds TensorFlow and NumPy, loads images via ``u.get_mnist_images``,
    (re)initialises the module-level ``W_flat`` variable and hands a
    value-and-gradient closure to ``lbfgs``.

    Args:
        batch_size: number of images requested from ``u.get_mnist_images``
            and kept after slicing.
        iters: stored as ``config.maxIter`` for ``lbfgs``.
        seed: seed passed to ``tf.set_random_seed`` and ``np.random.seed``.
        cuda: when true, the images are copied with ``.gpu()`` and the
            optimisation runs under ``tf.device('/gpu:0')``.
        history: stored as ``config.nCorrection`` for ``lbfgs``.
        verbose: forwarded to ``lbfgs``; also enables ``u.summarize_time()``.

    Returns:
        The module-level ``final_loss``. It is not assigned in this
        function; presumably ``lbfgs`` updates it -- confirm.
    """
    global final_loss, W_flat

    tf.set_random_seed(seed)
    np.random.seed(seed)

    images = tf.constant(u.get_mnist_images(batch_size).T)
    images = images[:batch_size]
    if cuda:
        images = images.gpu()
    data = images

    device = '/gpu:0' if cuda else ''

    visible_size = 28 * 28
    hidden_size = 196

    # Enter the device scope through ``with`` so it is always popped again,
    # both on normal return and when something below raises. Calling
    # ``__enter__()`` by hand without ``__exit__()`` left the scope active
    # after the benchmark and stacked one more scope per call.
    with tf.device(device):
        initial_val = tf.zeros([visible_size * hidden_size])
        if W_flat is None:
            W_flat = tfe.Variable(initial_val, name='W_flat')
        W_flat.assign(initial_val)

        def loss_fn(w_flat):
            """Mean squared reconstruction error of ``data`` for flat weights."""
            w = tf.reshape(w_flat, [visible_size, hidden_size])
            x = tf.matmul(data, w)
            x = tf.sigmoid(x)
            # The decoder reuses the encoder weights, transposed.
            x = tf.matmul(x, w, transpose_b=True)
            x = tf.sigmoid(x)
            return tf.reduce_mean(tf.square(x - data))

        value_and_gradients_fn = tfe.value_and_gradients_function(loss_fn)

        def opfunc(x):  # returns (value, gradient)
            value, grads = value_and_gradients_fn(x)
            return value, grads[0]

        # initialize weights
        W_flat.assign(u.ng_init(visible_size, hidden_size).flatten())

        state = Struct()
        config = Struct()
        config.maxIter = iters
        config.nCorrection = history
        # NOTE(review): hard-coded, independent of the ``verbose`` argument
        # (which is passed to lbfgs separately) -- confirm this is intended.
        config.verbose = True
        # The returned (x, f_hist, currentFuncEval) tuple is not used here.
        lbfgs(opfunc, W_flat, config, state, verbose)

    if verbose:
        u.summarize_time()

    # NOTE(review): the original indentation was lost; this timing dump is
    # assumed to run regardless of ``verbose`` -- confirm.
    # ``times`` is a module-level name defined outside this function.
    s = ','.join(["%f" % (n,) for n in times[2:]])
    print('{', s, '}')

    return final_loss
def benchmark(batch_size, iters, seed=1, cuda=True, verbose=False):
    """Run L-BFGS on a tied-weight sigmoid autoencoder reconstruction loss.

    Seeds TensorFlow and NumPy, loads images via ``u.get_mnist_images``,
    (re)initialises the module-level ``W_flat`` variable and hands a
    value-and-gradient closure to ``lbfgs``.

    Args:
        batch_size: number of images requested from ``u.get_mnist_images``
            and kept after slicing.
        iters: stored as ``config.maxIter`` for ``lbfgs``.
        seed: seed passed to ``tf.set_random_seed`` and ``np.random.seed``.
        cuda: when true, the images are copied with ``.gpu()`` and the
            optimisation runs under ``tf.device('/gpu:0')``.
        verbose: forwarded to ``lbfgs``; also enables ``u.summarize_time()``.

    Returns:
        The module-level ``final_loss``. It is not assigned in this
        function; presumably ``lbfgs`` updates it -- confirm.
    """
    global final_loss, W_flat

    tf.set_random_seed(seed)
    np.random.seed(seed)

    images = tf.constant(u.get_mnist_images(batch_size).T)
    images = images[:batch_size]
    if cuda:
        images = images.gpu()
    data = images

    device = '/gpu:0' if cuda else ''

    visible_size = 28 * 28
    hidden_size = 196

    # Enter the device scope through ``with`` so it is always popped again,
    # both on normal return and when something below raises. Calling
    # ``__enter__()`` by hand without ``__exit__()`` left the scope active
    # after the benchmark and stacked one more scope per call.
    with tf.device(device):
        initial_val = tf.zeros([visible_size * hidden_size])
        if W_flat is None:
            W_flat = tfe.Variable(initial_val, name='W_flat')
        W_flat.assign(initial_val)

        def loss_fn(w_flat):
            """Mean squared reconstruction error of ``data`` for flat weights."""
            w = tf.reshape(w_flat, [visible_size, hidden_size])
            x = tf.matmul(data, w)
            x = tf.sigmoid(x)
            # The decoder reuses the encoder weights, transposed.
            x = tf.matmul(x, w, transpose_b=True)
            x = tf.sigmoid(x)
            return tf.reduce_mean(tf.square(x - data))

        value_and_gradients_fn = tfe.value_and_gradients_function(loss_fn)

        def opfunc(x):  # returns (value, gradient)
            value, grads = value_and_gradients_fn(x)
            return value, grads[0]

        # initialize weights
        W_flat.assign(u.ng_init(visible_size, hidden_size).flatten())

        state = Struct()
        config = Struct()
        config.maxIter = iters
        # NOTE(review): hard-coded, independent of the ``verbose`` argument
        # (which is passed to lbfgs separately) -- confirm this is intended.
        config.verbose = True
        # The returned (x, f_hist, currentFuncEval) tuple is not used here.
        lbfgs(opfunc, W_flat, config, state, verbose)

    if verbose:
        u.summarize_time()

    return final_loss
def test_fn(x, w):
    """Evaluate ``loss`` at ``(x, w)`` and return its value with one gradient.

    ``loss`` is not defined in this block; it is resolved from the
    surrounding scope at call time. The ``[1]`` argument selects which
    parameter of ``loss`` is differentiated, and exactly one gradient is
    expected back.

    Returns:
        A ``(loss_value, dw)`` pair.
    """
    value_and_grads = tfe.value_and_gradients_function(loss, [1])  # pylint:disable=undefined-variable
    loss_value, gradients = value_and_grads(x, w)
    # Single-element unpacking: fails loudly if more than one gradient
    # comes back.
    (dw,) = gradients
    return loss_value, dw
@tfe.custom_gradient
def my_matmul(x, y):
    """Matrix product of ``x`` and ``y`` with a hand-written backward pass.

    Returns:
        The product, and a function mapping the upstream gradient to the
        list ``[gradient for x, gradient for y]``.
    """
    product = tf.matmul(x, y)

    def backward(upstream):
        # d(x @ y)/dx = upstream @ y^T ; d(x @ y)/dy = x^T @ upstream
        grad_x = tf.matmul(upstream, y, transpose_b=True)
        grad_y = tf.matmul(x, upstream, transpose_a=True)
        return [grad_x, grad_y]

    return product, backward


# Plain gradient descent on sum(x @ y) with respect to x, for a fixed y.
lr = 0.25
n = 2
x = tfe.Variable(tf.ones([n, n]), name="x")
y = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)


def loss_fn(x):
    """Sum of all entries of ``my_matmul(x, y)`` for the module-level ``y``."""
    product = my_matmul(x, y)
    return tf.reduce_sum(product)


loss_grads_fn = tfe.value_and_gradients_function(loss_fn)

for step in range(5):
    loss, grads = loss_grads_fn(x)
    print("loss =", loss.numpy())
    # Only one argument is differentiated, so grads holds a single entry.
    update = lr * grads[0]
    x.assign_sub(update)

# The gradient w.r.t. x is constant ([[3, 7], [3, 7]]), so every step lowers
# the loss by 29: 20, -9, -38, -67, -96. The value checked is the loss
# computed in the last iteration, before that iteration's update.
assert loss.numpy() == -96