Esempi in Python per v2c_np, esempi in Python per util.v2c_np

Esempio n. 1

0

Mostra file

def cost1(theta, visible_size, hidden_size, lambda_, sparsity_param, beta,
          data0):
    """Construct sparse autoencoder loss, return tensor."""

    W10 = theta[0:hidden_size * visible_size].reshape(hidden_size,
                                                      visible_size,
                                                      order='F')
    W20 = theta[hidden_size * visible_size:2 * hidden_size *
                visible_size].reshape(visible_size, hidden_size, order='F')
    b10 = theta[2 * hidden_size * visible_size:2 * hidden_size * visible_size +
                hidden_size]
    b20 = theta[2 * hidden_size * visible_size + hidden_size:]

    init_dict = {}

    def init_var(val, name, trainable=True):
        holder = tf.placeholder(dtype, shape=val.shape, name=name + "_holder")
        var = tf.Variable(holder, name=name + "_var", trainable=trainable)
        init_dict[holder] = val
        return var

    W1 = init_var(W10, "W1")
    W2 = init_var(W20, "W2")
    b1 = init_var(u.v2c_np(b10), "b1")
    b2 = init_var(u.v2c_np(b20), "b2")
    data = init_var(data0, "data", False)

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer(), feed_dict=init_dict)

    # Number of training examples
    # todo Rename data to x
    m = data0.shape[1]
    a1 = data

    # Forward propagation
    z2 = tf.matmul(W1, a1) + b1
    a2 = tf.sigmoid(z2)
    z3 = tf.matmul(W2, a2) + b2
    a3 = tf.sigmoid(z3)

    # Sparsity
    rho_hat = tf.reduce_sum(a2, axis=1, keep_dims=True) / m
    rho = tf.constant(sparsity_param, dtype=dtype)

    # Cost function
    cost = tf.reduce_sum((a3 - a1) ** 2) / (2 * m) + \
            (lambda_ / 2) * (tf.reduce_sum(W1 ** 2) + \
                             tf.reduce_sum(W2 ** 2)) + \
                             beta * tf.reduce_sum(KL_divergence_tf(rho, rho_hat))
    return init_dict, cost

Esempio n. 2

0

Mostra file

def sparse_autoencoder_cost_tf(theta, visible_size, hidden_size, lambda_,
                               sparsity_param, beta, data):

    # TODO: get rid of b
    #  def f(i): return fs[i+1]  # W[i] has shape f[i] x f[i-1]
    #  fs = [10000, 28*28, 196, 28*28]
    #  n = len(fs)-2
    #  W = [None]*n

    W1 = theta[0:hidden_size * visible_size].reshape(hidden_size,
                                                     visible_size,
                                                     order='F')
    W2 = theta[hidden_size * visible_size:2 * hidden_size *
               visible_size].reshape(visible_size, hidden_size, order='F')
    b1 = theta[2 * hidden_size * visible_size:2 * hidden_size * visible_size +
               hidden_size]
    b2 = theta[2 * hidden_size * visible_size + hidden_size:]

    init_dict = {}

    def init_var(val, name):
        holder = tf.placeholder(dtype, shape=val.shape, name=name + "_holder")
        var = tf.Variable(holder, name=name + "_var")
        init_dict[holder] = val
        return var

    W1_ = init_var(W1, "W1")
    W2_ = init_var(W2, "W2")
    b1_ = init_var(u.v2c_np(b1), "b1")
    b2_ = init_var(u.v2c_np(b2), "b2")
    data_ = init_var(data, "data")
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer(), feed_dict=init_dict)

    # Number of training examples
    m = data.shape[1]
    a1 = data
    a1_ = data_

    # Forward propagation
    z2 = W1.dot(a1) + np.tile(b1, (m, 1)).transpose()
    z2_ = tf.matmul(W1_, a1_) + b1_

    a2 = sigmoid(z2)
    a2_ = tf.sigmoid(z2_)
    z3 = W2.dot(a2) + np.tile(b2, (m, 1)).transpose()
    z3_ = tf.matmul(W2_, a2_) + b2_
    a3 = sigmoid(z3)
    a3_ = tf.sigmoid(z3_)

    # Sparsity
    rho_hat = np.sum(a2, axis=1) / m
    rho_hat_ = tf.reduce_sum(a2_, axis=1, keep_dims=True) / m
    rho = np.tile(sparsity_param, hidden_size)
    # ValueError: Shape must be rank 1 but is rank 0 for 'Tile_2' (op: 'Tile') with input shapes: [], [].
    rho_ = tf.constant(sparsity_param, dtype=dtype)
    #tf.ones((hidden_size, 1), dtype=dtype)*sparsity_param

    u.check_equal(sess.run(a3_), a3)
    u.check_equal(sess.run(a2_), a2)
    u.check_equal(sess.run(a1_), a1)
    u.check_equal(
        tf.reduce_sum(KL_divergence_tf(rho_, rho_hat_)).eval(),
        np.sum(KL_divergence(rho, rho_hat)))

    # Cost function
    cost = np.sum((a3 - a1) ** 2) / (2 * m) + \
           (lambda_ / 2) * (np.sum(W1 ** 2) + np.sum(W2 ** 2)) + \
           beta * np.sum(KL_divergence(rho, rho_hat))
    cost_ = tf.reduce_sum((a3_ - a1_) ** 2) / (2 * m) + \
            (lambda_ / 2) * (tf.reduce_sum(W1_ ** 2) + \
                             tf.reduce_sum(W2_ ** 2)) + \
                             beta * tf.reduce_sum(KL_divergence_tf(rho_, rho_hat_))
    return sess.run(cost_)

Esempio n. 3

0

Mostra file

def simple_newton_kfac_test():
  tf.reset_default_graph()
  X0 = np.genfromtxt('data/rotations_simple_X0.csv',
                     delimiter= ",")
  Y0 = np.genfromtxt('data/rotations_simple_Y0.csv',
                     delimiter= ",")
  W0f = v2c_np(np.genfromtxt('data/rotations_simple_W0f.csv',
                            delimiter= ","))
  assert W0f.shape == (8, 1)
  
  fs = np.genfromtxt('data/rotations_simple_fs.csv',
                      delimiter= ",").astype(np.int32)
  n = len(fs)-2    # number of layers
  u.check_equal(fs, [10,2,2,2])

  def f(i): return fs[i+1]  # W[i] has shape f[i] x f[i-1]
  dsize = X0.shape[1]
  assert f(-1) == dsize
  
  # load W0f and do shape checks (can remove)
  W0s = u.unflatten_np(W0f, fs[1:])  # Wf doesn't have first layer (data matrix)
  W0s.insert(0, X0)
  Wf_holder = tf.placeholder(dtype, shape=W0f.shape)
  Wf = tf.Variable(Wf_holder, name="Wf")
  Wf_copy = tf.Variable(Wf_holder, name="Wf_copy")
  init_dict = {Wf_holder: W0f}
  
  # Create W's
  W = u.unflatten(Wf, fs[1:])
  X = tf.constant(X0)
  Y = tf.constant(Y0)
  W.insert(0, X)
  for (numpy_W, tf_W) in zip(W0s, W):
    u.check_equal(numpy_W.shape, u.fix_shape(tf_W.shape))

  # Create A's
  # A[1] == X
  A = [0]*(n+2)
  A[0] = u.Identity(dsize)
  for i in range(n+1):
    A[i+1] = tf.matmul(W[i], A[i], name="A"+str(i+1))

  assert W[0].get_shape() == X0.shape
  assert A[n+1].get_shape() == X0.shape
  assert A[1].get_shape() == X0.shape

  err = Y - A[n+1]
  loss = tf.reduce_sum(tf.square(err))/(2*dsize)
  lr = tf.Variable(0.5, dtype=dtype, name="learning_rate")
  
  # Create B's
  B = [0]*(n+1)
  B[n] = -err/dsize
  Bn = [0]*(n+1)            # Newton-modified backprop
  Bn[n] = u.Identity(f(n))
  for i in range(n-1, -1, -1):
    B[i] = t(W[i+1]) @ B[i+1]
    Bn[i] = t(W[i+1]) @ Bn[i+1]
    
  # inverse Hessian blocks
  iblocks = u.empty_grid(n+1, n+1)
  for i in range(1, n+1):
    for j in range(1, n+1):
      # reuse Hess tensor calculation in order to get off-diag block sizes
      dummy_term = kr(A[i] @ t(A[j]), Bn[i] @ t(Bn[j])) / dsize;
      if i == j:
        acov = A[i] @ t(A[j])
        bcov = Bn[i] @ t(Bn[j]) / dsize;
        term = kr(u.pseudo_inverse(acov), u.pseudo_inverse(bcov))
      else:
        term = tf.zeros(shape=dummy_term.get_shape(), dtype=dtype)
      iblocks[i][j]=term
        
  # remove leftmost blocks (those are with respect to W[0] which is input)
  del iblocks[0]
  for row in iblocks:
    del row[0]
    
  ihess = u.concat_blocks(iblocks)
  
  sess = tf.Session()
  sess.run(tf.global_variables_initializer(), feed_dict=init_dict)

  # create dW's
  dW = [0]*(n+1)
  for i in range(n+1):
    dW[i] = tf.matmul(B[i], tf.transpose(A[i]), name="dW"+str(i))
  del dW[0]  # get rid of W[0] update
  
  dWf = tf.concat([u.vec(dWi) for dWi in dW], axis=0)
  Wf_new = Wf - lr * ihess @ dWf 

  train_op1 = Wf_copy.assign(Wf_new)
  train_op2 = Wf.assign(Wf_copy)

  
  expected_losses = np.loadtxt("data/rotations_simple_newtonkfac_losses.csv",
                               delimiter= ",")
  observed_losses = []

  # from accompanying notebook
  #  {0.0111498, 0.0000171591, 4.11445*10^-11, 2.33653*10^-22, 
  # 6.88354*10^-33,
 
  for i in range(10):
    observed_losses.append(sess.run([loss])[0])
    sess.run(train_op1)
    sess.run(train_op2)

  u.check_equal(observed_losses, expected_losses)

Esempio n. 4

0

Mostra file

def relu_gradient_test():
  tf.reset_default_graph()
  X0 = np.genfromtxt('data/rotations_X0.csv',
                     delimiter= ",")
  Y0 = np.genfromtxt('data/rotations_Y0.csv',
                     delimiter= ",")
  W0f = v2c_np(np.genfromtxt('data/rotations_W0f.csv',
                            delimiter= ","))
  assert W0f.shape == (8, 1)
  
  fs = np.genfromtxt('data/rotations_relu_fs.csv',
                      delimiter= ",").astype(np.int32)
  n = len(fs)-2    # number of layers
  u.check_equal(fs, [4,2,2,2])

  def f(i): return fs[i+1]  # W[i] has shape f[i] x f[i-1]
  dsize = X0.shape[1]
  assert f(-1) == dsize
  
  # load W0f and do shape checks (can remove)
  W0s = u.unflatten_np(W0f, fs[1:])  # Wf doesn't have first layer (data matrix)
  W0s.insert(0, X0)
  Wf_holder = tf.placeholder(dtype, shape=W0f.shape)
  Wf = tf.Variable(Wf_holder, name="Wf")
  Wf_copy = tf.Variable(Wf_holder, name="Wf_copy")
  init_dict = {Wf_holder: W0f}

  # Create W's
  W = u.unflatten(Wf, fs[1:])
  X = tf.constant(X0, name="X0")
  Y = tf.constant(Y0, name="Y0")
  W.insert(0, X)
  for (numpy_W, tf_W) in zip(W0s, W):
    u.check_equal(numpy_W.shape, u.fix_shape(tf_W.shape))

  # Create A's
  # A[1] == X
  A = [0]*(n+2)
  A[0] = u.Identity(dsize)
  for i in range(n+1):
    if i == 0:
      A[i+1] = X
    else:
      A[i+1] = tf.nn.relu(tf.matmul(W[i], A[i], name="A"+str(i+1)))

  assert W[0].get_shape() == X0.shape
  assert A[n+1].get_shape() == X0.shape
  assert A[1].get_shape() == X0.shape

  err = Y - A[n+1]
  loss = tf.reduce_sum(tf.square(err))/(2*dsize)
  lr = tf.Variable(0.1, dtype=dtype)
  
  # Create B's
  B = [0]*(n+1)
  B[n] = (-err/dsize)*u.relu_mask(A[n+1])
  for i in range(n-1, -1, -1):
    B[i] = t(W[i+1]) @ B[i+1]
    if i > 0:  # there's no relu on first matrix
      B[i] = B[i]*u.relu_mask(A[i+1])

  # create dW's
  dW = [0]*(n+1)
  for i in range(n+1):
    dW[i] = tf.matmul(B[i], tf.transpose(A[i]), name="dW"+str(i))
  del dW[0]  # get rid of W[0] update
  
  dWf = tf.concat([u.vec(dWi) for dWi in dW], axis=0)
  Wf_new = Wf - lr * dWf 

  train_op1 = Wf_copy.assign(Wf_new)
  train_op2 = Wf.assign(Wf_copy)

  sess = tf.Session()
  sess.run(tf.global_variables_initializer(), feed_dict=init_dict)
  
  expected_losses = np.loadtxt("data/rotations_relu_gradient_losses.csv",
                               delimiter= ",")
  observed_losses = []
  
  # From accompanying notebook
  #  {0.407751, 0.0683822, 0.0138657, 0.0039221, 0.00203637, 0.00164892,
  #    0.00156137, 0.00153857, 0.00153051, 0.00152593}
  for i in range(10):
    observed_losses.append(sess.run([loss])[0])
    sess.run(train_op1)
    sess.run(train_op2)

  u.check_equal(observed_losses, expected_losses)

Esempio n. 5

0

Mostra file

def simple_newton_bd_test():
  tf.reset_default_graph()
  X0 = np.genfromtxt('data/rotations_simple_X0.csv',
                     delimiter= ",")
  Y0 = np.genfromtxt('data/rotations_simple_Y0.csv',
                     delimiter= ",")
  W0f = v2c_np(np.genfromtxt('data/rotations_simple_W0f.csv',
                            delimiter= ","))
  assert W0f.shape == (8, 1)
  
  fs = np.genfromtxt('data/rotations_simple_fs.csv',
                      delimiter= ",").astype(np.int32)
  n = len(fs)-2    # number of layers
  u.check_equal(fs, [10,2,2,2])

  def f(i): return fs[i+1]  # W[i] has shape f[i] x f[i-1]
  dsize = X0.shape[1]
  assert f(-1) == dsize
  
  # load W0f and do shape checks (can remove)
  W0s = u.unflatten_np(W0f, fs[1:])  # Wf doesn't have first layer (data matrix)
  W0s.insert(0, X0)
  Wf_holder = tf.placeholder(dtype, shape=W0f.shape)
  Wf = tf.Variable(Wf_holder, name="Wf")
  Wf_copy = tf.Variable(Wf_holder, name="Wf_copy")
  init_dict = {Wf_holder: W0f}
  
  # Create W's
  W = u.unflatten(Wf, fs[1:])
  X = tf.constant(X0)
  Y = tf.constant(Y0)
  W.insert(0, X)
  for (numpy_W, tf_W) in zip(W0s, W):
    u.check_equal(numpy_W.shape, u.fix_shape(tf_W.shape))

  # Create A's
  # A[1] == X
  A = [0]*(n+2)
  A[0] = u.Identity(dsize)
  for i in range(n+1):
    A[i+1] = tf.matmul(W[i], A[i], name="A"+str(i+1))

  assert W[0].get_shape() == X0.shape
  assert A[n+1].get_shape() == X0.shape
  assert A[1].get_shape() == X0.shape

  err = Y - A[n+1]
  loss = tf.reduce_sum(tf.square(err))/(2*dsize)
  lr = tf.Variable(0.5, dtype=dtype, name="learning_rate")
  
  # Create B's
  B = [0]*(n+1)
  B[n] = -err/dsize
  Bn = [0]*(n+1)            # Newton-modified backprop
  Bn[n] = u.Identity(f(n))
  for i in range(n-1, -1, -1):
    B[i] = t(W[i+1]) @ B[i+1]
    Bn[i] = t(W[i+1]) @ Bn[i+1]

  # Create U's
  U = [list(range(n+1)) for _ in range(n+1)]
  for bottom in range(n+1):
    for top in range(n+1):
      if bottom > top:
        prod = u.Identity(f(top))
      else:
        prod = u.Identity(f(bottom-1))
        for i in range(bottom, top+1):
          prod = prod@t(W[i])
      U[bottom][top] = prod

  # Block i, j gives hessian block between layer i and layer j
  blocks = [list(range(n+1)) for _ in range(n+1)]
  for i in range(1, n+1):
    for j in range(1, n+1):
      term1 = kr(A[i] @ t(A[j]), Bn[i] @ t(Bn[j])) / dsize;
      if i == j:
        term2 = tf.zeros((f(i)*f(i-1), f(i)*f(i-1)), dtype=dtype)
      elif i < j:
        term2 = kr(A[i] @ t(B[j]), U[i+1][j-1])
      else:
        term2 = kr(t(U[j+1][i-1]), B[i] @ t(A[j]))
        
      blocks[i][j]=term1 + term2 @ Kmat(f(j), f(j-1))

        
  # remove leftmost blocks (those are with respect to W[0] which is input)
  del blocks[0]
  for row in blocks:
    del row[0]
    
  #hess = u.concat_blocks(blocks)
  ihess = u.concat_blocks(u.block_diagonal_inverse(blocks))
  #  ihess = u.pseudo_inverse(hess)
  
  sess = tf.Session()
  sess.run(tf.global_variables_initializer(), feed_dict=init_dict)

  # create dW's
  dW = [0]*(n+1)
  for i in range(n+1):
    dW[i] = tf.matmul(B[i], tf.transpose(A[i]), name="dW"+str(i))
  del dW[0]  # get rid of W[0] update
  
  dWf = tf.concat([u.vec(dWi) for dWi in dW], axis=0)
  Wf_new = Wf - lr * ihess @ dWf 

  train_op1 = Wf_copy.assign(Wf_new)
  train_op2 = Wf.assign(Wf_copy)

  
  expected_losses = np.loadtxt("data/rotations_simple_newtonbd_losses.csv",
                               delimiter= ",")
  observed_losses = []
  
  # from accompanying notebook
  # 0.0111498, 0.0000171591, 4.11445*10^-11, 2.33652*10^-22, 
  # 1.21455*10^-32,
 
  for i in range(10):
    observed_losses.append(sess.run([loss])[0])
    sess.run(train_op1)
    sess.run(train_op2)

  u.check_equal(observed_losses, expected_losses)

Esempio n. 6

0

Mostra file

File: rotations_comparison.py Progetto: yaroslavvb/stuff

def grad_update(new_val):
  copy_op = Wf_copy.assign(new_val)
  with tf.control_dependencies([copy_op]):
    train_op = Wf.assign(Wf_copy)
  return train_op


if __name__ == '__main__':
  # Compare a set of algorithms on rotations problem

  X0 = np.genfromtxt('data/large_rotations2_X0.csv',
                     delimiter= ",")
  Y0 = np.genfromtxt('data/large_rotations2_Y0.csv',
                     delimiter= ",")
  W0f = v2c_np(np.genfromtxt('data/large_rotations2_W0f.csv',
                             delimiter= ","))
  fs = np.genfromtxt('data/large_rotations2_fs.csv',
                     delimiter= ",").astype(np.int32)
  n = len(fs)-2    # number of layers

  def f(i): return fs[i+1]  # W[i] has shape f[i] x f[i-1]

  dsize = X0.shape[1]
  assert f(-1) == dsize

  # load W0f and do shape checks (can remove)
  W0s = u.unflatten_np(W0f, fs[1:])  # Wf doesn't have first layer (data matrix)
  W0s.insert(0, X0)
  Wf_holder = tf.placeholder(dtype, shape=W0f.shape)
  Wf = tf.Variable(Wf_holder, name="Wf")
  Wf_copy = tf.Variable(Wf_holder, name="Wf_copy")

Esempio n. 7

0

Mostra file

def simple_gradient_test():
  tf.reset_default_graph()
  X0 = np.genfromtxt('data/rotations_simple_X0.csv',
                     delimiter= ",")
  Y0 = np.genfromtxt('data/rotations_simple_Y0.csv',
                     delimiter= ",")
  W0f = v2c_np(np.genfromtxt('data/rotations_simple_W0f.csv',
                            delimiter= ","))
  assert W0f.shape == (8, 1)
  
  fs = np.genfromtxt('data/rotations_simple_fs.csv',
                      delimiter= ",").astype(np.int32)
  n = len(fs)-2    # number of layers
  u.check_equal(fs, [10,2,2,2])

  def f(i): return fs[i+1]  # W[i] has shape f[i] x f[i-1]
  dsize = X0.shape[1]
  assert f(-1) == dsize
  
  # load W0f and do shape checks (can remove)
  W0s = u.unflatten_np(W0f, fs[1:])  # Wf doesn't have first layer (data matrix)
  W0s.insert(0, X0)
  Wf_holder = tf.placeholder(dtype, shape=W0f.shape)
  Wf = tf.Variable(Wf_holder, name="Wf")
  Wf_copy = tf.Variable(Wf_holder, name="Wf_copy")
  init_dict = {Wf_holder: W0f}

  # Create W's
  W = u.unflatten(Wf, fs[1:])
  X = tf.constant(X0)
  Y = tf.constant(Y0)
  W.insert(0, X)
  for (numpy_W, tf_W) in zip(W0s, W):
    u.check_equal(numpy_W.shape, u.fix_shape(tf_W.shape))

  # Create A's
  # A[1] == X
  A = [0]*(n+2)
  A[0] = u.Identity(dsize)
  for i in range(n+1):
    A[i+1] = tf.matmul(W[i], A[i], name="A"+str(i+1))


  assert W[0].get_shape() == X0.shape
  assert A[n+1].get_shape() == X0.shape
  assert A[1].get_shape() == X0.shape

  err = Y - A[n+1]
  loss = tf.reduce_sum(tf.square(err))/(2*dsize)
  lr = tf.Variable(1.0, dtype=dtype)
  
  # Create B's
  B = [0]*(n+1)
  B[n] = -err/dsize
  for i in range(n-1, -1, -1):
    B[i] = t(W[i+1]) @ B[i+1]

  # create dW's
  dW = [0]*(n+1)
  for i in range(n+1):
    dW[i] = tf.matmul(B[i], tf.transpose(A[i]), name="dW"+str(i))
  del dW[0]  # get rid of W[0] update
  
  dWf = tf.concat([u.vec(dWi) for dWi in dW], axis=0)
  Wf_new = Wf - lr * dWf 

  train_op1 = Wf_copy.assign(Wf_new)
  train_op2 = Wf.assign(Wf_copy)

  sess = tf.Session()
  sess.run(tf.global_variables_initializer(), feed_dict=init_dict)
  
  expected_losses = np.loadtxt("data/rotations_simple_gradient_losses.csv",
                               delimiter= ",")
  observed_losses = []
  # from accompanying notebook
  # {0.0111498, 0.00694816, 0.00429464, 0.00248228, 0.00159361,
  #  0.000957424, 0.000651653, 0.000423802, 0.000306749, 0.00021772,
  for i in range(20):
    observed_losses.append(sess.run([loss])[0])
    sess.run(train_op1)
    sess.run(train_op2)

  u.check_equal(observed_losses, expected_losses)

Esempio n. 8

0

Mostra file