class InitialColumnProgNN(object):
    """
    Descr:
        Initial network to train for later use in transfer learning with a
        Progressive Neural Network.
    Args:
        topology - A list of the number of units in each hidden dimension.
            The first entry is the input dimension.
        activations - A list of activation functions to use on the transforms.
        session - A TensorFlow session.
    Returns:
        None - attaches objects to class for InitialColumnProgNN.session.run()
    """

    def __init__(self, topology, activations, session, checkpoint_base_path,
                 dtype=tf.float32):
        n_input = topology[0]
        # Layers in network.
        L = len(topology) - 1
        self.session = session
        self.L = L
        self.topology = topology
        self.checkpoint_base_path = checkpoint_base_path
        self.o_n = tf.placeholder(dtype, shape=[None, n_input],
                                  name='prog_nn_input_placeholder')
        self.W = []
        self.b = []
        self.h = [self.o_n]
        params = []
        for k in range(L):
            shape = topology[k:k + 2]
            self.W.append(
                weight_variable(shape, name="weight_var_layer_" + str(k)))
            self.b.append(
                bias_variable([shape[1]], name="bias_var_layer_" + str(k)))
            self.h.append(activations[k](
                tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
            params.append(self.W[-1])
            params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)

    def save(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path, 0,
                                        checkpoint_i)
        current_params = self.pc.get_values_flat()
        np.save(save_path, current_params)

    def restore_weights(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path, 0,
                                        checkpoint_i)
        saved_theta = np.load(save_path)
        self.pc.set_values_flat(saved_theta)
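# A minimal usage sketch for the dense initial column above, assuming weight_variable,
# bias_variable, get_checkpoint_path, and ParamCollection are available as in the
# surrounding snippets; the topology, batch, and save path here are hypothetical.
import numpy as np
import tensorflow as tf

sess = tf.Session()
topology = [784, 128, 64, 10]                          # hypothetical layer sizes
activations = [tf.nn.relu, tf.nn.relu, tf.nn.softmax]  # one per weight layer
col0 = InitialColumnProgNN(topology, activations, sess, "/tmp/prog_nn")
sess.run(tf.global_variables_initializer())

batch = np.random.rand(32, 784).astype(np.float32)     # hypothetical input batch
out = sess.run(col0.h[-1], feed_dict={col0.o_n: batch})
col0.save(checkpoint_i=0)   # persists pc.get_values_flat() via np.save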
def __init__(self, topology, activations, session, prev_columns, dtype=tf.float64): n_input = topology[0] self.topology = topology self.session = session width = len(prev_columns) # Layers in network. First value is n_input, so it doesn't count. L = len(topology) - 1 self.L = L self.prev_columns = prev_columns # Doesn't work if the columns aren't the same height. assert all([self.L == x.L for x in prev_columns]) self.o_n = tf.placeholder(dtype, shape=[None, n_input]) self.W = [[]] * L self.b = [[]] * L self.U = [] for k in range(L - 1): self.U.append([[]] * width) self.h = [self.o_n] # Collect parameters to hand off to ParamCollection. params = [] for k in range(L): W_shape = topology[k:k + 2] self.W[k] = weight_variable(W_shape) self.b[k] = bias_variable([W_shape[1]]) if k == 0: self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) + self.b[k])) params.append(self.W[k]) params.append(self.b[k]) continue preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k] for kk in range(width): U_shape = [prev_columns[kk].topology[k], topology[k + 1]] # Remember len(self.U) == L - 1! self.U[k - 1][kk] = weight_variable(U_shape) # pprint(prev_columns[kk].h[k].get_shape().as_list()) # pprint(self.U[k-1][kk].get_shape().as_list()) preactivation += tf.matmul(prev_columns[kk].h[k], self.U[k - 1][kk]) self.h.append(activations[k](preactivation)) params.append(self.W[k]) params.append(self.b[k]) for kk in range(width): params.append(self.U[k - 1][kk]) self.pc = ParamCollection(self.session, params)
def __init__(self, topology, activations, session, dtype=tf.float64):
    n_input = topology[0]
    # Layers in network, excluding the input layer.
    L = len(topology) - 1
    self.session = session
    self.L = L
    self.topology = topology  # list of layer output dims
    self.o_n = tf.placeholder(dtype, shape=[None, n_input])  # output of the input layer
    self.W = []  # weights in each layer
    self.b = []  # biases in each layer
    self.h = [self.o_n]  # activation output of each layer
    # Store all Ws and bs; these are the variables updated during training.
    params = []
    for k in range(L):
        shape = topology[k:k + 2]
        self.W.append(weight_variable(shape))
        self.b.append(bias_variable([shape[1]]))
        self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
        params.append(self.W[-1])
        params.append(self.b[-1])
    self.pc = ParamCollection(self.session, params)
class ExtensibleColumnProgNN(object): """ Descr: An extensible network column for use in transfer learning with a Progressive Neural Network. Args: n_input - The array length which the input image is flattened to kernel - A list of kernel size for each layer activations - A list of activation functions to use on the transforms. session - A TensorFlow session. checkpoing_base_path - Save path. prev_columns - Previously trained columns, either Initial or Extensible, we are going to create lateral connections to for the current column. Returns: None - attaches objects to class for ExtensibleColumnProgNN.session.run() """ def __init__(self, n_input, kernel, stride, activations, session, checkpoint_base_path, prev_columns, dtype=tf.float32): self.session = session self.width = len(prev_columns) # Layers in network. First value is n_input, so it doesn't count. L = 5 self.prev_columns = prev_columns self.checkpoint_base_path = checkpoint_base_path # Doesn't work if the columns aren't the same height #assert all([L == x.L for x in prev_columns]) self.o_n = tf.placeholder(dtype=tf.float32, shape=[None, n_input]) self.imageIn = tf.reshape(self.o_n, shape=[-1, 84, 84, 1]) self.W = [[]] * L self.b = [[]] * L self.U = [] self.V = [] self.a = [] for k in range(L - 1): self.U.append([[]] * self.width) self.V.append([[]] * self.width) self.a.append([[]] * self.width) self.h = [self.imageIn] #h[0] # Collect parameters to hand off to ParamCollection. params = [] padding = 'SAME' #first layer, not connected with previous layers self.W[0] = (weight_variable(kernel[0])) self.b[0] = (bias_variable([kernel[0][-1]])) conv = tf.nn.conv2d(self.h[-1], self.W[0], stride[0], padding) + self.b[0] self.h.append(activations(conv)) #h[1] params.append(self.W[0]) params.append(self.b[0]) #second layer self.W[1] = (weight_variable(kernel[1])) self.b[1] = (bias_variable([kernel[1][-1]])) preactivation = tf.nn.conv2d(self.h[-1], self.W[1], stride[1], padding) + self.b[1] for kk in range(self.width): self.a[0][kk] = adapters() ah = tf.multiply(self.a[0][kk], prev_columns[kk].h[1]) maps_in = ah.get_shape().as_list()[3] maps_out = int(maps_in / (2.0 * self.width)) self.V[0][kk] = weight_variable([1, 1, maps_in, maps_out]) lateral = tf.nn.conv2d(ah, self.V[0][kk], stride[2], padding) lateral = activations(lateral) self.U[0][kk] = weight_variable( [kernel[1][0], kernel[1][1], maps_out, kernel[1][3]]) preactivation1 = tf.nn.conv2d(lateral, self.U[0][kk], stride[1], padding) preactivation = preactivation + preactivation1 self.h.append(activations(preactivation)) params.append(self.W[1]) params.append(self.b[1]) for kk in range(self.width): params.append(self.U[0][kk]) params.append(self.V[0][kk]) params.append(self.a[0][kk]) self.h.append(tf.layers.flatten(self.h[-1])) #h[3] #fully connected layer self.W[2] = (weight_variable(kernel[-1])) self.b[2] = (bias_variable([kernel[-1][-1]])) fc = tf.matmul(self.h[-1], self.W[2]) + self.b[2] for kk in range(self.width): self.a[1][kk] = adapters() ah = tf.multiply(self.a[1][kk], prev_columns[kk].h[2]) maps_in = ah.get_shape().as_list()[3] maps_out = int(maps_in / (2.0 * self.width)) self.V[1][kk] = weight_variable([1, 1, maps_in, maps_out]) lateral = tf.nn.conv2d(ah, self.V[1][kk], stride[2], padding) lateral = activations(lateral) #lateral = tf.reshape(lateral,[-1,kernel[-1][-1]]) lateral = tf.layers.flatten(lateral) self.U[1][kk] = weight_variable( [lateral.get_shape().as_list()[-1], kernel[-1][-1]]) fc += tf.matmul(lateral, self.U[1][kk]) self.h.append(activations(fc)) #h[4] 
params.append(self.W[2]) params.append(self.b[2]) for kk in range(self.width): params.append(self.U[1][kk]) params.append(self.V[1][kk]) params.append(self.a[1][kk]) #calculate value self.W[3] = (weight_variable([256, 1])) self.b[3] = (bias_variable([1])) self.value = tf.matmul(self.h[-1], self.W[3]) + self.b[3] for kk in range(self.width): self.a[2][kk] = adapters() ah = tf.multiply(self.a[2][kk], prev_columns[kk].h[4]) maps_in = ah.get_shape().as_list()[1] maps_out = int(maps_in / (2.0 * self.width)) self.V[2][kk] = weight_variable([maps_in, maps_out]) lateral = tf.matmul(ah, self.V[2][kk]) lateral = activations(lateral) self.U[2][kk] = weight_variable([maps_out, 1]) self.value += tf.matmul(lateral, self.U[2][kk]) params.append(self.W[3]) params.append(self.b[3]) for kk in range(self.width): params.append(self.U[2][kk]) params.append(self.V[2][kk]) params.append(self.a[2][kk]) #calculate policy self.W[4] = (weight_variable([256, 6])) self.b[4] = (bias_variable([6])) fc = tf.matmul(self.h[-1], self.W[4]) + self.b[4] for kk in range(self.width): self.a[3][kk] = adapters() ah = tf.multiply(self.a[3][kk], prev_columns[kk].h[4]) maps_in = ah.get_shape().as_list()[1] maps_out = int(maps_in / (2.0 * self.width)) self.V[3][kk] = weight_variable([maps_in, maps_out]) lateral = tf.matmul(ah, self.V[3][kk]) lateral = activations(lateral) self.U[3][kk] = weight_variable([maps_out, 6]) fc += tf.matmul(lateral, self.U[3][kk]) self.policy = tf.nn.softmax(fc) params.append(self.W[4]) params.append(self.b[4]) for kk in range(self.width): params.append(self.U[3][kk]) params.append(self.V[3][kk]) params.append(self.a[3][kk]) self.pc = ParamCollection(self.session, params) def add_input_to_feed_dict(self, feed_dict, input_batch): for col in self.prev_columns: feed_dict[col.o_n] = input_batch feed_dict[self.o_n] = input_batch return feed_dict def save(self, checkpoint_i): self.save_path, file_name = get_checkpoint_path( self.checkpoint_base_path, self.width, checkpoint_i) current_params = self.pc.get_values_flat() np.save(file_name, current_params) def restore_weights(self, checkpoint_i): self.save_path, file_name = get_checkpoint_path( self.checkpoint_base_path, self.width, checkpoint_i) saved_theta = np.load(file_name) self.pc.set_values_flat(saved_theta)
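# A minimal sketch of stacking the convolutional extensible column above onto a
# previously trained convolutional initial column (the variant with
# add_input_to_feed_dict shown elsewhere in these snippets). The kernel, stride,
# and path values are hypothetical, chosen only to keep the lateral shapes consistent.
import numpy as np
import tensorflow as tf

sess = tf.Session()
kernel = [[8, 8, 1, 16], [4, 4, 16, 32], [11 * 11 * 32, 256]]  # last entry is the FC shape
stride = [[1, 4, 4, 1], [1, 2, 2, 1], [1, 1, 1, 1]]            # stride[2] is the 1x1 lateral conv
base_path = "/tmp/prog_nn"

col0 = InitialColumnProgNN(84 * 84, kernel, stride, tf.nn.relu, sess, base_path)
col1 = ExtensibleColumnProgNN(84 * 84, kernel, stride, tf.nn.relu, sess,
                              base_path, [col0])
sess.run(tf.global_variables_initializer())

obs = np.random.rand(8, 84 * 84).astype(np.float32)
feed = col1.add_input_to_feed_dict({}, obs)   # feeds col0.o_n and col1.o_n with the same batch
policy, value = sess.run([col1.policy, col1.value], feed_dict=feed)
col1.save(checkpoint_i=0)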
class AtariRAMPolicy(PPOPolicy, Serializable):

    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")
        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()
        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])
        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)
        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)
        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
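# A minimal sketch of driving the compiled policy functions above from a PPO-style
# outer loop; the observation, action, and return arrays are hypothetical, and the
# last line is only an illustrative parameter update, not the optimizer used here.
import numpy as np

policy = AtariRAMPolicy(n_actions=6)

o_no = np.random.randint(0, 256, size=(32, 128)).astype('float64')  # hypothetical RAM batch
step_out = policy.step(o_no)                   # {"action": ..., "pdist": ...}
a_n = step_out["action"]
q_n = np.random.randn(32)                      # hypothetical returns / advantages

surr, kl = policy.compute_surr_kl(step_out["pdist"], o_no, a_n, q_n)
g = policy.compute_grad_lagrangian(1.0, step_out["pdist"], o_no, a_n, q_n)
theta = policy.get_parameters_flat()
policy.set_parameters_flat(theta + 1e-3 * g)   # illustrative ascent step only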
def main(): nr.seed(0) parser = argparse.ArgumentParser() parser.add_argument("--data_dir", type=str, default="alice") parser.add_argument("--size_mem", type=int, default=64) parser.add_argument("--size_batch", type=int, default=64) parser.add_argument("--n_layers", type=int, default=2) parser.add_argument("--n_unroll", type=int, default=16) parser.add_argument("--k_in", type=int, default=3) parser.add_argument("--k_h", type=int, default=5) parser.add_argument("--step_size", type=float, default=.01) parser.add_argument("--decay_rate", type=float, default=0.95) parser.add_argument("--n_epochs", type=int, default=20) parser.add_argument("--arch", choices=["lstm", "gru"], default="gru") parser.add_argument("--grad_check", action="store_true") parser.add_argument("--profile", action="store_true") parser.add_argument("--unittest", action="store_true") args = parser.parse_args() cgt.set_precision("quad" if args.grad_check else "single") assert args.n_unroll > 1 loader = Loader(args.data_dir, args.size_batch, args.n_unroll, (.8, .1, .1)) network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step( args.arch, loader.size_vocab, loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll, args.k_in, args.k_h) if args.profile: profiler.start() params = network.get_parameters() pc = ParamCollection(params) pc.set_value_flat(nr.uniform(-0.01, 0.01, size=(pc.get_total_size(), ))) for i, param in enumerate(pc.params): if "is_rotation" in param.props: shape = pc.get_shapes()[i] num_vec = int(shape[0] / 2) size_vec = int(shape[1]) gauss = nr.normal(size=(num_vec * size_vec)) gauss = np.reshape(gauss, (num_vec, size_vec)) gauss_mag = norm(gauss, axis=1, keepdims=True) gauss_normed = gauss / gauss_mag gauss_perturb = nr.normal(scale=0.01, size=(num_vec * size_vec)) gauss_perturb = np.reshape(gauss_perturb, (num_vec, size_vec)) second_vec = gauss_normed + gauss_perturb second_vec_mag = norm(second_vec, axis=1, keepdims=True) second_vec_normed = second_vec / second_vec_mag new_param_value = np.zeros(shape) for j in xrange(num_vec): new_param_value[2 * j, :] = gauss_normed[j, :] new_param_value[2 * j + 1, :] = second_vec_normed[j, :] param.op.set_value(new_param_value) #print new_param_value def initialize_hiddens(n): return [ np.ones((n, args.size_mem), cgt.floatX) / float(args.size_mem) for _ in xrange(get_num_hiddens(args.arch, args.n_layers)) ] if args.grad_check: #if True: x, y = loader.train_batches_iter().next() prev_hiddens = initialize_hiddens(args.size_batch) def f(thnew): thold = pc.get_value_flat() pc.set_value_flat(thnew) loss = f_loss(x, y, *prev_hiddens) pc.set_value_flat(thold) return loss from cgt.numeric_diff import numeric_grad print "Beginning grad check" g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10) print "Ending grad check" result = f_loss_and_grad(x, y, *prev_hiddens) g_anal = result[1] diff = g_num - g_anal abs_diff = np.abs(diff) print np.where(abs_diff > 1e-4) print diff[np.where(abs_diff > 1e-4)] embed() assert np.allclose(g_num, g_anal, atol=1e-4) print "Gradient check succeeded!" 
return optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size=args.step_size, decay_rate=args.decay_rate) for iepoch in xrange(args.n_epochs): losses = [] tstart = time() print "starting epoch", iepoch cur_hiddens = initialize_hiddens(args.size_batch) for (x, y) in loader.train_batches_iter(): out = f_loss_and_grad(x, y, *cur_hiddens) loss = out[0] grad = out[1] cur_hiddens = out[2:] rmsprop_update(grad, optim_state) pc.set_value_flat(optim_state.theta) losses.append(loss) if args.unittest: return print "%.3f s/batch. avg loss = %.3f" % ( (time() - tstart) / len(losses), np.mean(losses)) optim_state.step_size *= .98 #pylint: disable=E1101 sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=300, temp=1.0, seed_text="") if args.profile: profiler.print_stats()
def __init__(self, session, n_actions, ih, iw, nin): """ Method: __init__(self, session, n_actions, ih, iw, nin) Args: self -- standard method session -- a TensorFlow session. n_actions -- the dimension of the action space, assumed to be discrete because we're playing Atari. ih -- image height iw -- image width nin -- input channels in image, typically 3 for rgb_array. Returns: None -- defines model from images to actions for class Policy. """ self.session = session self.n_actions = n_actions self.img_no = tf.placeholder(tf.float32, shape=[None, ih, iw, nin]) self.a_n = tf.placeholder(tf.int32, shape=[None]) self.q_n = tf.placeholder(tf.float32, shape=[None]) self.oldpdist_np = tf.placeholder(tf.float32, shape=[None, n_actions]) self.keep_prob = tf.placeholder(tf.float32) self.lam = tf.placeholder(tf.float32) self.n_batch = tf.shape(self.img_no)[0] mu, var = tf.nn.moments(self.img_no,axes=[0,1,2,3]) normed_img = tf.nn.batch_normalization( self.img_no, mu, var, None, None,1e-6) with tf.variable_scope("conv1"): relu1 = conv_relu(normed_img, [5,5,nin,24],[24], stride=2) with tf.variable_scope("conv2"): relu2 = conv_relu(relu1, [5,5,24,36], [36], stride=2) with tf.variable_scope("conv3"): relu3 = conv_relu(relu2, [3,3,36,64], [64], stride=2) with tf.variable_scope("conv4"): relu4 = conv_relu(relu3, [5,5,64,64], [64], stride=2) with tf.variable_scope("avgpool1"): avgpool1 = tf.nn.avg_pool(relu4, [1, 5, 5, 1], strides=[1,1,1,1], padding='VALID') avgpool1_shape = avgpool1.get_shape().as_list() avgpool1_flat_n = np.prod(avgpool1_shape[1:]) avgpool1_flat = tf.reshape(avgpool1, [self.n_batch, avgpool1_flat_n]) with tf.variable_scope("fc1"): fc1 = fc_relu(avgpool1_flat, avgpool1_flat_n, 1164) fc1_dropout = tf.nn.dropout(fc1, self.keep_prob) with tf.variable_scope("fc2"): fc2 = fc_relu(fc1_dropout, 1164, 512) fc2_dropout = tf.nn.dropout(fc2, self.keep_prob) with tf.variable_scope("fc3"): fc3 = fc_relu(fc2_dropout, 512, 128) fc3_dropout = tf.nn.dropout(fc3, self.keep_prob) with tf.variable_scope("fc4"): fc4 = fc_relu(fc3_dropout, 128, 64) fc4_dropout = tf.nn.dropout(fc4, self.keep_prob) with tf.variable_scope("probs_na"): weights = tf.get_variable("weights", [64, n_actions], initializer=tf.random_normal_initializer()) biases = tf.get_variable("biases", [n_actions], initializer=tf.constant_initializer(0.0)) self.probs_na = tf.nn.softmax(tf.matmul(fc4_dropout, weights) \ + biases) self.pred_action = tf.argmax(self.probs_na, 1) logprobs_na = tf.log(self.probs_na) idx_flattened = tf.range(0,self.n_batch) * n_actions + self.a_n logps_n = tf.gather(tf.reshape(logprobs_na, [-1]), idx_flattened) self.surr = tf.reduce_mean(tf.mul(logps_n, self.q_n)) params = tf.trainable_variables() self.surr_grads = tf.gradients(self.surr, params) self.kl = tf.reduce_mean( tf.reduce_sum( tf.mul(self.oldpdist_np, tf.log(tf.div(self.oldpdist_np, self.probs_na))), 1 ) ) penobj = tf.sub(self.surr, tf.mul(self.lam, self.kl)) self.pc = ParamCollection(self.session, params)
class AtariRAMPolicy(PPOPolicy, Serializable):

    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")
        nhid, nhid2 = 64, 64
        # Dropout is applied to the scaled input and after each hidden layer.
        h0 = (o_no - 128.0) / 128.0
        d0 = nn.dropout(h0, .2)
        h1 = nn.rectify(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(d0))
        d1 = nn.dropout(h1, .2)
        h2 = nn.rectify(
            nn.Affine(nhid, nhid2, weight_init=nn.IIDGaussian(std=.1))(d1))
        d2 = nn.dropout(h2, .2)
        probs_na = nn.softmax(
            nn.Affine(nhid2, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(d2))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()
        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])
        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)
        self.f_probs = cgt.function([o_no], probs_na)
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)
        self.pc = ParamCollection(params)

    def step(self, X):
        pdist_na = self.f_probs(X)
        acts_n = cat_sample(pdist_na)
        return {"action": acts_n, "pdist": pdist_na}

    def compute_gradient(self, pdist_np, o_no, a_n, q_n):
        return self.f_gradlogp(pdist_np, o_no, a_n, q_n)

    def compute_surr_kl(self, pdist_np, o_no, a_n, q_n):
        return self.f_surr_kl(pdist_np, o_no, a_n, q_n)

    def compute_grad_lagrangian(self, lam, pdist_np, o_no, a_n, q_n):
        return self._f_grad_lagrangian(lam, pdist_np, o_no, a_n, q_n)

    def compute_entropy(self, pdist_np):
        return cat_entropy(pdist_np)

    def get_parameters_flat(self):
        return self.pc.get_value_flat()

    def set_parameters_flat(self, th):
        return self.pc.set_value_flat(th)
def __init__(self, topology, activations, session, checkpoint_base_path,
             prev_columns, dtype=tf.float32):
    n_input = topology[0]
    self.topology = topology
    self.session = session
    width = len(prev_columns)
    # Layers in network. First value is n_input, so it doesn't count.
    L = len(topology) - 1
    self.L = L
    self.prev_columns = prev_columns
    self.checkpoint_base_path = checkpoint_base_path
    self.column_number = width
    # Doesn't work if the columns aren't the same height.
    assert all([self.L == x.L for x in prev_columns])
    self.o_n = tf.placeholder(dtype, shape=[None, n_input],
                              name='prog_nn_input_placeholder')
    self.W = [[]] * L
    self.b = [[]] * L
    self.U = []
    for k in range(L - 1):
        self.U.append([[]] * width)
    self.h = [self.o_n]
    # Collect parameters to hand off to ParamCollection.
    params = []
    for k in range(L):
        W_shape = topology[k:k + 2]
        self.W[k] = weight_variable(W_shape,
                                    name="weight_var_layer_" + str(k))
        self.b[k] = bias_variable([W_shape[1]],
                                  name="bias_var_layer_" + str(k))
        if k == 0:
            if activations[k] is None:
                self.h.append(tf.matmul(self.h[-1], self.W[k]) + self.b[k])
            else:
                self.h.append(activations[k](
                    tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
            params.append(self.W[k])
            params.append(self.b[k])
            continue
        preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k]
        for kk in range(width):
            U_shape = [prev_columns[kk].topology[k], topology[k + 1]]
            # Remember len(self.U) == L - 1!
            self.U[k - 1][kk] = weight_variable(
                U_shape,
                name="lateral_weight_var_layer_" + str(k) + "_to_column_" +
                str(kk))
            # Add the lateral contribution: the previous column's activation
            # at layer k, mapped through the U transform.
            preactivation += tf.matmul(prev_columns[kk].h[k],
                                       self.U[k - 1][kk])
        if activations[k] is None:
            self.h.append(preactivation)
        else:
            self.h.append(activations[k](preactivation))
        params.append(self.W[k])
        params.append(self.b[k])
        for kk in range(width):
            params.append(self.U[k - 1][kk])
    self.pc = ParamCollection(self.session, params)
class InitialColumnProgNN(object):
    """
    Descr:
        Initial network to train for later use in transfer learning with a
        Progressive Neural Network.
    Args:
        n_input - The array length that the input image is flattened to.
        kernel - A list of kernel sizes for each layer.
        activations - A list of activation functions to use on the transforms.
        session - A TensorFlow session.
        checkpoint_base_path - Save path.
    Returns:
        None - attaches objects to class for InitialColumnProgNN.session.run()
    """

    # TODO: add a name to every tensor.
    def __init__(self, n_input, kernel, stride, activations, session,
                 checkpoint_base_path, dtype=tf.float32):
        # Layers in network.
        self.session = session
        self.o_n = tf.placeholder(dtype=tf.float32, shape=[None, n_input])
        self.imageIn = tf.reshape(self.o_n, shape=[-1, 84, 84, 1])
        self.checkpoint_base_path = checkpoint_base_path
        self.W = []
        self.b = []
        self.h = [self.imageIn]
        params = []
        padding = 'SAME'
        # The first two (convolutional) layers.
        for k in range(2):
            # When training the second column, set initial=True if the previous
            # weights need to be frozen; the variables are then created as not
            # trainable.
            self.W.append(weight_variable(kernel[k], initial=None))
            self.b.append(bias_variable([kernel[k][-1]], initial=None))
            conv = tf.nn.conv2d(self.h[-1], self.W[k], stride[k],
                                padding) + self.b[k]
            self.h.append(activations(conv))
            params.append(self.W[k])
            params.append(self.b[k])
        self.h.append(tf.layers.flatten(self.h[-1]))
        # Fully connected layer.
        self.W.append(weight_variable(kernel[-1], initial=None))
        self.b.append(bias_variable([kernel[-1][-1]], initial=None))
        fc = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        self.h.append(activations(fc))
        params.append(self.W[-1])
        params.append(self.b[-1])
        # Calculate value.
        self.W.append(weight_variable([256, 1], initial=None))
        self.b.append(bias_variable([1], initial=None))
        self.value = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        params.append(self.W[-1])
        params.append(self.b[-1])
        # Calculate policy.
        self.W.append(weight_variable([256, 6], initial=None))
        self.b.append(bias_variable([6], initial=None))
        fc = tf.matmul(self.h[-1], self.W[-1]) + self.b[-1]
        self.policy = tf.nn.softmax(fc)
        params.append(self.W[-1])
        params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)

    def add_input_to_feed_dict(self, feed_dict, input_batch):
        feed_dict[self.o_n] = input_batch
        return feed_dict

    def save(self, checkpoint_i):
        self.save_path, file_name = get_checkpoint_path(
            self.checkpoint_base_path, 0, checkpoint_i)
        current_params = self.pc.get_values_flat()
        np.save(file_name, current_params)

    def restore_weights(self, checkpoint_i):
        self.save_path, file_name = get_checkpoint_path(
            self.checkpoint_base_path, 0, checkpoint_i)
        saved_theta = np.load(file_name)
        self.pc.set_values_flat(saved_theta)
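# A minimal usage sketch for the convolutional initial column above, assuming
# weight_variable, bias_variable, and get_checkpoint_path behave as in these
# snippets; the kernel, stride, and path values are hypothetical.
import numpy as np
import tensorflow as tf

sess = tf.Session()
kernel = [[8, 8, 1, 16], [4, 4, 16, 32], [11 * 11 * 32, 256]]  # last entry is the FC shape
stride = [[1, 4, 4, 1], [1, 2, 2, 1]]
col0 = InitialColumnProgNN(84 * 84, kernel, stride, tf.nn.relu, sess, "/tmp/prog_nn")
sess.run(tf.global_variables_initializer())

obs = np.random.rand(8, 84 * 84).astype(np.float32)
feed = col0.add_input_to_feed_dict({}, obs)
policy, value = sess.run([col0.policy, col0.value], feed_dict=feed)
col0.save(checkpoint_i=0)     # np.save of the flattened ParamCollection values
col0.restore_weights(0)       # reload them later via set_values_flat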
def train(args, X, Y, dbg_iter=None, dbg_epoch=None, dbg_done=None):
    dbg_out = []
    net_in, net_out = hybrid_network(args.num_inputs, args.num_outputs,
                                     args.num_units, args.num_sto,
                                     dbg_out=dbg_out)
    params, f_step, f_loss, f_grad, f_surr = \
        make_funcs(net_in, net_out, args, dbg_out=dbg_out)
    param_col = ParamCollection(params)
    init_params = nn.init_array(args.init_conf,
                                (param_col.get_total_size(), 1))
    param_col.set_value_flat(init_params.flatten())
    init_params = [
        np.array([[0., 1.]]),    # W_1
        np.array([[0., 0.]]),    # b_1
        np.array([[1.], [1.]]),  # W_3
        np.array([[0.]]),        # b_3
    ]
    param_col.set_values(init_params)
    if 'snapshot' in args:
        print "Loading params from previous snapshot"
        snapshot = pickle.load(open(args['snapshot'], 'r'))
        param_col.set_values(snapshot)
    # param_col.set_value_flat(
    #     np.random.normal(0., 1., size=param_col.get_total_size())
    # )
    # optim_state = Table(theta=param_col.get_value_flat(),
    #                     scratch=param_col.get_value_flat(),
    #                     step_size=args.step_size)
    optim_state = make_rmsprop_state(theta=param_col.get_value_flat(),
                                     step_size=args.step_size,
                                     decay_rate=args.decay_rate)
    for i_epoch in range(args.n_epochs):
        for i_iter in range(X.shape[0]):
            ind = np.random.choice(X.shape[0], args['size_batch'])
            x, y = X[ind], Y[ind]  # not sure this works for multi-dim
            info = f_surr(x, y, num_samples=args['size_sample'])
            loss, loss_surr, grad = info['loss'], info['surr_loss'], \
                info['surr_grad']
            # loss, loss_surr, grad = f_grad(x, y)
            # update
            rmsprop_update(param_col.flatten_values(grad), optim_state)
            # optim_state.scratch = param_col.flatten_values(grad)
            # optim_state.theta -= optim_state.step_size * optim_state.scratch
            param_col.set_value_flat(optim_state.theta)
            print param_col.get_value_flat()
            if dbg_iter:
                dbg_iter(i_epoch, i_iter, param_col, optim_state, info)
        if dbg_epoch:
            dbg_epoch(i_epoch, param_col, f_surr)
    if dbg_done:
        dbg_done(param_col, optim_state, f_surr)
    return optim_state
def main(): nr.seed(0) parser = argparse.ArgumentParser() parser.add_argument("--data_dir", type=str, default="alice") parser.add_argument("--size_mem", type=int, default=64) parser.add_argument("--size_batch", type=int, default=64) parser.add_argument("--n_layers", type=int, default=2) parser.add_argument("--n_unroll", type=int, default=16) parser.add_argument("--step_size", type=float, default=.01) parser.add_argument("--decay_rate", type=float, default=0.95) parser.add_argument("--n_epochs", type=int, default=20) parser.add_argument("--arch", choices=["lstm", "gru"], default="lstm") parser.add_argument("--grad_check", action="store_true") parser.add_argument("--profile", action="store_true") parser.add_argument("--unittest", action="store_true") args = parser.parse_args() cgt.set_precision("quad" if args.grad_check else "single") assert args.n_unroll > 1 loader = Loader(args.data_dir, args.size_batch, args.n_unroll, (.8, .1, .1)) network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step( args.arch, loader.size_vocab, loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll) if args.profile: profiler.start() params = network.get_parameters() pc = ParamCollection(params) pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(), ))) def initialize_hiddens(n): return [ np.zeros((n, args.size_mem), cgt.floatX) for _ in xrange(get_num_hiddens(args.arch, args.n_layers)) ] if args.grad_check: x, y = loader.train_batches_iter().next() prev_hiddens = initialize_hiddens(args.size_batch) def f(thnew): thold = pc.get_value_flat() pc.set_value_flat(thnew) loss = f_loss(x, y, *prev_hiddens) pc.set_value_flat(thold) return loss from cgt.numeric_diff import numeric_grad g_num = numeric_grad(f, pc.get_value_flat(), eps=1e-10) result = f_loss_and_grad(x, y, *prev_hiddens) g_anal = result[1] assert np.allclose(g_num, g_anal, atol=1e-4) print "Gradient check succeeded!" return optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size=args.step_size, decay_rate=args.decay_rate) for iepoch in xrange(args.n_epochs): losses = [] tstart = time() print "starting epoch", iepoch cur_hiddens = initialize_hiddens(args.size_batch) for (x, y) in loader.train_batches_iter(): out = f_loss_and_grad(x, y, *cur_hiddens) loss = out[0] grad = out[1] cur_hiddens = out[2:] rmsprop_update(grad, optim_state) pc.set_value_flat(optim_state.theta) losses.append(loss) if args.unittest: return print "%.3f s/batch. avg loss = %.3f" % ( (time() - tstart) / len(losses), np.mean(losses)) optim_state.step_size *= .98 #pylint: disable=E1101 sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=300, temp=1.0, seed_text="") if args.profile: profiler.print_stats()
r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
r_norm = cgt.norm(r_non, axis=2, keepdims=True)
r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
inters = [prev_h_3]

for i in xrange(k_in * 2):
    inter_in = inters[-1]
    r_cur = r[:, i, :]
    r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
    r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
    ref_cur = cgt.batched_matmul(
        r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
    inter_out = inter_in - ref_cur
    inters.append(inter_out)

h = inters[-1]

r_nn = nn.Module([x], [h])
params = r_nn.get_parameters()
pc = ParamCollection(params)
pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

func = cgt.function([x, prev_h], h)
x_in = nr.uniform(-.1, .1,
                  size=(size_batch * size_x)).reshape(size_batch, size_x)
h_in = np.zeros((size_batch, size_mem))
h_in[:, 0] = np.ones(size_batch)
h = func(x_in, h_in)
class ExtensibleColumnProgNN(object):
    """
    Descr:
        An extensible network column for use in transfer learning with a
        Progressive Neural Network.
    Args:
        topology - A list of the number of units in each hidden dimension.
            The first entry is the input dimension.
        activations - A list of activation functions to use on the transforms.
        session - A TensorFlow session.
        prev_columns - Previously trained columns, either Initial or
            Extensible, that the current column creates lateral connections to.
    Returns:
        None - attaches objects to class for ExtensibleColumnProgNN.session.run()
    """

    def __init__(self, topology, activations, session, checkpoint_base_path,
                 prev_columns, dtype=tf.float32):
        n_input = topology[0]
        self.topology = topology
        self.session = session
        width = len(prev_columns)
        # Layers in network. First value is n_input, so it doesn't count.
        L = len(topology) - 1
        self.L = L
        self.prev_columns = prev_columns
        self.checkpoint_base_path = checkpoint_base_path
        self.column_number = width
        # Doesn't work if the columns aren't the same height.
        assert all([self.L == x.L for x in prev_columns])
        self.o_n = tf.placeholder(dtype, shape=[None, n_input],
                                  name='prog_nn_input_placeholder')
        self.W = [[]] * L
        self.b = [[]] * L
        self.U = []
        for k in range(L - 1):
            self.U.append([[]] * width)
        self.h = [self.o_n]
        # Collect parameters to hand off to ParamCollection.
        params = []
        for k in range(L):
            W_shape = topology[k:k + 2]
            self.W[k] = weight_variable(W_shape,
                                        name="weight_var_layer_" + str(k))
            self.b[k] = bias_variable([W_shape[1]],
                                      name="bias_var_layer_" + str(k))
            if k == 0:
                self.h.append(activations[k](
                    tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
                params.append(self.W[k])
                params.append(self.b[k])
                continue
            preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k]
            for kk in range(width):
                U_shape = [prev_columns[kk].topology[k], topology[k + 1]]
                # Remember len(self.U) == L - 1!
                self.U[k - 1][kk] = weight_variable(
                    U_shape,
                    name="lateral_weight_var_layer_" + str(k) +
                    "_to_column_" + str(kk))
                preactivation += tf.matmul(prev_columns[kk].h[k],
                                           self.U[k - 1][kk])
            self.h.append(activations[k](preactivation))
            params.append(self.W[k])
            params.append(self.b[k])
            for kk in range(width):
                params.append(self.U[k - 1][kk])
        self.pc = ParamCollection(self.session, params)

    def save(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path,
                                        self.column_number, checkpoint_i)
        current_params = self.pc.get_values_flat()
        np.save(save_path, current_params)

    def restore_weights(self, checkpoint_i):
        save_path = get_checkpoint_path(self.checkpoint_base_path,
                                        self.column_number, checkpoint_i)
        saved_theta = np.load(save_path)
        self.pc.set_values_flat(saved_theta)
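# A minimal sketch of stacking a dense extensible column on a trained initial
# column, assuming the dense InitialColumnProgNN defined earlier in this file;
# because the lateral terms read prev_columns[kk].h[k], every column's placeholder
# must be fed the same batch. Topology and batch values are hypothetical.
import numpy as np
import tensorflow as tf

sess = tf.Session()
topology = [784, 128, 64, 10]                          # shared by both columns
activations = [tf.nn.relu, tf.nn.relu, tf.nn.softmax]

col0 = InitialColumnProgNN(topology, activations, sess, "/tmp/prog_nn")
col1 = ExtensibleColumnProgNN(topology, activations, sess, "/tmp/prog_nn", [col0])
sess.run(tf.global_variables_initializer())

batch = np.random.rand(32, 784).astype(np.float32)
feed = {col0.o_n: batch, col1.o_n: batch}   # feed every column in the stack
out = sess.run(col1.h[-1], feed_dict=feed)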
def __init__(self, obs_dim, ctrl_dim):
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim

    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)),
                                           name="std_1a")
    std_1a = cgt.exp(logstd_1a)

    # Here's where we apply the network
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(
        nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(
        nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim,
                        weight_init=nn.IIDGaussian(std=0.01))(h2)

    b = cgt.size(o_no, 0)
    std_na = cgt.repeat(std_1a, b, axis=0)

    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

    logp_n = ((-.5) * cgt.square(
        (a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square(
        (a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(
            oldstd_na).sum(axis=1)

    ratio_n = cgt.exp(logp_n - oldlogp_n)
    surr = (ratio_n * adv_n).mean()
    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

    params = nn.get_parameters(surr)

    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na) +
          (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) -
          .5).sum(axis=1).mean()

    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                         [surr, kl])
    self._compute_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)
    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self.pc = ParamCollection(params)
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--grad_check", action="store_true")
    parser.add_argument("--n_batches", type=int, default=1000000)
    parser.add_argument("--profile", action="store_true")
    parser.add_argument("--unittest", action="store_true")
    parser.add_argument("--task",
                        choices=["copy", "reverse_copy", "repeat_copy"],
                        default="copy")
    args = parser.parse_args()
    np.seterr("raise")

    cgt.set_precision("quad" if args.grad_check else "double")
    np.random.seed(0)

    # model parameters
    if args.grad_check:
        opt = NTMOpts(
            b=1,  # batch size
            h=1,  # number of heads
            n=2,  # number of memory sites
            m=3,  # dimension at each memory site
            k=4,  # dimension of input
            p=2,  # dimension of output
            ff_hid_sizes=[])
        seq_length = 2
    else:
        opt = NTMOpts(
            b=64,  # batch size
            h=3,  # number of heads
            n=128,  # number of memory sites
            m=20,  # dimension at each memory site
            k=3,  # dimension of input
            p=1,  # dimension of output
            ff_hid_sizes=[128, 128])
        seq_length = 10

    if args.unittest:
        seq_length = 3
        args.n_batches = 3

    tstart = time.time()
    ntm = make_ntm(opt)

    if args.task == "copy":
        task = CopyTask(opt.b, seq_length, opt.p)
    elif args.task == "reverse_copy":
        task = ReverseCopyTask(opt.b, seq_length, opt.p)
    elif args.task == "repeat_copy":
        n_copies = 4
        task = RepeatCopyTask(opt.b, seq_length, opt.p, n_copies)

    f_loss, f_loss_and_grad, params = make_funcs(opt, ntm, task.total_time(),
                                                 task.loss_timesteps())
    print "graph construction and compilation took %g seconds" % (
        time.time() - tstart)

    pc = ParamCollection(params)
    pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),)))

    if args.grad_check:
        x, y = task.gen_batch()
        th = pc.get_value_flat()  # flat parameter vector used by the numeric grad check

        def f(thnew):
            thold = th.copy()
            pc.set_value_flat(thnew)
            loss = f_loss(x, y)
            pc.set_value_flat(thold)
            return loss

        from cgt.numeric_diff import numeric_grad
        g_num = numeric_grad(f, th, eps=1e-8)
        _, _, g_anal = f_loss_and_grad(x, y)
        assert np.allclose(g_num, g_anal, atol=1e-8)
        print "Gradient check succeeded!"
        print "%i/%i elts of grad are nonzero" % ((g_anal != 0).sum(),
                                                  g_anal.size)
        return

    seq_num = 0
    state = make_rmsprop_state(pc.get_value_flat(), .01, .95)
    print fmt_row(13, ["seq num", "CE (bits)", "accuracy", "|g|_inf"],
                  header=True)

    if args.profile:
        cgt.profiler.start()

    for i in xrange(args.n_batches):
        x, y = task.gen_batch()
        seq_num += x.shape[1]
        l, l01, g = f_loss_and_grad(x, y)
        print fmt_row(13, [seq_num, l, l01, np.abs(g).max()])
        rmsprop_update(g, state)
        pc.set_value_flat(state.theta)
        if not np.isfinite(l):
            break

    if args.profile:
        cgt.profiler.print_stats()
def main(): nr.seed(0) parser = argparse.ArgumentParser() parser.add_argument("--data_dir", type=str, default="alice") parser.add_argument("--size_mem", type=int,default=64) parser.add_argument("--size_batch", type=int,default=64) parser.add_argument("--n_layers",type=int,default=2) parser.add_argument("--n_unroll",type=int,default=16) parser.add_argument("--step_size",type=float,default=.01) parser.add_argument("--decay_rate",type=float,default=0.95) parser.add_argument("--n_epochs",type=int,default=20) parser.add_argument("--arch",choices=["lstm","gru"],default="lstm") parser.add_argument("--grad_check",action="store_true") parser.add_argument("--profile",action="store_true") parser.add_argument("--unittest",action="store_true") parser.add_argument("--temperature",type=float,default=1) args = parser.parse_args() cgt.set_precision("quad" if args.grad_check else "single") assert args.n_unroll > 1 loader = Loader(args.data_dir,args.size_batch, args.n_unroll, (1.0,0,0)) network, f_loss, f_loss_and_grad, f_step = make_loss_and_grad_and_step(args.arch, loader.size_vocab, loader.size_vocab, args.size_mem, args.size_batch, args.n_layers, args.n_unroll) if args.profile: profiler.start() params = network.get_parameters() pc = ParamCollection(params) pc.set_value_flat(nr.uniform(-.1, .1, size=(pc.get_total_size(),))) def initialize_hiddens(n): return [np.zeros((n, args.size_mem), cgt.floatX) for _ in xrange(get_num_hiddens(args.arch, args.n_layers))] if args.grad_check: x,y = loader.train_batches_iter().next() prev_hiddens = initialize_hiddens(args.size_batch) def f(thnew): thold = pc.get_value_flat() pc.set_value_flat(thnew) loss = f_loss(x,y, *prev_hiddens) pc.set_value_flat(thold) return loss from cgt.numeric_diff import numeric_grad g_num = numeric_grad(f, pc.get_value_flat(),eps=1e-10) result = f_loss_and_grad(x,y,*prev_hiddens) g_anal = result[1] assert np.allclose(g_num, g_anal, atol=1e-4) print "Gradient check succeeded!" return optim_state = make_rmsprop_state(theta=pc.get_value_flat(), step_size = args.step_size, decay_rate = args.decay_rate) for iepoch in xrange(args.n_epochs): losses = [] tstart = time() print "starting epoch",iepoch cur_hiddens = initialize_hiddens(args.size_batch) for (x,y) in loader.train_batches_iter(): out = f_loss_and_grad(x,y, *cur_hiddens) loss = out[0] grad = out[1] cur_hiddens = out[2:] rmsprop_update(grad, optim_state) pc.set_value_flat(optim_state.theta) losses.append(loss) if args.unittest: return print "%.3f s/batch. avg loss = %.3f"%((time()-tstart)/len(losses), np.mean(losses)) optim_state.step_size *= .98 #pylint: disable=E1101 sample(f_step, initialize_hiddens(1), char2ind=loader.char2ind, n_steps=1000, temperature=args.temperature, seed_text = "") if args.profile: profiler.print_stats()