def __call__(self, x, prev_c, prev_h):
    """
    x is the input
    prev_h is the hidden state from the previous timestep
    prev_c is the previous memory cell state

    Returns (next_c, next_h). next_h should be cloned, since it is fed
    into both the next layer and the next timestep.
    """
    forget_gate = cgt.sigmoid(x.dot(self.W_xf) + prev_h.dot(self.W_hf))
    input_gate = cgt.sigmoid(x.dot(self.W_xi) + prev_h.dot(self.W_hi))
    output_gate = cgt.sigmoid(x.dot(self.W_xo) + prev_h.dot(self.W_ho))
    candidate_values = cgt.tanh(x.dot(self.W_xc) + prev_h.dot(self.W_hc))
    # new cell state
    next_c = forget_gate * prev_c + input_gate * candidate_values
    # hidden state for the next timestep
    next_h = output_gate * cgt.tanh(next_c)
    # NOTE: we feed next_h into the next layer and the next timestep,
    # so the caller should clone the next_h output.
    return next_c, next_h
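# A minimal unrolling sketch for the cell above (hypothetical names: `cell`
# is an instance holding the W_* matrices, `xs` is a list of T symbolic
# (batch, size_input) matrices, and c0/h0 are the initial-state matrices):
c, h = c0, h0
hs = []
for x_t in xs:
    c, h = cell(x_t, c, h)
    hs.append(h)  # hs[t] is what feeds the next layer at timestep t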
def make_ff_controller(opt):
    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k

    H = 2*h
    in_size = k + h*m
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p

    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape=(b, h, m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape=(b, k))
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)

    hid_sizes = opt.ff_hid_sizes
    activation = cgt.tanh

    layer_out_sizes = [in_size] + hid_sizes + [out_size]
    last_out = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i in xrange(len(layer_out_sizes)-1):
        indim = layer_out_sizes[i]
        outdim = layer_out_sizes[i+1]
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i"%i, fixed_shape_mask="all")
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i"%i, fixed_shape_mask="all")
        last_out = cgt.broadcast("+", last_out.dot(W), bias, "xx,1x")
        # Don't apply nonlinearity at the last layer
        if i != len(layer_out_sizes)-2:
            last_out = activation(last_out)

    idx = 0
    k_bHm = last_out[:, idx:idx+H*m];   idx += H*m;   k_bHm = k_bHm.reshape([b, H, m])
    beta_bH = last_out[:, idx:idx+H];   idx += H
    g_bH = last_out[:, idx:idx+H];      idx += H
    s_bH3 = last_out[:, idx:idx+3*H];   idx += 3*H;   s_bH3 = s_bH3.reshape([b, H, 3])
    gamma_bH = last_out[:, idx:idx+H];  idx += H
    e_bhm = last_out[:, idx:idx+h*m];   idx += h*m;   e_bhm = e_bhm.reshape([b, h, m])
    a_bhm = last_out[:, idx:idx+h*m];   idx += h*m;   a_bhm = a_bhm.reshape([b, h, m])
    y_bp = last_out[:, idx:idx+p];      idx += p

    k_bHm = cgt.tanh(k_bHm)
    beta_bH = nn.softplus(beta_bH)
    g_bH = cgt.sigmoid(g_bH)
    s_bH3 = sum_normalize2(cgt.exp(s_bH3))
    gamma_bH = cgt.sigmoid(gamma_bH) + 1
    e_bhm = cgt.sigmoid(e_bhm)
    a_bhm = cgt.tanh(a_bhm)
    # y_bp = y_bp

    assert infer_shape(k_bHm) == (b, H, m)
    assert infer_shape(beta_bH) == (b, H)
    assert infer_shape(g_bH) == (b, H)
    assert infer_shape(s_bH3) == (b, H, 3)
    assert infer_shape(gamma_bH) == (b, H)
    assert infer_shape(e_bhm) == (b, h, m)
    assert infer_shape(a_bhm) == (b, h, m)
    assert infer_shape(y_bp) == (b, p)

    return nn.Module([r_bhm, X_bk], [k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp])
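# How out_size decomposes, matching the slicing above: with H = 2*h heads the
# controller emits a key k (H*m), key strength beta (H), interpolation gate
# g (H), shift weights s (3*H), sharpening gamma (H), erase e (h*m), add
# a (h*m), and the prediction y (p). A quick sanity check with hypothetical
# option values (not taken from any real config):
_h, _m, _p = 1, 20, 8
_H = 2*_h
assert _H*_m + _H + _H + 3*_H + _H + _h*_m + _h*_m + _p == 100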
def make_deep_lstm(size_input, size_mem, n_layers, size_output, size_batch):
    # inputs[0] is the external input, followed by one (h, c) pair per layer
    inputs = [cgt.matrix(fixed_shape=(size_batch, size_input))]
    for _ in xrange(2*n_layers):
        inputs.append(cgt.matrix(fixed_shape=(size_batch, size_mem)))
    outputs = []
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer*2+1]  # note that inputs[0] is the external input, so we add 1
        prev_c = inputs[i_layer*2+2]
        if i_layer == 0:
            x = inputs[0]
            size_x = size_input
        else:
            x = outputs[(i_layer-1)*2+1]  # the previous layer's next_h
            size_x = size_mem
        input_sums = nn.Affine(size_x, 4*size_mem)(x) + nn.Affine(size_mem, 4*size_mem)(prev_h)
        sigmoid_chunk = cgt.sigmoid(input_sums[:, 0:3*size_mem])
        in_gate = sigmoid_chunk[:, 0:size_mem]
        forget_gate = sigmoid_chunk[:, size_mem:2*size_mem]
        out_gate = sigmoid_chunk[:, 2*size_mem:3*size_mem]
        in_transform = cgt.tanh(input_sums[:, 3*size_mem:4*size_mem])
        next_c = forget_gate*prev_c + in_gate*in_transform
        next_h = out_gate*cgt.tanh(next_c)
        outputs.append(next_c)
        outputs.append(next_h)
    category_activations = nn.Affine(size_mem, size_output)(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)
    return nn.Module(inputs, outputs)
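# Hypothetical usage (all sizes made up): the module's inputs are
# [x, h0, c0, h1, c1] and its outputs [c0', h0', c1', h1', logprobs],
# so the caller threads the updated states back in at the next timestep.
lstm_net = make_deep_lstm(size_input=256, size_mem=128, n_layers=2,
                          size_output=10, size_batch=32)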
def __init__(self, obs_dim, ctrl_dim):
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim

    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
    std_1a = cgt.exp(logstd_1a)

    # Here's where we apply the network
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(h2)

    b = cgt.size(o_no, 0)
    std_na = cgt.repeat(std_1a, b, axis=0)

    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]

    # Gaussian log-densities (up to a shared constant) under the new and old policies
    logp_n = ((-.5) * cgt.square((a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square((a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)

    ratio_n = cgt.exp(logp_n - oldlogp_n)
    surr = (ratio_n * adv_n).mean()

    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

    params = nn.get_parameters(surr)

    # KL(old || new) for diagonal Gaussians, averaged over the batch
    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()

    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self._compute_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)

    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])

    self.pc = ParamCollection(params)
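# Numeric sanity check of the diagonal-Gaussian KL used above,
#   KL(old || new) = sum_i [log(s_i/os_i) + (os_i^2 + (om_i - m_i)^2) / (2 s_i^2) - 1/2],
# with made-up scalars (numpy only; nothing here belongs to the class):
import numpy as np
om, os_, m, s = 0.0, 1.0, 0.5, 2.0
kl_check = np.log(s / os_) + (os_**2 + (om - m)**2) / (2 * s**2) - 0.5  # ~0.349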
def make_deep_gru(size_input, size_mem, n_layers, size_output, size_batch):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers+1)]
    outputs = []
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer+1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer-1]
        size_x = size_input if i_layer == 0 else size_mem
        update_gate = cgt.sigmoid(
            nn.Affine(size_x, size_mem, name="i2u")(x)
            + nn.Affine(size_mem, size_mem, name="h2u")(prev_h))
        reset_gate = cgt.sigmoid(
            nn.Affine(size_x, size_mem, name="i2r")(x)
            + nn.Affine(size_mem, size_mem, name="h2r")(prev_h))
        gated_hidden = reset_gate * prev_h
        p2 = nn.Affine(size_mem, size_mem)(gated_hidden)
        p1 = nn.Affine(size_x, size_mem)(x)
        hidden_target = cgt.tanh(p1 + p2)
        next_h = (1.0-update_gate)*prev_h + update_gate*hidden_target
        outputs.append(next_h)
    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)
    return nn.Module(inputs, outputs)
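# Hypothetical usage (sizes made up): unlike the LSTM version above, a GRU
# carries one hidden state per layer, so the module takes [x, h0, h1] and
# returns [h0', h1', logprobs].
gru_net = make_deep_gru(size_input=256, size_mem=128, n_layers=2,
                        size_output=10, size_batch=32)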
def __init__(self, n_actions):
    Serializable.__init__(self, n_actions)
    cgt.set_precision('double')
    n_in = 128
    o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
    a_n = cgt.vector("a_n", dtype='i8')
    q_n = cgt.vector("q_n")
    oldpdist_np = cgt.matrix("oldpdists")

    h0 = (o_no - 128.0) / 128.0
    nhid = 64
    h1 = cgt.tanh(nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
    probs_na = nn.softmax(nn.Affine(nhid, n_actions, weight_init=nn.IIDGaussian(std=0.01))(h1))
    logprobs_na = cgt.log(probs_na)
    b = cgt.size(o_no, 0)
    logps_n = logprobs_na[cgt.arange(b), a_n]
    surr = (logps_n * q_n).mean()
    kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

    params = nn.get_parameters(surr)
    gradsurr = cgt.grad(surr, params)
    flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._f_grad_lagrangian = cgt.function([lam, oldpdist_np, o_no, a_n, q_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], probs_na)

    self.f_probs = cgt.function([o_no], probs_na)
    self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
    self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

    self.pc = ParamCollection(params)
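# Quick numpy check of the discrete KL penalty above,
#   KL(p_old || p_new) = sum_a p_old(a) * log(p_old(a) / p_new(a)),
# with made-up distributions:
import numpy as np
p_old = np.array([0.7, 0.3])
p_new = np.array([0.5, 0.5])
kl_check = (p_old * np.log(p_old / p_new)).sum()  # ~0.0823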
def __call__(self, x, prev_h):
    """
    x is the input
    prev_h is the hidden state from the previous timestep

    Returns next_h. For the GRU, the output fed to the next timestep and
    to the next layer is one and the same. Copy it first!
    """
    reset_gate = cgt.sigmoid(x.dot(self.W_xr) + prev_h.dot(self.W_hr))
    update_gate = cgt.sigmoid(x.dot(self.W_xz) + prev_h.dot(self.W_hz))
    # the elementwise multiplication here determines how much of the
    # previous hidden state we should forget
    forget_gate = reset_gate * prev_h
    # this part is very similar to a vanilla RNN
    h_candidate = cgt.tanh(x.dot(self.W_xc) + forget_gate.dot(self.W_hc))
    # this isn't spelled out clearly in the paper, but the blend is an
    # elementwise interpolation between the old state and the candidate
    next_h = (1. - update_gate) * prev_h + update_gate * h_candidate
    # In a standard GRU cell we only have one output.
    # However, it should be copied and fed to
    # both the next timestep and the next layer.
    return next_h
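# A hedged numpy reference of the same GRU step (all names hypothetical),
# handy for checking shapes and equations outside the cgt graph:
import numpy as np

def np_sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def np_gru_step(x, h, W_xr, W_hr, W_xz, W_hz, W_xc, W_hc):
    r = np_sigmoid(x.dot(W_xr) + h.dot(W_hr))
    z = np_sigmoid(x.dot(W_xz) + h.dot(W_hz))
    h_cand = np.tanh(x.dot(W_xc) + (r * h).dot(W_hc))
    return (1.0 - z) * h + z * h_cand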
def get_context(self, prev_state_bf):
    state_step_bf = self.states_mlp_bf(prev_state_bf)
    state_step_b1f = cgt.dimshuffle(state_step_bf, [0, 'x', 1])
    # Compute the inner product <phi(s_i), psi(h_u)> where phi and psi are MLPs.
    # The below line computes the pointwise product of phi(s_i) and psi(h_u) and then sums to get the inner product.
    # scalar_energies_vec_bt = cgt.sqrt(cgt.sum(cgt.broadcast('*', state_step_b1f, self.features_post_mlp_btf, 'x1x,xxx'), axis=2))

    # Compute tau = tanh(h_u*W + s_i*V), broadcasting so all the h_u multiplications happen at once.
    scalar_energies_vec_btf = cgt.tanh(cgt.broadcast('+', self.features_post_mlp_btf, state_step_b1f, 'xxx,x1x'))
    # The next two lines compute w^T*tau with a pointwise product followed by a sum.
    scalar_energies_vec_btf = cgt.broadcast('*', self.mixing_vec_w, scalar_energies_vec_btf, '11x,xxx')
    scalar_energies_vec_bt = cgt.sum(scalar_energies_vec_btf, axis=2)

    # The softmax weights the features over the time dimension.
    softmax_weights_bt = nn.softmax(scalar_energies_vec_bt, axis=1)

    # This weight multiplies all features.
    extended_softmax_bt1 = cgt.dimshuffle(softmax_weights_bt, [0, 1, 'x'])
    # Weight each feature vector by its temporally dependent softmax weight.
    pre_blended = cgt.broadcast('*', extended_softmax_bt1, self.features_post_mlp_btf, 'xx1,xxx')
    # Integrate out time.
    blended_features_bf = cgt.sum(pre_blended, axis=1)

    return blended_features_bf
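# The same additive attention restated in numpy (hypothetical shapes:
# features_btf is (batch, time, feat) after the post-MLP, state_bf is
# (batch, feat), w_f is the mixing vector):
import numpy as np

def np_soft_attention(features_btf, state_bf, w_f):
    tau_btf = np.tanh(features_btf + state_bf[:, None, :])    # tau = tanh(h_u + s_i)
    scores_bt = (tau_btf * w_f).sum(axis=2)                   # w^T * tau
    alpha_bt = np.exp(scores_bt - scores_bt.max(axis=1, keepdims=True))
    alpha_bt /= alpha_bt.sum(axis=1, keepdims=True)           # softmax over time
    return (alpha_bt[:, :, None] * features_btf).sum(axis=1)  # blend, integrate out time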
def __call__(self, x, prev_h):
    """
    x is the input
    prev_h is the input from the previous timestep

    Returns (out, next_h). Feed out into the next layer and next_h to the
    next timestep.
    """
    next_h = cgt.tanh(prev_h.dot(self.W_hh) + x.dot(self.W_xh))
    out = next_h.dot(self.W_ho)
    return out, next_h
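# Minimal unrolling sketch (hypothetical names). Note the difference from the
# LSTM/GRU cells above: this cell returns a separate projected output per step.
h = h0
outs = []
for x_t in xs:  # xs: list of symbolic (batch, in_size) matrices
    out, h = rnn_cell(x_t, h)
    outs.append(out)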
def lstm_block(h_prev, c_prev, x_curr, size_x, size_c, name=''):
    """
    Construct an LSTM cell block of the specified number of cells

    :param h_prev: self activations at previous time step
    :param c_prev: self memory state at previous time step
    :param x_curr: inputs from previous layer at current time step
    :param size_x: size of inputs
    :param size_c: size of both c and h
    :return: c and h at current time step
    :rtype: tuple
    """
    input_sums = nn.Affine(size_x, 4 * size_c, name=name+'*x')(x_curr) + \
                 nn.Affine(size_c, 4 * size_c, name=name+'*h')(h_prev)
    c_new = cgt.tanh(input_sums[:, 3*size_c:])
    sigmoid_chunk = cgt.sigmoid(input_sums[:, :3*size_c])
    in_gate = sigmoid_chunk[:, :size_c]
    forget_gate = sigmoid_chunk[:, size_c:2*size_c]
    out_gate = sigmoid_chunk[:, 2*size_c:3*size_c]
    c_curr = forget_gate * c_prev + in_gate * c_new
    h_curr = out_gate * cgt.tanh(c_curr)
    return c_curr, h_curr
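# Hypothetical wiring of the block with symbolic matrices (sizes made up):
size_x, size_c, batch = 64, 128, 32
x_t = cgt.matrix(fixed_shape=(batch, size_x))
h_tm1 = cgt.matrix(fixed_shape=(batch, size_c))
c_tm1 = cgt.matrix(fixed_shape=(batch, size_c))
c_t, h_t = lstm_block(h_tm1, c_tm1, x_t, size_x, size_c, name='l0')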
def __call__(self, M, *inputs):
    assert len(inputs) == len(self.Wizs)
    n = M.shape[0]
    summands = [Xi.dot(Wiz) for (Xi, Wiz) in zip(inputs, self.Wizs)] \
        + [M.dot(self.Wmz), cgt.repeat(self.bz, n, axis=0)]
    z = cgt.sigmoid(cgt.add_multi(summands))

    summands = [Xi.dot(Wir) for (Xi, Wir) in zip(inputs, self.Wirs)] \
        + [M.dot(self.Wmr), cgt.repeat(self.br, n, axis=0)]
    r = cgt.sigmoid(cgt.add_multi(summands))

    summands = [Xi.dot(Wim) for (Xi, Wim) in zip(inputs, self.Wims)] \
        + [(r*M).dot(self.Wmm), cgt.repeat(self.bm, n, axis=0)]
    Mtarg = cgt.tanh(cgt.add_multi(summands))  # pylint: disable=E1111

    Mnew = (1-z)*M + z*Mtarg
    return Mnew
def tanh(x):
    return cgt.tanh(x)
import numpy as np

import nn
import cgt
from opt import sgd_update

N_LAYERS = 2
hid_size = X.shape[1]  # 28 * 28; X is the (assumed) matrix of flattened images
out_size = 10
inps = [cgt.matrix(dtype=cgt.floatX)]

param_list = []
for k in xrange(N_LAYERS):
    tmp = nn.Affine(hid_size, hid_size)
    param_list.extend([tmp.weight, tmp.bias])
    inps.append(cgt.tanh(tmp(inps[k])))
tmp = nn.Affine(hid_size, out_size)
param_list.extend([tmp.weight, tmp.bias])
logprobs = nn.logsoftmax(tmp(inps[-1]))
#dnn = nn.Module(inps[0:1], [logprobs])
#params = dnn.get_parameters()
# XXX think should just make this part of get_parameters
theta = nn.setup_contiguous_storage(param_list)
# XXX initialize
theta[:] = np.random.uniform(-0.08, 0.08, theta.shape)


# XXX taken from other demo, move
def ind2onehot(inds, n_cls):
    out = np.zeros(list(inds.shape) + [n_cls], cgt.floatX)
    out[np.arange(len(inds)), inds] = 1  # assumes 1-D integer inds
    return out
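# Hedged sketch of how this fragment might continue: a negative log-likelihood
# loss plus gradients, using only cgt calls seen elsewhere in this file. The
# target placeholder `y_onehot` is an assumption, and sgd_update (imported
# above, signature unknown) would consume theta and the gradients.
y_onehot = cgt.matrix(dtype=cgt.floatX)
loss = -(y_onehot * logprobs).sum(axis=1).mean()
grads = cgt.grad(loss, param_list)
f_loss_and_grads = cgt.function([inps[0], y_onehot], [loss] + grads)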