def __init__(self, obs_dim, ctrl_dim):
    """Build the Gaussian-policy computation graph and compile its functions.

    obs_dim  -- observation dimensionality (columns of the o_no input)
    ctrl_dim -- action/control dimensionality (columns of a_na)

    Side effects: sets cgt precision to double, registers the log-std
    parameter, and compiles four cgt functions stored on self.
    """
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim
    # Symbolic inputs: observations, actions taken, advantages, and the
    # old per-sample distribution params (mean|std concatenated, width 2*ctrl_dim).
    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2*ctrl_dim))
    # Log of the (state-independent) action std; exponentiate for the std itself.
    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
    std_1a = cgt.exp(logstd_1a)
    # Here's where we apply the network: two tanh hidden layers -> mean.
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(h2)
    b = cgt.size(o_no, 0)
    # Tile the single std row across the batch.
    std_na = cgt.repeat(std_1a, b, axis=0)
    # Split the old distribution params back into mean and std halves.
    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2*self.ctrl_dim]
    # Per-sample Gaussian log-densities, with the constant -0.5*log(2*pi)
    # terms omitted from both (they cancel in the ratio below).
    logp_n = ((-.5) * cgt.square((a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square((a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)
    # Importance-sampling ratio and surrogate objective (policy-gradient style).
    ratio_n = cgt.exp(logp_n - oldlogp_n)
    surr = (ratio_n * adv_n).mean()
    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
    params = nn.get_parameters(surr)
    # KL(old || new) for diagonal Gaussians, summed over action dims,
    # averaged over the batch.
    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()
    # Penalized (Lagrangian) objective: surrogate minus lam * KL.
    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    # Flattened gradient of the penalized objective w.r.t. all parameters.
    self._compute_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)
    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self.pc = ParamCollection(params)
def softplus(x):
    """Elementwise softplus, log(1 + exp(x)), as a cgt graph node."""
    # Gradient: d/dx softplus(x) = sigmoid(x) = 1 / (1 + exp(-x)).
    grad_fn = lambda x, g, gy: gy / (cgt.exp(-x) + 1.0)
    # C expression avoids overflow by rewriting as x + log(1+exp(-x)) for x > 0.
    c_expr = "(x > 0) ? (x + log(exp(-x) + 1)) : log(1+exp(x))"
    info = core.UnaryInfo("SoftPlus", _nu_softplus, True, 'f', grad_fn, c_expr)
    return core.Result(core.ElwiseUnary("softplus", info), [x])
def ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh):
    """Compute the next head weightings for a Neural Turing Machine.

    Axis convention (from the suffixes and the asserts below): b = batch,
    h index runs over 2*opt.h heads, n = memory slots, m = slot width.

    wprev_bhn -- previous weights; M_bnm -- memory; k_bhm -- keys;
    beta_bh -- key strength; g_bh -- interpolation gate; s_bh3 -- shift
    distribution (3 taps); gamma_bh -- sharpening exponent.
    Returns the new weights wfin_bhn.
    """
    # Content addressing
    # Cosine similarity
    # take inner product along memory axis k * M
    numer_bhn = cgt.einsum("bhm,bnm->bhn", k_bhm, M_bnm)
    # compute denominator |k| * |m|
    denom_bhn = cgt.broadcast(
        "*",
        cgt.norm(k_bhm, axis=2, keepdims=True),  # -> shape bh1
        cgt.norm(M_bnm, axis=2, keepdims=True).transpose([0, 2, 1]),  # -> bn1 -> b1n
        "xx1,x1x")
    csim_bhn = numer_bhn / denom_bhn
    assert infer_shape(csim_bhn) == (opt.b, 2 * opt.h, opt.n)
    # scale by beta, then softmax over the memory-slot axis
    tmp_bhn = cgt.broadcast("*", beta_bh[:, :, None], csim_bhn, "xx1,xxx")
    wc_bhn = sum_normalize2(cgt.exp(tmp_bhn))
    # Interpolation: blend previous weights with content weights via gate g.
    g_bh1 = g_bh[:, :, None]
    wg_bhn = cgt.broadcast("*", wprev_bhn, (1 - g_bh1), "xxx,xx1") \
        + cgt.broadcast("*", wc_bhn, g_bh1, "xxx,xx1")
    # Shift: circular convolution with the 3-tap shift kernel.
    wtil_bhn = circ_conv_1d(wg_bhn, s_bh3, axis=2)
    # Sharpening: raise to power gamma and renormalize.
    wfin_bhn = sum_normalize2(
        cgt.broadcast("**", wtil_bhn, gamma_bh.reshape([opt.b, 2 * opt.h, 1]), "xxx,xx1"))
    # Shape sanity checks on the intermediate results.
    b, h, n = opt.b, 2 * opt.h, opt.n
    assert infer_shape(wtil_bhn) == (b, h, n)
    assert infer_shape(gamma_bh) == (b, h)
    assert infer_shape(gamma_bh[:, :, None]) == (b, h, 1)
    return wfin_bhn
def ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh):
    """NTM head addressing: content match, interpolate, shift, sharpen.

    Suffix convention (backed by the asserts): b = batch, the head axis has
    size 2*opt.h, n = opt.n memory slots, m = slot width. Returns the new
    head weightings wfin_bhn with rows normalized by sum_normalize2.
    """
    # Content addressing
    # Cosine similarity
    # take inner product along memory axis k * M
    numer_bhn = cgt.einsum("bhm,bnm->bhn", k_bhm, M_bnm)
    # compute denominator |k| * |m|
    denom_bhn = cgt.broadcast("*",
        cgt.norm(k_bhm, axis=2, keepdims=True),  # -> shape bh1
        cgt.norm(M_bnm, axis=2, keepdims=True).transpose([0, 2, 1]),  # -> bn1 -> b1n
        "xx1,x1x")
    csim_bhn = numer_bhn / denom_bhn
    assert infer_shape(csim_bhn) == (opt.b, 2*opt.h, opt.n)
    # scale by beta (key strength), softmax over slots via exp + normalize
    tmp_bhn = cgt.broadcast("*", beta_bh[:, :, None], csim_bhn, "xx1,xxx")
    wc_bhn = sum_normalize2(cgt.exp(tmp_bhn))
    # Interpolation between previous and content-based weights, gated by g.
    g_bh1 = g_bh[:, :, None]
    wg_bhn = cgt.broadcast("*", wprev_bhn, (1 - g_bh1), "xxx,xx1") \
        + cgt.broadcast("*", wc_bhn, g_bh1, "xxx,xx1")
    # Shift: 3-tap circular convolution along the slot axis.
    wtil_bhn = circ_conv_1d(wg_bhn, s_bh3, axis=2)
    # Sharpening: elementwise power gamma, then renormalize rows.
    wfin_bhn = sum_normalize2(cgt.broadcast("**", wtil_bhn, gamma_bh.reshape([opt.b, 2*opt.h, 1]), "xxx,xx1"))
    # Shape sanity checks.
    b, h, n = opt.b, 2*opt.h, opt.n
    assert infer_shape(wtil_bhn) == (b, h, n)
    assert infer_shape(gamma_bh) == (b, h)
    assert infer_shape(gamma_bh[:, :, None]) == (b, h, 1)
    return wfin_bhn
def make_ff_controller(opt):
    """Build the feedforward NTM controller as an nn.Module.

    Inputs:  previous reads r (b,h,m) and external input X (b,k).
    Outputs: head parameters (key k, strength beta, gate g, shift s,
    sharpening gamma) for all 2*h heads, erase/add vectors (h heads),
    and the external output y (b,p).
    """
    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k
    # H counts both head groups; presumably read + write heads — TODO confirm.
    H = 2*h
    in_size = k + h*m
    # Output layout (in slicing order below): key, beta, g, shift, gamma,
    # erase, add, external output.
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p
    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape=(b, h, m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape=(b, k))
    # Flatten reads to (b, h*m) so they can be concatenated with X.
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)
    hid_sizes = opt.ff_hid_sizes
    activation = cgt.tanh
    layer_out_sizes = [in_size] + hid_sizes + [out_size]
    last_out = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i in xrange(len(layer_out_sizes)-1):
        indim = layer_out_sizes[i]
        outdim = layer_out_sizes[i+1]
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i"%i, fixed_shape_mask="all")
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i"%i, fixed_shape_mask="all")
        last_out = cgt.broadcast("+", last_out.dot(W), bias, "xx,1x")
        # Don't apply nonlinearity at the last layer
        if i != len(layer_out_sizes)-2: last_out = activation(last_out)
    # Slice the final layer's output into the individual head parameters;
    # idx walks through the layout described by out_size above.
    idx = 0
    k_bHm = last_out[:, idx:idx+H*m];  idx += H*m;  k_bHm = k_bHm.reshape([b, H, m])
    beta_bH = last_out[:, idx:idx+H];  idx += H
    g_bH = last_out[:, idx:idx+H];     idx += H
    s_bH3 = last_out[:, idx:idx+3*H];  idx += 3*H;  s_bH3 = s_bH3.reshape([b, H, 3])
    gamma_bH = last_out[:, idx:idx+H]; idx += H
    e_bhm = last_out[:, idx:idx+h*m];  idx += h*m;  e_bhm = e_bhm.reshape([b, h, m])
    a_bhm = last_out[:, idx:idx+h*m];  idx += h*m;  a_bhm = a_bhm.reshape([b, h, m])
    y_bp = last_out[:, idx:idx+p];     idx += p
    # Squash each parameter into its valid range.
    k_bHm = cgt.tanh(k_bHm)
    beta_bH = nn.softplus(beta_bH)       # strength >= 0
    g_bH = cgt.sigmoid(g_bH)             # gate in (0,1)
    s_bH3 = sum_normalize2(cgt.exp(s_bH3))  # shift distribution sums to 1
    gamma_bH = cgt.sigmoid(gamma_bH)+1   # sharpening exponent in (1,2)
    e_bhm = cgt.sigmoid(e_bhm)           # erase in (0,1)
    a_bhm = cgt.tanh(a_bhm)
    # y_bp left linear
    assert infer_shape(k_bHm) == (b, H, m)
    assert infer_shape(beta_bH) == (b, H)
    assert infer_shape(g_bH) == (b, H)
    assert infer_shape(s_bH3) == (b, H, 3)
    assert infer_shape(gamma_bH) == (b, H)
    assert infer_shape(e_bhm) == (b, h, m)
    assert infer_shape(a_bhm) == (b, h, m)
    assert infer_shape(y_bp) == (b, p)
    return nn.Module([r_bhm, X_bk], [k_bHm, beta_bH, g_bH, s_bH3, gamma_bH, e_bhm, a_bhm, y_bp])
def make_ntm_initial_states(opt):
    """Build the NTM's learnable initial states, tiled across the batch.

    Returns [memory (b,n,m), head weights (b,2h,n), reads (b,h,m)].
    """
    num_slots, slot_dim, num_heads, batch = opt.n, opt.m, opt.h, opt.b
    # Initial memory: small Gaussian noise.
    mem_1nm = cgt.shared(.1 * nr.randn(1, num_slots, slot_dim))
    # Initial head weights: random positives, exponentiated then normalized
    # so each head's weighting sums to 1 over the slots.
    w_raw = cgt.shared(.1 * nr.rand(1, 2 * num_heads, num_slots))
    w_1Hn = sum_normalize2(cgt.exp(w_raw))
    # Initial read vectors start at zero.
    read_1hm = cgt.shared(np.zeros((1, num_heads, slot_dim)))
    tiled = []
    for state in (mem_1nm, w_1Hn, read_1hm):
        tiled.append(cgt.repeat(state, batch, axis=0))
    return tiled
def __init__(self, x, n_in, n_hid, n_out, nlayers=1, y=None, eps=None):
    """Gaussian MLP: shared hidden stack feeding mu and log-variance heads.

    x      -- symbolic input
    n_in, n_hid, n_out, nlayers -- layer sizes for the hidden stack / heads
    eps    -- if given (encoder mode): out = mu + sigma * eps
    y      -- if given (decoder mode): out = sigmoid(mu) and cost is the
              negative log-likelihood of y under a diagonal Gaussian
    Exactly one of y / eps may be provided.
    """
    super(GaussianMLP, self).__init__(x, n_in, n_hid, nlayers=nlayers, prefix="GaussianMLP_hidden")
    self.mu_layer = HiddenLayer(
        input=self.hidden_layers[-1].output,
        n_in=self.hidden_layers[-1].n_out,
        n_out=n_out,
        activation=None,
        prefix="GaussianMLP_mu"
    )
    # log(sigma^2)
    self.logvar_layer = HiddenLayer(
        input=self.hidden_layers[-1].output,
        n_in=self.hidden_layers[-1].n_out,
        n_out=n_out,
        activation=None,
        prefix="GaussianMLP_logvar"
    )
    self.mu = self.mu_layer.output
    self.var = cgt.exp(self.logvar_layer.output)
    self.sigma = cgt.sqrt(self.var)
    self.params = self.params + self.mu_layer.params + \
        self.logvar_layer.params
    # for use as encoder: reparameterized sample
    if eps is not None:
        assert y is None
        self.out = self.mu + self.sigma * eps
    # for use as decoder
    # BUG FIX: was `if y:` — truth-testing a symbolic/array `y` is wrong
    # (and inconsistent with the `eps is not None` check above); test
    # explicitly for presence instead.
    if y is not None:
        assert eps is None
        self.out = cgt.sigmoid(self.mu)
        self.cost = -cgt.sum(log_diag_mvn(self.out, self.var)(y))
def softmax(x, axis=1):
    """Softmax of a 2-D cgt expression along `axis` (0 or 1).

    Exponentiates x, then divides by the sum along `axis` so slices along
    that axis sum to 1.
    NOTE(review): no max-subtraction is applied (the stabilizing line was
    commented out upstream), so very large inputs can overflow exp.
    """
    # x = cgt.broadcast("-", x, x.max(axis=1,keepdims=True),"xx,x1")
    out = cgt.exp(x)
    # BUG FIX: the broadcast pattern must match the reduced axis — the
    # keepdims-sum collapses that axis to size 1, so the pattern is
    # "x1" when axis=1 but "1x" when axis=0. Previously "xx,x1" was
    # hard-coded, which was wrong for axis=0.
    pattern = "xx,x1" if axis == 1 else "xx,1x"
    out = cgt.broadcast("/", out, out.sum(axis=axis, keepdims=True), pattern)
    return out
def softmax(x, axis=1):
    """Softmax of a 2-D cgt expression along `axis` (0 or 1).

    Exponentiates x, then normalizes by the sum along `axis`.
    NOTE(review): the max-subtraction stabilization is commented out, so
    large inputs can overflow exp.
    """
    # x = cgt.broadcast("-", x, x.max(axis=1,keepdims=True),"xx,x1")
    out = cgt.exp(x)
    # BUG FIX: pick the broadcast pattern from the reduced axis; the old
    # hard-coded "xx,x1" was only valid for axis=1.
    pattern = "xx,x1" if axis == 1 else "xx,1x"
    out = cgt.broadcast("/", out, out.sum(axis=axis, keepdims=True), pattern)
    return out
def softplus(x):
    """Return the softplus, log(1 + exp(x)), applied elementwise to x."""
    unary_info = core.UnaryInfo(
        "SoftPlus",
        _nu_softplus,
        True,
        'f',
        # Pullback: gy * sigmoid(x).
        lambda x, g, gy: gy / (cgt.exp(-x) + 1.0),
        # Overflow-safe C form: x + log(1+exp(-x)) when x is positive.
        "(x > 0) ? (x + log(exp(-x) + 1)) : log(1+exp(x))",
    )
    softplus_op = core.ElwiseUnary("softplus", unary_info)
    return core.Result(softplus_op, [x])
def prod(x, axis=None, keepdims=False):
    """
    Like numpy.prod

    Implemented as exp(sum(log(x))), so it is only valid for strictly
    positive x: zeros or negative entries make log(x) undefined/NaN.
    """
    return cgt.exp(cgt.sum(cgt.log(x), axis=axis, keepdims=keepdims))
def exp(x):
    """Elementwise exponential; thin wrapper around cgt.exp."""
    return cgt.exp(x)
def __init__(self, obs_dim, ctrl_dim):
    """Construct the Gaussian-policy graph and compile its cgt functions.

    obs_dim  -- observation dimensionality
    ctrl_dim -- action dimensionality

    Side effects: sets cgt precision to double, creates the log-std
    parameter, and stores compiled functions on self.
    """
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim
    # Symbolic batch inputs: observations, actions, advantages, and old
    # distribution parameters packed as [mean | std] (width 2*ctrl_dim).
    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
    # State-independent log std of the Gaussian policy.
    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)),
                                           name="std_1a")
    std_1a = cgt.exp(logstd_1a)
    # Here's where we apply the network: two tanh hidden layers -> mean.
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(
        nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(
        nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim,
                        weight_init=nn.IIDGaussian(std=0.01))(h2)
    b = cgt.size(o_no, 0)
    # Tile the single std row to the full batch.
    std_na = cgt.repeat(std_1a, b, axis=0)
    # Unpack the old mean / std halves.
    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]
    # Gaussian log-densities with the -0.5*log(2*pi) constants dropped
    # from both (they cancel in the ratio below).
    logp_n = ((-.5) * cgt.square(
        (a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square(
        (a_na - oldmean_na) / oldstd_na).sum(axis=1)
                 ) - cgt.log(oldstd_na).sum(axis=1)
    # Importance ratio and the surrogate policy-gradient objective.
    ratio_n = cgt.exp(logp_n - oldlogp_n)
    surr = (ratio_n * adv_n).mean()
    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
    params = nn.get_parameters(surr)
    # KL(old || new) for diagonal Gaussians, summed over action dims and
    # averaged over the batch.
    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na) +
          (oldvar_na + cgt.square(oldmean_na - mean_na)) /
          (2 * var_na) - .5).sum(axis=1).mean()
    # Lagrangian-penalized objective: surrogate minus lam * KL.
    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                         [surr, kl])
    # Flattened gradient of the penalized objective w.r.t. all parameters.
    self._compute_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)
    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                               [surr, kl])
    self.pc = ParamCollection(params)