def __init__(self, n_actions):
    Serializable.__init__(self, n_actions)
    cgt.set_precision('double')
    n_in = 128
    o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
    a_n = cgt.vector("a_n", dtype='i8')
    q_n = cgt.vector("q_n")
    oldpdist_np = cgt.matrix("oldpdists")
    h0 = (o_no - 128.0) / 128.0  # center and scale byte-valued observations to roughly [-1, 1]
    nhid = 64
    h1 = cgt.tanh(nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
    probs_na = nn.softmax(nn.Affine(nhid, n_actions, weight_init=nn.IIDGaussian(std=0.01))(h1))
    logprobs_na = cgt.log(probs_na)
    b = cgt.size(o_no, 0)
    logps_n = logprobs_na[cgt.arange(b), a_n]  # log-probability of each taken action
    surr = (logps_n * q_n).mean()  # surrogate policy-gradient objective
    kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()  # KL(old || new)
    params = nn.get_parameters(surr)
    gradsurr = cgt.grad(surr, params)
    flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])
    lam = cgt.scalar()
    penobj = surr - lam * kl  # Lagrangian: surrogate objective with a KL penalty
    self._f_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_n, q_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], probs_na)
    self.f_probs = cgt.function([o_no], probs_na)
    self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
    self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)
    self.pc = ParamCollection(params)
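# A minimal numpy check of the discrete KL(old || new) term built above,
# KL = sum_a p_old(a) * log(p_old(a) / p_new(a)), averaged over the batch
# (the arrays below are illustrative, not from this codebase):
import numpy as np

old = np.array([[0.5, 0.5]])
new = np.array([[0.25, 0.75]])
kl = (old * np.log(old / new)).sum(axis=1).mean()
assert np.allclose(kl, 0.5 * np.log(2.0) + 0.5 * np.log(2.0 / 3.0))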
def logprob(self, x, mu, sigma):
    """
    Calculate logprob for each row of x, mu, sigma
    (sigma holds the per-dimension variances)
    """
    assert sigma.ndim == mu.ndim == x.ndim == 2
    k = x.shape[1]
    log_det = cgt.sum(cgt.log(sigma), axis=1, keepdims=True)
    prob_z = -.5 * (k * np.log(2. * np.pi) + log_det)
    # Mahalanobis term: -(x - mu)^2 / (2 * sigma), consistent with the
    # log-determinant normalizer above
    prob_e = cgt.sum(-.5 * ((x - mu) ** 2) / sigma, axis=1, keepdims=True)
    # output shape: (size_batch, 1)
    return prob_z + prob_e
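# A minimal numpy sanity check for the diagonal-Gaussian log-density above,
# assuming `sigma` holds per-dimension variances (names are illustrative):
import numpy as np

def logprob_np(x, mu, var):
    k = x.shape[1]
    log_det = np.sum(np.log(var), axis=1, keepdims=True)
    quad = np.sum(-.5 * (x - mu) ** 2 / var, axis=1, keepdims=True)
    return -.5 * (k * np.log(2. * np.pi) + log_det) + quad

x = np.array([[0.5, -1.0]])
mu = np.zeros((1, 2))
var = np.ones((1, 2))
# against the standard normal log-density evaluated by hand:
expected = -np.log(2 * np.pi) - 0.5 * (0.5 ** 2 + 1.0 ** 2)
assert np.allclose(logprob_np(x, mu, var), expected)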
def __init__(self, obs_dim, ctrl_dim):
    cgt.set_precision('double')
    Serializable.__init__(self, obs_dim, ctrl_dim)
    self.obs_dim = obs_dim
    self.ctrl_dim = ctrl_dim
    o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
    a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
    adv_n = cgt.vector("adv_n")
    oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
    self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)), name="std_1a")
    std_1a = cgt.exp(logstd_1a)
    # Here's where we apply the network
    h0 = o_no
    nhid = 32
    h1 = cgt.tanh(nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
    h2 = cgt.tanh(nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
    mean_na = nn.Affine(nhid, ctrl_dim, weight_init=nn.IIDGaussian(std=0.01))(h2)
    b = cgt.size(o_no, 0)
    std_na = cgt.repeat(std_1a, b, axis=0)
    oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
    oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]
    # Gaussian log-likelihoods, up to a shared constant that cancels in the ratio
    logp_n = ((-.5) * cgt.square((a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
    oldlogp_n = ((-.5) * cgt.square((a_na - oldmean_na) / oldstd_na).sum(axis=1)) - cgt.log(oldstd_na).sum(axis=1)
    ratio_n = cgt.exp(logp_n - oldlogp_n)  # importance ratio pi_new / pi_old
    surr = (ratio_n * adv_n).mean()
    pdists_np = cgt.concatenate([mean_na, std_na], axis=1)
    params = nn.get_parameters(surr)
    # KL(old || new) between diagonal Gaussians, summed over action dimensions
    oldvar_na = cgt.square(oldstd_na)
    var_na = cgt.square(std_na)
    kl = (cgt.log(std_na / oldstd_na) + (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) - .5).sum(axis=1).mean()
    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self._compute_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_na, adv_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], pdists_np)
    self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n], [surr, kl])
    self.pc = ParamCollection(params)
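# A minimal numpy check of the diagonal-Gaussian KL used above,
# KL(N(mu0, s0^2) || N(mu1, s1^2)) = log(s1/s0) + (s0^2 + (mu0 - mu1)^2) / (2 s1^2) - 1/2,
# summed over action dimensions (all names below are illustrative):
import numpy as np

def gauss_kl(mu0, s0, mu1, s1):
    return (np.log(s1 / s0) + (s0 ** 2 + (mu0 - mu1) ** 2) / (2 * s1 ** 2) - .5).sum(axis=1)

mu0, s0 = np.zeros((1, 2)), np.ones((1, 2))
# KL from a distribution to itself is zero:
assert np.allclose(gauss_kl(mu0, s0, mu0, s0), 0.0)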
def test_noncontiguous_matrix():
    x = np.arange(1, 7).reshape(2, 3).astype(cgt.floatX)
    result = np.log(x.sum(axis=0)).sum()
    xvar = cgt.matrix()
    f = cgt.function([xvar], cgt.log(xvar.sum(axis=0)).sum())
    assert np.allclose(f(np.asarray(x, order='C')), result)
    assert np.allclose(f(np.asarray(x, order='C', dtype='int64')), result)
    assert np.allclose(f(np.asarray(x, order='F')), result)
    # strided (non-contiguous) view of a larger array
    X = np.zeros((4, 6))
    X[::2, ::2] = x
    assert np.allclose(f(X[::2, ::2]), result)
def get_train_objective(self, max_label_length, ground_labels_basis_btc):
    context_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.feature_size)), name=None)
    state_i_bf = parameter(init_array(IIDUniform(-0.1, 0.1), (self.batch_size, self.decoder_size)), name=None)
    prev_out_bc = cgt.zeros((self.batch_size, self.true_number_classes), dtype='i8')  # + self.start_token_index
    log_probs = None
    for iter_step in range(0, max_label_length):
        state_i_bf = self.get_decoder_state(context_i_bf, prev_out_bc, state_i_bf)
        context_i_bf = self.get_context(state_i_bf)
        this_character_dist_bc = self.get_character_distribution(state_i_bf, context_i_bf)
        # teacher forcing: feed the ground-truth character back in at the next step
        prev_out_bc = ground_labels_basis_btc[:, iter_step, :]
        # probability assigned to the correct character: one-hot mask, then sum
        log_probs_pre = prev_out_bc * this_character_dist_bc
        log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
        if log_probs is None:
            log_probs = cgt.sum(log_probs_pre)
        else:
            log_probs += cgt.sum(log_probs_pre)
    log_probs = -log_probs  # negative log-likelihood, to be minimized
    return log_probs
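# The inner step above selects the probability of the ground-truth character
# by masking the predicted distribution with a one-hot row and summing; in
# numpy terms (illustrative arrays, not from this codebase):
import numpy as np

dist = np.array([[0.1, 0.7, 0.2]])    # predicted character distribution
onehot = np.array([[0.0, 1.0, 0.0]])  # ground-truth character, one-hot
logp = np.log((onehot * dist).sum(axis=1))
assert np.allclose(logp, np.log(0.7))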
def logsoftmax(x, axis=1):
    return cgt.log(softmax(x, axis=axis))
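# log(softmax(x)) can underflow to -inf when the softmax saturates; a
# numerically stabler form subtracts the row max and applies log-sum-exp
# directly. A minimal numpy sketch of that identity (not this library's API):
import numpy as np

def logsoftmax_np(x, axis=1):
    shifted = x - x.max(axis=axis, keepdims=True)
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))

x = np.array([[1000.0, 0.0]])
# exp(1000) overflows in the naive form; the shifted form stays finite:
assert np.isfinite(logsoftmax_np(x)).all()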
def prod(x, axis=None, keepdims=False):
    """
    Like numpy.prod
    """
    return cgt.exp(cgt.sum(cgt.log(x), axis=axis, keepdims=keepdims))
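# The exp(sum(log(x))) identity above only reproduces numpy.prod for strictly
# positive inputs, since log is undefined at zero and negative values. A quick
# numpy illustration:
import numpy as np

x = np.array([1.0, 2.0, 3.0])
assert np.allclose(np.exp(np.sum(np.log(x))), np.prod(x))  # both equal 6.0
# with a zero or negative entry, np.log yields -inf or nan and the identity breaks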
def bernoulli_crossentropy(bins, probs):
    "bins = binary values. probs = Pr(b=1)"
    return -(bins * cgt.log(probs) + (1 - bins) * cgt.log(1 - probs))
def crossent(self, p, q):
    assert p.ndim == 2 and q.ndim == 2
    return -(p * cgt.log(q)).sum(axis=1)
def loglik(self, labels, p):
    return cgt.log(p[cgt.arange(cgt.size(labels, 0)), labels])
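# The fancy-indexing pattern above picks p[i, labels[i]] for each row i;
# the numpy equivalent, for reference (arrays are illustrative):
import numpy as np

p = np.array([[0.7, 0.3],
              [0.2, 0.8]])
labels = np.array([0, 1])
per_example = np.log(p[np.arange(p.shape[0]), labels])  # log(0.7), log(0.8)
assert np.allclose(per_example, np.log([0.7, 0.8]))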
def kld_unit_mvn(mu, var):
    # negative KL divergence from N(0, I), i.e. -KL(N(mu, var) || N(0, I)):
    # this is the term that gets *added* to a variational lower bound
    return (mu.shape[1] + cgt.sum(cgt.log(var), axis=1) - cgt.sum(cgt.square(mu), axis=1) - cgt.sum(var, axis=1)) / 2.0
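# numpy sanity check: for mu = 0, var = 1 the expression above is zero,
# matching the closed form -KL = -0.5 * sum(mu^2 + var - log(var) - 1):
import numpy as np

def kld_unit_mvn_np(mu, var):
    return (mu.shape[1] + np.sum(np.log(var), axis=1) - np.sum(np.square(mu), axis=1) - np.sum(var, axis=1)) / 2.0

mu = np.zeros((1, 3))
var = np.ones((1, 3))
assert np.allclose(kld_unit_mvn_np(mu, var), 0.0)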
def binary_crossentropy(x, y):
    return -(y * cgt.log(x) + (1 - y) * cgt.log(1 - x))
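# cgt.log(x) and cgt.log(1 - x) blow up when a predicted probability hits
# exactly 0 or 1; a common guard clips predictions to [eps, 1 - eps] first.
# A minimal numpy sketch of that guard (the eps value is illustrative):
import numpy as np

def binary_crossentropy_np(x, y, eps=1e-7):
    x = np.clip(x, eps, 1 - eps)
    return -(y * np.log(x) + (1 - y) * np.log(1 - x))

assert np.isfinite(binary_crossentropy_np(np.array([0.0, 1.0]), np.array([0.0, 1.0]))).all()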
def f(x):
    # expects batches
    k = mu.shape[1]
    logp = ((-k / 2.0) * np.log(2 * np.pi)
            - 0.5 * cgt.sum(cgt.log(var), axis=1)
            - cgt.sum(0.5 * (1.0 / var) * (x - mu) * (x - mu), axis=1))
    return logp
def log(x):
    return cgt.log(x)
def test_the_test_problem():  # Works
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 20  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription. Includes start character.
    num_out_classes = 27
    num_out_classes_true = num_out_classes + 2
    num_batches = 756
    num_epochs = 30

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print 'initializing temporal dense layer'
    d1 = nnbuilder.temporalDenseLayer(feats, num_units=128, activation=cgt.sigmoid)
    #d2 = nnbuilder.temporalDenseLayer(d1, num_units=128, activation=cgt.sigmoid)
    d3 = nnbuilder.temporalDenseLayer(d1, num_units=num_out_classes_true, activation=nnbuilder.linear)
    out = nn.three_d_softmax(d3, axis=2)

    # accumulate the log-probability of the ground-truth character at each timestep
    log_probs = None
    for iter_step in range(0, max_label_length):
        this_character_dist_bc = out[:, iter_step, :]
        prev_out_bc = ground_labels_basis[:, iter_step, :]
        log_probs_pre = prev_out_bc * this_character_dist_bc
        log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
        if log_probs is None:
            log_probs = cgt.sum(log_probs_pre)
        else:
            log_probs += cgt.sum(log_probs_pre)
    log_probs = -log_probs
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    last_time = time.time()
    print 'compiling objective function'
    updates = nn.rmsprop(log_probs, nn.get_parameters(log_probs), learning_rate=0.01)
    pred_train = cgt.function([feats, ground_labels_basis], [], updates=updates)
    pred_fun = cgt.function([feats, ground_labels_basis], [log_probs])
    most_likely_chars = cgt.argmax(out, axis=2)  # most likely class at each timestep
    actual_predictions = cgt.function([feats, ground_labels_basis], [most_likely_chars])
    print 'that took ' + str(time.time() - last_time) + ' seconds'

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)  # standard deviation for normalization

    print 'now training'
    for one_epoch in range(0, num_epochs):
        trained = 0
        last_time = time.time()
        print 'starting epoch ' + str(one_epoch)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean,
                                                             data_sd, test_labels, num_out_classes_true)
            pred_train(batch, labels_basis)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean,
                                                             data_sd, test_labels, num_out_classes_true)
            trained += pred_fun(batch, labels_basis)[0]
        trained = trained / num_batches  # average loss over all batches
        print 'train loss is ' + str(trained)
        print 'that took ' + str(time.time() - last_time) + ' seconds'
        act_pred = actual_predictions(batch, labels_basis)[0]
        print 'an actual prediction is '
        print act_pred
def logprob(self, x, p):
    """
    Element-wise log prob for each component in x
    """
    p = core.as_node(p)
    l = x * cgt.log(p) + (1 - x) * cgt.log(1 - p)
    return l