def p_p(cur_z):
    # Transition prior: sample the next latent z_2 from the current latent z_1.
    p_p_2_linear = self.bn(F.relu(F.dropout(self.p_p_2_lin(cur_z))))
    p_p_2_mean = self.p_p_2_m(p_p_2_linear)
    p_p_2_var = F.exp(self.p_p_2_v(p_p_2_linear))
    p_p_2 = D.GaussianDistribution(p_p_2_mean, p_p_2_var)
    z_2 = p_p_2.sample()
    return z_2, p_p_2
def __init__(self, n_input_channels, action_size, var,
             n_hidden_layers=0, n_hidden_channels=None,
             min_action=None, max_action=None, bound_mean=False,
             nonlinearity=F.relu):
    self.n_input_channels = n_input_channels
    self.action_size = action_size
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.min_action = min_action
    self.max_action = max_action
    self.bound_mean = bound_mean
    self.nonlinearity = nonlinearity
    self.var = var
    layers = []
    layers.append(L.Linear(n_input_channels, n_hidden_channels))
    for _ in range(n_hidden_layers - 1):
        layers.append(self.nonlinearity)
        layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
    layers.append(L.Linear(n_hidden_channels, action_size))
    if self.bound_mean:
        layers.append(
            lambda x: bound_by_tanh(x, self.min_action, self.max_action))
    layers.append(lambda x: distribution.GaussianDistribution(
        x, self.xp.broadcast_to(self.var, x.shape)))
    super().__init__(*layers)
def p_b(state):
    # Belief network: sample a latent z_1 from the recurrent encoder state.
    p_b_1_linear = self.bn(F.relu(F.dropout(self.p_b_1_lin(state))))
    p_b_1_mean = self.p_b_1_m(p_b_1_linear)
    p_b_1_var = F.exp(self.p_b_1_v(p_b_1_linear))
    p_b_1 = D.GaussianDistribution(p_b_1_mean, p_b_1_var)
    z_1 = p_b_1.sample()
    return z_1, p_b_1
def setUp(self):
    self.mean = np.random.normal(
        size=(self.batch_size, self.ndim)).astype(np.float32)
    self.var = np.random.uniform(
        low=0.5, high=2.0,
        size=(self.batch_size, self.ndim)).astype(np.float32)
    self.distrib = distribution.GaussianDistribution(self.mean, self.var)
def test_kl(self):
    # Compare it to chainer.functions.gaussian_kl_divergence
    standard = distribution.GaussianDistribution(
        mean=np.zeros((self.batch_size, self.ndim), dtype=np.float32),
        var=np.ones((self.batch_size, self.ndim), dtype=np.float32))
    kl = self.distrib.kl(standard)
    chainer_kl = chainer.functions.gaussian_kl_divergence(
        self.distrib.mean, self.distrib.ln_var)
    np.testing.assert_allclose(kl.data.sum(), chainer_kl.data, rtol=1e-5)
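# For reference, the quantity compared on both sides of test_kl reduces to the
# closed-form KL divergence between a diagonal Gaussian N(mean, diag(var)) and
# the standard normal. This is a minimal NumPy sketch for illustration only;
# diag_gaussian_kl_to_standard is a hypothetical helper, not part of the test suite.
def diag_gaussian_kl_to_standard(mean, var):
    # KL(N(mean, diag(var)) || N(0, I)), summed over batch and dimensions,
    # matching chainer.functions.gaussian_kl_divergence(mean, np.log(var)).
    return 0.5 * np.sum(mean ** 2 + var - np.log(var) - 1.0)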
def __init__(self, n_input_channels, action_size, var,
             n_hidden_layers=0, n_hidden_channels=None,
             min_action=None, max_action=None, bound_mean=False,
             nonlinearity=F.relu, mean_wscale=1):
    self.n_input_channels = n_input_channels
    self.action_size = action_size
    self.n_hidden_layers = n_hidden_layers
    self.n_hidden_channels = n_hidden_channels
    self.min_action = min_action
    self.max_action = max_action
    self.bound_mean = bound_mean
    self.nonlinearity = nonlinearity
    if np.isscalar(var):
        self.var = np.full(action_size, var, dtype=np.float32)
    else:
        self.var = var
    layers = []
    if n_hidden_layers > 0:
        # Input to hidden
        layers.append(L.Linear(n_input_channels, n_hidden_channels))
        layers.append(self.nonlinearity)
        for _ in range(n_hidden_layers - 1):
            # Hidden to hidden
            layers.append(L.Linear(n_hidden_channels, n_hidden_channels))
            layers.append(self.nonlinearity)
        # The last layer is used to compute the mean
        layers.append(
            L.Linear(n_hidden_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    else:
        # There's only one layer for computing the mean
        layers.append(
            L.Linear(n_input_channels, action_size,
                     initialW=LeCunNormal(mean_wscale)))
    if self.bound_mean:
        layers.append(
            lambda x: bound_by_tanh(x, self.min_action, self.max_action))

    def get_var_array(shape):
        self.var = self.xp.asarray(self.var)
        return self.xp.broadcast_to(self.var, shape)

    layers.append(lambda x: distribution.GaussianDistribution(
        x, get_var_array(x.shape)))
    super().__init__(*layers)
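# A minimal usage sketch for the constructor above, assuming it belongs to a
# chainerrl-style Gaussian policy head with fixed covariance. The class name and
# the variables below (obs_size, action_size, env, obs) are illustrative, not
# taken from this snippet.
policy = FCGaussianPolicyWithFixedCovariance(
    n_input_channels=obs_size,          # assumed observation dimensionality
    action_size=action_size,
    var=0.1,                            # fixed per-dimension action variance
    n_hidden_layers=2,
    n_hidden_channels=64,
    min_action=env.action_space.low,    # assumed gym-style env
    max_action=env.action_space.high,
    bound_mean=True)
action_distrib = policy(obs)            # GaussianDistribution over actions
action = action_distrib.sample()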
def __call__(self, mean):
    """Return a Gaussian with given mean.

    Args:
        mean (chainer.Variable or ndarray): Mean of Gaussian.

    Returns:
        chainerrl.distribution.Distribution: Gaussian whose mean is the
            mean argument and whose variance is computed from the parameter
            of this link.
    """
    var = F.broadcast_to(self.var_func(self.var_param), mean.shape)
    return distribution.GaussianDistribution(mean, var)
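# Minimal usage sketch for the __call__ above, assuming it belongs to a link
# holding a state-independent variance parameter (var_param) transformed by
# var_func. The names var_head, mean_net, obs, and action are illustrative.
mean = mean_net(obs)                    # any chain producing the Gaussian mean
distrib = var_head(mean)                # GaussianDistribution with learned variance
log_p = distrib.log_prob(action)        # differentiable w.r.t. mean_net and var_param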
def __call__(self, x, t):
    """
    x: (batch_size, how_many_prev, H, W)
    t: (batch_size, how_many_out, H, W)
    """
    # Should be passed in by __call__; currently sampled here.
    jmp_idx = random.randint(1, 4)     # step 3
    start_idx = random.randint(4, 10)  # step 2
    end_idx = jmp_idx + start_idx
    loss = None

    # Reshape to (784,) from (28, 28)
    new_x = F.reshape(
        x.astype(np.float32),
        (x.shape[0], x.shape[1], x.shape[2] * x.shape[3])).data
    new_t = F.reshape(
        t.astype(np.float32),
        (t.shape[0], t.shape[1], t.shape[2] * t.shape[3])).data

    batch_size = x.shape[0]
    num_prev = x.shape[1]
    num_pred = t.shape[1]

    self.encoder.reset_state()

    """
    ==============================================
    Training Loop
    ==============================================
    """
    for i in range(start_idx):
        inp = t[:, i, :, :].reshape(
            (batch_size, 1, 28, 28)).astype(np.float32)
        self.encoder(self.bn(self.lin1(F.dropout(F.relu(
            self.conv1(inp))))))
    state_at_t = self.encoder.h

    for i in range(start_idx, start_idx + jmp_idx):
        inp = t[:, i, :, :].reshape(
            (batch_size, 1, 28, 28)).astype(np.float32)
        self.encoder(self.bn(self.lin1(F.dropout(F.relu(
            self.conv1(inp))))))

    observation_at_jmp = new_t[:, start_idx + jmp_idx - 1, :]
    observation_at_start = new_t[:, start_idx, :]
    state_at_jmp = self.encoder.h

    """ Get a sample of z_2_target """
    p_b_2_linear = self.bn(F.relu(F.dropout(self.p_b_2_lin(state_at_jmp))))
    p_b_2_mean = self.p_b_2_m(p_b_2_linear)
    p_b_2_var = F.exp(self.p_b_2_v(p_b_2_linear))
    p_b_2 = D.GaussianDistribution(p_b_2_mean, p_b_2_var)
    z_2_target = p_b_2.sample()

    """ Get a sample of z_1_target """
    q_I_input = F.concat((z_2_target, state_at_jmp, state_at_t))
    q_I_linear = self.bn(F.relu(F.dropout(self.q_I_lin(q_I_input))))
    q_I_mean = self.q_I_m(q_I_linear)
    q_I_var = F.exp(self.q_I_v(q_I_linear))
    q_I = D.GaussianDistribution(q_I_mean, q_I_var)
    z_1_target = q_I.sample()

    """ Get a sample of z_1 (sample state_at_t only) """
    def p_b(state):
        p_b_1_linear = self.bn(F.relu(F.dropout(self.p_b_1_lin(state))))
        p_b_1_mean = self.p_b_1_m(p_b_1_linear)
        p_b_1_var = F.exp(self.p_b_1_v(p_b_1_linear))
        p_b_1 = D.GaussianDistribution(p_b_1_mean, p_b_1_var)
        z_1 = p_b_1.sample()
        return z_1, p_b_1

    z_1, p_b_1 = p_b(state_at_t)

    """ Get a sample of z_2 (from z_1) """
    def p_p(cur_z):
        p_p_2_linear = self.bn(F.relu(F.dropout(self.p_p_2_lin(cur_z))))
        p_p_2_mean = self.p_p_2_m(p_p_2_linear)
        p_p_2_var = F.exp(self.p_p_2_v(p_p_2_linear))
        p_p_2 = D.GaussianDistribution(p_p_2_mean, p_p_2_var)
        z_2 = p_p_2.sample()
        return z_2, p_p_2

    z_2, p_p_2 = p_p(z_1)

    """ Get reconstruction from z_2 """
    def p_d(z):
        p_d_1_lin = F.relu(F.dropout(self.p_d_1(z)))
        conv_input = F.reshape(p_d_1_lin, (p_d_1_lin.shape[0], 20, 20, 20))
        recon = F.sigmoid(self.p_d_conv(conv_input))
        return F.reshape(recon, (z.shape[0], 784))

    recon = p_d(z_2_target)

    """
    ==============================================
    Losses
    ==============================================
    """
    l_3 = F.mean_squared_error(p_d(z_1), observation_at_start)
    l_4 = F.mean_squared_error(p_d(z_1_target), observation_at_start)
    l_x = F.mean_squared_error(recon, observation_at_jmp)
    l_1 = p_b_2.log_prob(z_2_target) - p_p_2.log_prob(z_2_target)  # switch?
    l_2 = q_I.kl(p_b_1)

    loss = 0
    # loss = l_x + F.sum(l_1) + F.sum(l_2) + l_3 + l_4
    loss += l_x  # diverges with the log prob loss in its current form
    # loss += .01 * F.sum(l_1) / batch_size
    # loss += .01 * F.sum(l_2) / batch_size
    loss += l_3
    loss += l_4

    """
    ==============================================
    Testing Loop
    ==============================================
    """
    test_int = 100
    if self.it % test_int == 0:
        num_left = 5
        samples = np.zeros((batch_size, num_left + 1, 784))
        init_state, _ = p_b(state_at_jmp)
        local_recon = p_d(init_state)
        samples[:, 0, :] = chainer.backends.cuda.to_cpu(local_recon.data)
        for i in range(num_left - 1):
            i += 1
            update, _ = p_p(init_state)
            recon = p_d(update)
            samples[:, i, :] = chainer.backends.cuda.to_cpu(recon.data)
            init_state = update
        true_in = chainer.backends.cuda.to_cpu(new_t[:, end_idx, :])
        samples[:, num_left, :] = true_in
        fn = self.directory + str(self.it) + '.png'
        g_vis(samples[4, :, :].reshape((num_left + 1, 28, 28)), save_file=fn)

    if (self.it % 4000 == 0) and (self.it != 0):
        # import pdb; pdb.set_trace()
        pass

    reporter.report(
        {
            'loss': loss,
            'KL': F.sum(l_2),
            'avg_KL': F.sum(l_2),
            'mse': l_x
        }, self)
    self.it += 1
    return loss
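# The __call__ above returns a scalar loss, so it can be driven directly by a
# Chainer optimizer. A minimal training-loop sketch, assuming `model` is an
# instance of the chain defining this method and `train_iter` yields
# (prev_frames, target_frames) batches; both names are illustrative.
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)

for x, t in train_iter:
    model.cleargrads()       # clear accumulated gradients
    loss = model(x, t)       # forward pass defined above
    loss.backward()          # backprop through the reconstruction/KL losses
    optimizer.update()       # apply the Adam update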
def setUp(self):
    self.mean = np.random.rand(
        self.batch_size, self.ndim).astype(np.float32)
    self.var = np.random.rand(
        self.batch_size, self.ndim).astype(np.float32)
    self.distrib = distribution.GaussianDistribution(self.mean, self.var)
def __call__(self, x):
    mean = self.hidden_layers(x)
    var = F.broadcast_to(self.var_func(self.var_param), mean.shape)
    return distribution.GaussianDistribution(mean, var)
def __call__(self, x):
    mean, var = self.compute_mean_and_var(x)
    return distribution.GaussianDistribution(mean, var=var)
def __call__(self, x, test=False):
    mean, var = self.compute_mean_and_var(x, test=test)
    return distribution.GaussianDistribution(mean, var=var)