def __call__(self, x, gt):
    """Gradient-difference loss between a generated volume and ground truth.

    Applies the three directional filters (x, y, z), compares the absolute
    responses with squared error, and returns the total divided by the
    minibatch size.
    """
    n = len(x.array)
    total = 0
    # Accumulate the squared difference of absolute gradients per axis.
    for directional_filter in (self.filter_x, self.filter_y, self.filter_z):
        gt_grad = F.absolute(directional_filter(gt))
        gen_grad = F.absolute(directional_filter(x))
        total += F.squared_error(gt_grad, gen_grad)
    return F.sum(total) / n
def average_k_step_squared_error(x1, x2, k_step):
    r"""Average k-step squared error introduced by Oh et al.

    .. math::
        \frac{1}{2K}\sum_{i}\sum_{t}\sum_{k}
        \|\hat{\mathbf{x}}_{t+k}^{(i)} - \mathbf{x}_{t+k}^{(i)}\|^{2}

    See: https://arxiv.org/abs/1507.08750

    Parameters
    ----------
    x1 : array
        predicted image
    x2 : array
        expected image
    k_step : int
        maximum steps to predict from given input

    Returns
    -------
    error : Variable
        k-step squared error
    """
    total = F.sum(F.squared_error(x1, x2))
    return total / k_step / 2
def update_model(self):
    """Run minibatch PPO-style updates, then refresh the frozen old policy.

    Reads training data from a shared buffer under ``self.lock`` and performs
    ``self.num_train_per_episode`` gradient steps with a clipped-ratio
    objective plus value loss and entropy bonus.
    """
    # start minibatch learning
    for t in range(self.num_train_per_episode):
        # get learning data (buffer is shared with collector threads)
        with self.lock:
            states, actions, advantages = self.get_data_from_train_buffer()
        # get policy and value from the current and the frozen "old" network
        policies, values = self.model(states)
        old_policies, _ = self.old_model(states)
        # calculate loss
        # Value loss: per-element squared error against the advantage targets.
        # NOTE(review): `advantages` appears to double as the value target
        # (i.e. it presumably holds returns) — confirm against the collector.
        loss_v = F.squared_error(values, np.array(advantages).astype(np.float32))
        loss_ent = -policies.entropy()
        # Probability ratio pi/pi_old; epsilons avoid division by zero.
        r = (policies.get_prob(actions) + 1.0e-10) / (old_policies.get_prob(actions) + 1.0e-10)
        # Clipped surrogate objective (PPO); values.data detaches the critic.
        loss_clip = (advantages - values.data) * F.minimum(
            r, F.clip(r, 1.0 - self.eps, 1.0 + self.eps))
        loss = F.mean(-loss_clip + loss_v * 0.2 + 0.01 * loss_ent)
        self.model.cleargrads()
        loss.backward()
        self.optimizer.update()
    # update old model — snapshot current weights for the next ratio baseline
    self.old_model = self.copy_model()
    self.clear_buffer()
def loss_comp_low(x, y, threshold, norm='l2'):
    """Masked reconstruction loss restricted to threshold disagreements.

    The mask is 1 where exactly one of ``x`` and ``y`` lies at or below
    ``threshold`` (XOR of the two boolean maps) and 0 elsewhere; the chosen
    element-wise error is averaged after masking.

    Parameters
    ----------
    x, y : chainer.Variable
        Compared arrays.
    threshold : float
        Level that splits "low" from "high" values.
    norm : str
        ``'l1'`` for absolute error, anything else for squared error.
    """
    low_x = x.array <= threshold
    low_y = y.array <= threshold
    mask = (low_x ^ low_y).astype(x.xp.float32)
    err = F.absolute_error(x, y) if norm == 'l1' else F.squared_error(x, y)
    return F.average(mask * err)
def __call__(self, x, t):
    """Compute and report the mean and worst-case squared error of the model.

    Returns the mean loss (the quantity to backpropagate); the maximum
    per-element squared error is reported for monitoring only.
    """
    self.y = self.model(x)
    self.mean_loss = F.mean_squared_error(self.y, t)
    self.worst_loss = F.max(F.squared_error(self.y, t))
    reporter.report({'mean_loss': self.mean_loss}, self)
    reporter.report({'worst_loss': self.worst_loss}, self)
    return self.mean_loss
def calc_loss(self, x, t):
    """Total training loss: joint-weighted squared error + scaled VAE loss
    + Gaussian negative log-likelihood of the action model.

    All intermediate terms are stored on ``self`` and reported via
    ``chainer.report`` for logging.
    """
    # h = self.encode_model.feature(x)
    # print('encode_model_space', h)
    xp = backend.get_array_module(x)
    VAE_LOSS_SCALE = 1e-5
    # Per-joint weights decaying linearly from 10 to 1 across t's columns
    # (earlier joints weighted more heavily — presumably proximal joints;
    # TODO confirm the column ordering of t).
    JOINT_LOSS_WEIGHTS = xp.linspace(10, 1, t.shape[1])
    # print(JOINT_LOSS_WEIGHTS)
    # action model output (also yields the latent's mean and log-variance)
    output, z_mu, z_ln_var = self.forward_with_z(x)
    # print(t.shape, output.shape, z_mu.shape, z_ln_var.shape)
    # MAE of action model (reported only; not part of the total loss)
    self.mean_abs_error = F.mean_absolute_error(t, output)
    self.weighted_joint_error = F.mean(F.squared_error(t, output) * JOINT_LOSS_WEIGHTS)
    self.gnll = self.action.negative_log_likelihood(F.concat((z_mu, z_ln_var)), t)
    # VAE loss (reconstruction + KL), heavily down-scaled
    self.vae_loss_rec, self.vae_loss_kl = self.VAE_loss_func()(self, x, z_mu, z_ln_var, split=True)
    self.vae_loss = VAE_LOSS_SCALE * (self.vae_loss_rec + self.vae_loss_kl)
    # Total loss
    self.total_loss = self.weighted_joint_error + \
        self.vae_loss + self.gnll
    chainer.report({'mae': self.mean_abs_error}, self)
    chainer.report({'gnll': self.gnll}, self)
    chainer.report({'weighted': self.weighted_joint_error}, self)
    chainer.report({'VAE': self.vae_loss}, self)
    chainer.report({'VAE_KL': self.vae_loss_kl}, self)
    chainer.report({'VAE_REC': self.vae_loss_rec}, self)
    chainer.report({'loss': self.total_loss}, self)
    return self.total_loss
def main():
    """Interactive loop: build an MLP and either evaluate it or train it to
    approximate the sum of two random inputs at a user-chosen magnitude.

    Fixes over the original:
    - ``eval('1e+' + digit)`` executed raw user input; replaced by the
      behaviorally identical and safe ``float('1e+' + digit)``.
    - Deprecated ``model.zerograds()`` replaced by ``model.cleargrads()``.
    """
    n_nodes = int(input('Enter the number of nodes: '))
    model = mlp.MLP(n_nodes)
    optimizer = optimizers.Adam()
    optimizer.setup(model)
    modes = ['Calculation', 'Learning', 'Exit']
    while True:
        draw_modes()
        select = int(input('Enter the number of mode: '))
        selected_mode = modes[select - 1]
        if selected_mode == 'Exit':
            sys.exit()
        if selected_mode == 'Calculation':
            calc(model)
        if selected_mode == 'Learning':
            epoch = int(input('Enter the number of epoch: '))
            digit = input('Enter the number of learning digit: ')
            # Scale factor 10**digit, parsed safely instead of eval().
            scale = float('1e+' + digit)
            for _ in range(epoch):
                a = np.random.rand(2).reshape(1, 2).astype(np.float32) * scale
                x = chainer.Variable(a)
                x = model(x)
                # Target: the sum of the two inputs, shaped (1, 1).
                t = chainer.Variable(a.sum().reshape(1, 1))
                model.cleargrads()
                loss = F.squared_error(x, t)
                loss.backward()
                optimizer.update()
                print('loss: ' + str(loss))
def check_forward(self, x0_data, x1_data):
    """Check F.squared_error's forward pass against a NumPy reference.

    Verifies dtype, shape, and each element (x0 - x1)**2 to 5 places.
    """
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = functions.squared_error(x0, x1)
    loss_value = cuda.to_cpu(loss.data)
    self.assertEqual(loss_value.dtype, numpy.float32)
    # squared_error is element-wise, so output shape matches the input.
    self.assertEqual(loss_value.shape, x0_data.shape)
    for i in numpy.ndindex(self.x0.shape):
        # Compute expected value
        loss_expect = (self.x0[i] - self.x1[i])**2
        self.assertAlmostEqual(loss_value[i], loss_expect, places=5)
def check_forward(self, x0_data, x1_data):
    """Pytest-style forward check of F.squared_error (dtype, shape, values)."""
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = functions.squared_error(x0, x1)
    loss_value = cuda.to_cpu(loss.data)
    assert loss_value.dtype == numpy.float32
    # Element-wise loss: output shape matches the input shape.
    assert loss_value.shape == x0_data.shape
    for i in numpy.ndindex(self.x0.shape):
        # Compute expected value
        loss_expect = (self.x0[i] - self.x1[i]) ** 2
        # round(...) == 0 mimics assertAlmostEqual(places=5).
        assert round(loss_value[i] - loss_expect, 5) == 0
def check_forward(self, x0_data, x1_data):
    """Pytest-style forward check of F.squared_error (dtype, shape, values)."""
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = functions.squared_error(x0, x1)
    loss_value = cuda.to_cpu(loss.data)
    assert loss_value.dtype == numpy.float32
    # Element-wise loss: output shape matches the input shape.
    assert loss_value.shape == x0_data.shape
    for i in numpy.ndindex(self.x0.shape):
        # Compute expected value
        loss_expect = (self.x0[i] - self.x1[i])**2
        # round(...) == 0 mimics assertAlmostEqual(places=5).
        assert round(loss_value[i] - loss_expect, 5) == 0
def check_forward(self, x0_data, x1_data):
    """unittest-style forward check of F.squared_error (dtype, shape, values)."""
    x0 = chainer.Variable(x0_data)
    x1 = chainer.Variable(x1_data)
    loss = functions.squared_error(x0, x1)
    loss_value = cuda.to_cpu(loss.data)
    self.assertEqual(loss_value.dtype, numpy.float32)
    # Element-wise loss: output shape matches the input shape.
    self.assertEqual(loss_value.shape, x0_data.shape)
    for i in numpy.ndindex(self.x0.shape):
        # Compute expected value
        loss_expect = (self.x0[i] - self.x1[i]) ** 2
        self.assertAlmostEqual(loss_value[i], loss_expect, places=5)
def process_data(data): """ Process data by computing their new Q (target) values, compute the loss and backprop To stabilize the training procedure we precompute Q-Values. This somewhat circumvents the use of a target network (read Double DQN). :param data: mini-batch of Transitions """ data = np.array(data) cur_states = np.stack(data[:, 0], axis=1) cur_states = np.squeeze(cur_states, axis=0) cur_states = chainer.cuda.to_gpu(cur_states) if chainer.cuda.available else cur_states cur_qs = net(cur_states) next_states = np.stack(data[:, 1], axis=1) next_states = np.squeeze(next_states, axis=0) next_states = chainer.cuda.to_gpu(next_states) if chainer.cuda.available else next_states next_qs = net(next_states) with chainer.no_backprop_mode(): # Don't think this actually does anything, but better safe than sorry target_qs = deepcopy(cur_qs.data) target_qs = chainer.cuda.to_cpu(target_qs) if chainer.cuda.available else target_qs # Memory Efficiency # taken_action = data[:, 2] # terminal = data[:, 3] # rewards = data[:, 4] for i in range(len(target_qs)): # Q(s, a) = r + gamma * max_a(Q(s',a')) if game not over else r target_qs[i, data[i, 2]] = data[i, 4] + args.gamma * next_qs.data[i, :].max() if not data[i, 3] \ else data[i, 2] target_qs = chainer.cuda.to_gpu(target_qs) if chainer.cuda.available else target_qs loss = F.squared_error(cur_qs, target_qs) # Debugging Information # g = c.build_computational_graph(loss) # with open('computational_graph.graph', 'w') as o: # o.write(g.dump()) loss = F.mean(loss) net.cleargrads() loss.backward() optim.update() return float(loss.data)
def __call__(self, x_image, t_image, x_action, t_action):
    """Joint image-prediction / action-classification loss.

    Returns ``weight * image_loss + (1 - weight) * action_loss`` and prints
    a diagnostic whenever the predicted action differs from the real one.
    A normalized per-pixel error mask is stored on ``self.error_mask``.
    """
    self.y_image, self.y_action = self.predictor(x_image, x_action)
    predicted_action = self.action_meaning(
        F.argmax(self.y_action, axis=1).data[0])
    real_action = self.action_meaning(t_action)
    if predicted_action != real_action:
        print("Predicted action:", predicted_action,
              "it was actually", real_action)
    image_loss = F.mean_squared_error(self.y_image, t_image)
    # Per-pixel squared error, normalized for visualization/weighting.
    self.error_mask = normalize_2d(F.squared_error(self.y_image, t_image))
    action_loss = F.softmax_cross_entropy(
        self.y_action,
        F.expand_dims(np.array(t_action, dtype=np.int32), axis=0),
    )
    print('Image loss', image_loss.data, ', Action loss:', action_loss.data)
    return self.weight * image_loss + (1.0 - self.weight) * action_loss
def loss(self, x, t, **kwargs): """ """ activations = self.extract(x, layers=["digitcaps", "prob"]) recons = self.reconstruct(activations["digitcaps"], t) # classification loss c_loss = self._margin_loss(activations["prob"], t) # reconstruction loss recons = recons.reshape(x.shape) r_loss = F.mean(F.sum(F.squared_error(x, recons), axis=(1, 2, 3))) r_loss *= self.recon_loss_weight total_loss = c_loss + r_loss report({"loss": total_loss, "c_loss": c_loss, "r_loss": r_loss}, self) accuracy = F.accuracy(activations["prob"], t) report({"accuracy": accuracy}, self) return total_loss
def update_model(self):
    """One actor-critic gradient step from the shared training buffer.

    Returns early (yielding the thread via ``time.sleep(0)``) when fewer than
    ``batch_size`` samples are buffered.
    """
    # get learning data (buffer shared across threads, guarded by self.lock)
    with self.lock:
        if len(self.train_buffer[0]) < self.batch_size:
            time.sleep(0)  # yield to other threads, try again next call
            return
        states, actions, advantages = self.get_data_from_train_buffer()
    # get policy and value
    policies, values = self.model(states)
    # calculate loss
    # Value loss against the advantage targets (presumably returns — confirm).
    loss_v = F.squared_error(values, advantages)
    # Policy-gradient term; values.data detaches the baseline from the graph.
    loss_pi = (advantages - values.data) * policies.get_log_prob(actions)
    loss_ent = -policies.entropy()
    loss = F.mean(loss_v * 0.5 - loss_pi + 0.01 * loss_ent)
    self.model.cleargrads()
    loss.backward()
    self.optimizer.update()
def forward(self, st, act, r, st_dash, ep_end, ISWeights):
    """Double-DQN forward pass with importance-sampling weights.

    The online model selects argmax actions on the next state; the target
    model evaluates them. Returns ``(loss, td_error)`` where the loss is the
    IS-weighted mean squared error on the taken actions' targets.
    """
    s = Variable(st)
    s_dash = Variable(st_dash)
    Q = self.model.Q_func(s)
    Q_dash = self.model.Q_func(s_dash)
    Q_dash_target = self.target_model.Q_func(s_dash).data
    # Double DQN: action selection by the online net...
    Q_dash_idmax = np.asanyarray(list(map(np.argmax, Q_dash.data)))
    # ...evaluation of those actions by the target net.
    max_Q_dash = np.asanyarray([
        Q_dash_target[i][Q_dash_idmax[i]] for i in range(len(Q_dash_idmax))
    ])
    # Targets equal current Q everywhere except the taken action, so only
    # that entry produces gradient.
    target = np.asanyarray(copy.deepcopy(Q.data), dtype=np.float32)
    for i in range(self.batch_size):
        target[i, act[i]] = r[i] + (self.gamma * max_Q_dash[i]) * (not ep_end[i])
    squared_error = F.squared_error(Q, Variable(target))
    loss = F.mean(squared_error * ISWeights)
    # Signed TD error (Q - target), returned for priority updates.
    td_error = F.add(Q, Variable(-1 * target))
    self.loss = loss.data
    return loss, td_error
def loss_function(self, state, action, reward, next_state, terminal):
    """DQN loss with TD-error clipping plus an auxiliary frame-prediction loss.

    Returns ``(loss_q, loss_pred, weight)`` where ``weight`` holds the
    per-sample summed prediction error (used elsewhere, presumably for
    prioritization — confirm against the caller). Hard-codes a minibatch of
    32 frames of 84x84 (see the reshape below).
    """
    s = Variable(cuda.to_gpu(state))
    s_dash = Variable(cuda.to_gpu(next_state))
    # Most recent frame of the next state, normalized to [0, 1) below.
    s_dash_pre0 = Variable(cuda.to_gpu(next_state[:, 3].reshape(32, 1, 84, 84)))
    q, h3 = self.q_model.q_function(s)  # Get Q-value
    y_hat = self.pred_model.predict(h3)
    loss_pred = F.mean_squared_error(y_hat, s_dash_pre0/255.)
    L = F.squared_error(y_hat, s_dash_pre0/255.)
    L = cuda.to_cpu(L.data).reshape(32, 84, 84)
    weight = []
    for i in L:
        weight.append(i.sum())
    # Generate Target Signals
    tmp, _ = self.target_model.q_function(s_dash)  # Q(s',*)
    tmp = list(map(np.max, tmp.data))  # max_a Q(s',a)
    max_q_prime = np.asanyarray(tmp, dtype=np.float32)
    target = np.asanyarray(copy.deepcopy(q.data.get()), dtype=np.float32)
    for i in range(MINI_BATCH_SIZE):
        if terminal[i]:
            tmp_ = reward[i]
        else:
            # The sign of reward is used as the reward of DQN!
            tmp_ = reward[i] + DISCOUNT_RATE * max_q_prime[i]
        target[i, action[i]] = tmp_
    # TD-error clipping: keep td where |td| <= 1, else its sign (+/-1).
    td = Variable(cuda.to_gpu(target)) - q  # TD error
    td_tmp = td.data + 1000.0 * (abs(td.data) <= 1)  # Avoid zero division
    td_clip = td * (abs(td.data) <= 1) + td/abs(td_tmp) * (abs(td.data) > 1)
    zero_val = Variable(cuda.to_gpu(np.zeros((MINI_BATCH_SIZE, self.num_actions), dtype=np.float32)))
    loss_q = F.mean_squared_error(td_clip, zero_val)
    return loss_q, loss_pred, weight
def lf(x): y, energy, sigma = self.fwd(x, isTraining=True, gpu=gpu) # reconstLoss = F.mean_squared_error(x, y) # mseで誤差は計算しない? reconstLoss = F.sum(F.squared_error(x, y)) / len( x) # 再構築誤差(ミニバッチ平均) avgEnergy = F.sum(energy) / len(x) # エネルギー(ミニバッチ平均) # 正則化項について NumOfClass, zDim, _ = sigma.shape diagMat = Variable( xp.array( np.array(list(np.eye(zDim)) * NumOfClass).reshape( NumOfClass, zDim, zDim).astype(np.float32))) reg = F.sum((1 / sigma) * diagMat) #重みパラメータ lambda_1 = 0.1 lambda_2 = 0.005 self.loss = reconstLoss + lambda_1 * avgEnergy + lambda_2 * reg chainer.report({'loss': self.loss}, observer=self) return self.loss
def coral_func(xp, src, tar): """ inputs: -src(Variable) : features extracted from source data -tar(Variable) : features extracted from target data return coral loss between source and target features ref: Deep CORAL: Correlation Alignment for Deep Domain Adaptation \ (https://arxiv.org/abs/1607.01719 """ ns, nt = src.data.shape[0], tar.data.shape[0] dim = src.data.shape[1] ones_s = xp.ones((1, ns), dtype=np.float32) ones_t = xp.ones((1, nt), dtype=np.float32) tmp_s = F.matmul(Variable(ones_s), src) tmp_t = F.matmul(Variable(ones_t), tar) cov_s = (F.matmul(F.transpose(src), src) - F.matmul(F.transpose(tmp_s), tmp_s) / ns) / (ns - 1) cov_t = (F.matmul(F.transpose(tar), tar) - F.matmul(F.transpose(tmp_t), tmp_t) / nt) / (nt - 1) coral = F.sum(F.squared_error(cov_s, cov_t)) / (4 * dim * dim) return coral
def __call__(self, X, Y):
    """Return the total (summed) squared error between predictions on X and Y."""
    predictions = self.predict(X)
    return F.sum(F.squared_error(predictions, Y))
def test():
    """Evaluate DAGMM-style reconstruction statistics on the arrhythmia set.

    Splits healthy samples into train/validation, keeps diseased samples
    separate, then plots histograms of squared error, relative Euclidean
    distance, and cosine similarity for the three groups.

    NOTE(review): the model is built and evaluated with freshly initialized
    weights — no snapshot is loaded here; presumably a serializer load is
    missing. Confirm before trusting the plots.
    """
    parser = argparse.ArgumentParser(description='DAGMM')
    parser.add_argument('--epoch', '-e', type=int, default=10000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--cn_h_unit', type=int, default=10,
                        help='Number of Compression Network hidden units')
    parser.add_argument('--cn_z_unit', type=int, default=2,
                        help='Number of Compression Network z units')
    parser.add_argument('--en_h_unit', type=int, default=10,
                        help='Number of Estimation Network hidden units')
    parser.add_argument('--en_o_unit', type=int, default=2,
                        help='Number of Estimation Network output units')
    args = parser.parse_args()
    print('# epoch: {}'.format(args.epoch))
    print('# Output-directory when training: {}'.format(args.out))
    print('# Compression Network: Dim - {0} - {1} - {0} - Dim'.format(
        args.cn_h_unit, args.cn_z_unit))
    print('# Estimation Network: {} - {} - {}'.format(args.cn_z_unit + 2,
                                                      args.en_h_unit,
                                                      args.en_o_unit))
    print('')
    # Load the dataset
    x_data = np.loadtxt('./dataset_arrhythmia/ExplanatoryVariables.csv',
                        delimiter=',')
    y_label = np.loadtxt('./dataset_arrhythmia/CriterionVariables.csv',
                         delimiter=',')
    # Extract only the healthy samples (label == 1)
    HealthData = x_data[y_label[:] == 1]
    # Split healthy data into training and validation portions (90/10)
    NumOfHealthData = len(HealthData)
    trainData = HealthData[:math.floor(NumOfHealthData * 0.9)]
    validData = HealthData[len(trainData):]
    # Extract the non-healthy (anomalous) samples
    diseaseData = x_data[y_label[:] != 1]
    # Type conversion for Chainer (float32)
    trainData = trainData.astype(np.float32)
    validData = validData.astype(np.float32)
    diseaseData = diseaseData.astype(np.float32)
    model = DAGMM(args.cn_h_unit, args.cn_z_unit, len(trainData[0]),
                  args.en_h_unit, args.en_o_unit)
    optimizer = optimizers.Adam(alpha=0.0001)
    optimizer.setup(model)
    print("------------------")
    print("Health trainData Energy")
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        _, _, _, y_htr = model.fwd(trainData)
    # print(y_htr.data)
    print('# mse: {}'.format(F.mean_squared_error(trainData, y_htr)))
    # Per-sample squared error and the two DAGMM distance features.
    se_htr = F.sum(F.squared_error(trainData, y_htr), axis=1)
    euc_htr = model.relativeEuclideanDistance(trainData, y_htr)
    cos_htr = model.cosineSimilarity(trainData, y_htr)
    print("------------------")
    print("Health testData Energy")
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        _, _, _, y_hte = model.fwd(validData)
    # print(y_hte.data)
    print('# mse: {}'.format(F.mean_squared_error(validData, y_hte)))
    se_hte = F.sum(F.squared_error(validData, y_hte), axis=1)
    euc_hte = model.relativeEuclideanDistance(validData, y_hte)
    cos_hte = model.cosineSimilarity(validData, y_hte)
    print("------------------")
    print("Disease testData Energy")
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        _, _, _, y_di = model.fwd(diseaseData)
    # print(y_di.data)
    print('# mse: {}'.format(F.mean_squared_error(diseaseData, y_di)))
    se_di = F.sum(F.squared_error(diseaseData, y_di), axis=1)
    euc_di = model.relativeEuclideanDistance(diseaseData, y_di)
    cos_di = model.cosineSimilarity(diseaseData, y_di)
    print("")
    print("------------------")
    print("squared_error")
    # Histograms: blue = healthy train, green = healthy test, red = disease.
    plt.hist(se_htr.data, bins=100, alpha=0.4, histtype='stepfilled', color='b')
    plt.hist(se_hte.data, bins=100, alpha=0.4, histtype='stepfilled', color='g')
    plt.hist(se_di.data, bins=100, alpha=0.4, histtype='stepfilled', color='r')
    plt.show()
    print("------------------")
    print("relativeEuclideanDistance")
    plt.hist(euc_htr.data, bins=100, alpha=0.4, histtype='stepfilled', color='b')
    plt.hist(euc_hte.data, bins=100, alpha=0.4, histtype='stepfilled', color='g')
    plt.hist(euc_di.data, bins=100, alpha=0.4, histtype='stepfilled', color='r')
    plt.show()
    print("------------------")
    print("cosineSimilarity")
    plt.hist(cos_htr.data, bins=100, alpha=0.4, histtype='stepfilled', color='b')
    plt.hist(cos_hte.data, bins=100, alpha=0.4, histtype='stepfilled', color='g')
    plt.hist(cos_di.data, bins=100, alpha=0.4, histtype='stepfilled', color='r')
    plt.show()
def update(self, experiences_agent, experiences_demo):
    """Combined DQfD loss function for Demonstration and agent/RL.

    Computes the 1-step and n-step double-DQN losses over the concatenated
    agent + demonstration batch, updates replay priorities with per-source
    bonuses, adds the large-margin supervised loss on demonstration samples
    only, backprops the weighted sum, and updates running loss statistics.
    """
    num_exp_agent = len(experiences_agent)
    experiences = experiences_agent + experiences_demo
    exp_batch = batch_experiences(experiences, xp=self.xp, phi=self.phi,
                                  gamma=self.gamma,
                                  batch_states=self.batch_states)
    # Importance-sampling weights from the prioritized replay buffer.
    exp_batch['weights'] = self.xp.asarray(
        [elem[0]['weight']for elem in experiences], dtype=self.xp.float32)
    errors_out = []
    loss_q_nstep, loss_q_1step = self._compute_ddqn_losses(
        exp_batch, errors_out=errors_out)
    # Add the agent/demonstration bonus priorities and update
    err_agent = errors_out[:num_exp_agent]
    err_demo = errors_out[num_exp_agent:]
    err_agent = [e + self.bonus_priority_agent for e in err_agent]
    err_demo = [e + self.bonus_priority_demo for e in err_demo]
    self.replay_buffer.update_errors(err_agent, err_demo)
    # Large-margin supervised loss
    # Grab the cached Q(s) in the forward pass & subset demo exp.
    q_picked = self.qout.evaluate_actions(exp_batch["action"])
    q_expert_demos = q_picked[num_exp_agent:]
    # unwrap DiscreteActionValue and subset demos
    q_demos = self.qout.q_values[num_exp_agent:]
    # Calculate margin forall actions (l(a_E,a) in the paper);
    # zero margin at the expert's own action.
    margin = self.xp.zeros_like(
        q_demos.array) + self.demo_supervised_margin
    a_expert_demos = exp_batch["action"][num_exp_agent:]
    margin[self.xp.arange(len(experiences_demo)), a_expert_demos] = 0.0
    # max_a [Q(s, a) + l(a_E, a)] — the margin-augmented target.
    supervised_targets = F.max(q_demos + margin, axis=-1)
    iweights_demos = exp_batch['weights'][num_exp_agent:]
    loss_supervised = F.squared_error(q_expert_demos, supervised_targets)
    loss_supervised = F.sum(loss_supervised * iweights_demos)
    if self.batch_accumulator == "mean":
        loss_supervised /= max(len(experiences_demo), 1.0)
    # L2 loss is directly applied as chainer optimizer hook in init
    loss_combined = loss_q_1step + \
        self.loss_coeff_nstep * loss_q_nstep + \
        self.loss_coeff_supervised * loss_supervised
    self.model.cleargrads()
    loss_combined.backward()
    self.optimizer.update()
    # Update stats (exponential moving averages of each loss term)
    self.average_loss *= self.average_loss_decay
    self.average_loss += (1 - self.average_loss_decay) * \
        float(loss_combined.array)
    self.average_loss_1step *= self.average_loss_decay
    self.average_loss_1step += (1 - self.average_loss_decay) * \
        float(loss_q_1step.array)
    self.average_loss_nstep *= self.average_loss_decay
    self.average_loss_nstep += (1 - self.average_loss_decay) * \
        float(loss_q_nstep.array)
    self.average_loss_supervised *= self.average_loss_decay
    self.average_loss_supervised += (1 - self.average_loss_decay) * \
        float(loss_supervised.array)
    if len(err_demo):
        self.average_demo_td_error *= self.average_loss_decay
        self.average_demo_td_error += (1 - self.average_loss_decay) * \
            np.mean(err_demo)
    if len(err_agent):
        self.average_agent_td_error *= self.average_loss_decay
        self.average_agent_td_error += (1 - self.average_loss_decay) * \
            np.mean(err_agent)
def update(self, experiences_agent, experiences_demo):
    """Combined DQfD loss function for Demonstration and agent/RL.

    Branched-action variant: the large-margin supervised loss is computed
    per action branch (``self.qout.branches``), with branch Q-values packed
    side-by-side via ``F.hstack`` and offset indexing into the margin matrix.
    """
    num_exp_agent = len(experiences_agent)
    experiences = experiences_agent+experiences_demo
    exp_batch = batch_experiences(experiences, xp=self.xp, phi=self.phi,
                                  reward_transform=self.reward_transform,
                                  gamma=self.gamma,
                                  batch_states=self.batch_states)
    # Importance-sampling weights from the prioritized replay buffer.
    exp_batch['weights'] = self.xp.asarray(
        [elem[0]['weight']for elem in experiences], dtype=self.xp.float32)
    errors_out = []
    loss_q_nstep, loss_q_1step = self._compute_ddqn_losses(
        exp_batch, errors_out=errors_out)
    # Add the agent/demonstration bonus priorities and update
    err_agent = errors_out[:num_exp_agent]
    err_demo = errors_out[num_exp_agent:]
    err_agent = [e+self.bonus_priority_agent for e in err_agent]
    err_demo = [e+self.bonus_priority_demo for e in err_demo]
    self.replay_buffer.update_errors(err_agent, err_demo)
    # Large-margin supervised loss
    # Grab the cached Q(s) in the forward pass & subset demo exp.
    q_picked = self.qout.evaluate_actions(exp_batch["action"])
    q_expert_demos = q_picked[num_exp_agent:]
    # unwrap DiscreteActionValue and subset demos (one slice per branch)
    q_demos = []
    for branch in self.qout.branches:
        q_demos.append(branch.q_values[num_exp_agent:])
    q_demos = F.hstack(q_demos)
    # Calculate margin forall actions (l(a_E,a) in the paper);
    # zero margin at each branch's expert action (offset by `pos`).
    margin = self.xp.zeros_like(
        q_demos.array) + self.demo_supervised_margin
    a_expert_demos = exp_batch["action"][num_exp_agent:]
    pos = 0
    for i, branch in enumerate(self.qout.branches):
        margin[self.xp.arange(len(experiences_demo)),
               a_expert_demos[:, i] + pos] = 0
        pos += branch.q_values.shape[1]
    # Supervised targets: per-branch max of margin-augmented Q-values.
    q_demos_penalized = q_demos + margin
    supervised_targets = []
    pos = 0
    for i, branch in enumerate(self.qout.branches):
        branch_size = branch.q_values.shape[1]
        supervised_targets.append(
            F.max(q_demos_penalized[:, pos:pos + branch_size],
                  axis=-1).reshape(-1, 1))
        pos += branch_size
    supervised_targets = F.hstack(supervised_targets)
    # Supervised loss calculation
    n_branches = len(self.qout.branches)
    iweights_demos = exp_batch['weights'][num_exp_agent:].reshape(-1, 1)
    iweights_demos = self.xp.tile(iweights_demos, (1, n_branches))
    loss_supervised = F.squared_error(supervised_targets, q_expert_demos)
    loss_supervised = F.sum(iweights_demos * loss_supervised)
    if self.batch_accumulator == "mean":
        loss_supervised /= n_branches
        loss_supervised /= max(len(experiences_demo), 1)
    loss_combined = loss_q_1step + \
        self.loss_coeff_nstep * loss_q_nstep + \
        self.loss_coeff_supervised * loss_supervised
    self.model.cleargrads()
    loss_combined.backward()
    self.optimizer.update()
    # Exponential moving averages of the loss terms for logging.
    self.average_loss *= self.average_loss_decay
    self.average_loss += (1 - self.average_loss_decay) * \
        float(loss_combined.array)
    self.average_loss_1step *= self.average_loss_decay
    self.average_loss_1step += (1 - self.average_loss_decay) * \
        float(loss_q_1step.array)
    self.average_loss_nstep *= self.average_loss_decay
    self.average_loss_nstep += (1 - self.average_loss_decay) * \
        float(loss_q_nstep.array)
    self.average_loss_supervised *= self.average_loss_decay
    self.average_loss_supervised += (1 - self.average_loss_decay) * \
        float(loss_supervised.array)
    self.average_iweights *= self.average_loss_decay
    self.average_iweights += (1 - self.average_loss_decay) * \
        cuda.to_cpu(exp_batch['weights'].mean())
    if len(err_demo):
        self.average_demo_td_error *= self.average_loss_decay
        self.average_demo_td_error += (1 - self.average_loss_decay) * \
            np.mean(err_demo)
    if len(err_agent):
        self.average_agent_td_error *= self.average_loss_decay
        self.average_agent_td_error += (1 - self.average_loss_decay) * \
            np.mean(err_agent)
def f(*xs):
    """Square of the element-wise squared error, i.e. (x0 - x1) ** 4."""
    err = functions.squared_error(*xs)
    return err * err
for loop in six.moves.range(n_iter): ix_batch = perm[loop * batchsize:(loop + 1) * batchsize] # prep input data x = mp.asarray(X_train[ix_batch]) #t = mp.asarray(T_train[ix_batch]) # feedforward & retrieve #y = fc(model(x, wr=args.wr)) y = model(x, wr=args.wr) # calc loss values #loss = F.softmax_cross_entropy(y, t) dummy = mp.zeros_like(y) loss = F.squared_error(y, dummy) model.cleargrads() loss.forward_grad(rho=args.rho, decay=0.8) # 1e-2, 0.5 # 1e-2 for SGD, 1e-6 AdaDelta, 1e-11 for Adam opt.update() model.norm() # evaluation if (loop + 1) % chk_trial_interval == 0: print('{0:7} -> loss: '.format(loop + 1), end='') #print(loss.data.shape) l2 = cuda.to_cpu(loss.data) l3 = np.mean(l2**2, axis=(1, 2, 3)).mean() print(' {0:.4f}'.format(l3))
def train(alignment_filepath, epoch, model, optim, bs, vocab_dict,
          max_num_trees):
    """Train a tree-scoring model on word-alignment data.

    Reads GIZA-style alignment triples (comment line, source, target) from a
    gzipped file, builds leaf lists with alignment indices, filters trees by
    size and alignment density, then minimizes the squared difference between
    the best candidate score (plus a structural-difference penalty) and the
    ground-truth score, in minibatches of ``bs``. Plots the per-epoch loss
    curve and returns the trained model.
    """
    trees = []
    # Load the data
    print_message('Preparing data from file...')
    tree_i = 0
    with gzip.open(alignment_filepath, 'rb', 'utf-8') as alignfile:
        # Each record is 3 lines; the iterated line is skipped (header/comment),
        # then the source and target lines are read explicitly.
        for _ in alignfile:
            source = alignfile.readline().strip().decode('utf-8')
            target = alignfile.readline().strip().decode('utf-8')
            tree = [
                Leaf(w, [], idx) for idx, w in enumerate(source.split(' '))
            ]
            # Split "word ({ i j })" segments; odd entries hold alignments.
            target_words_align = re.split('\(\{|\}\)', target)[:-1]
            target_align = [
                a for i, a in enumerate(target_words_align) if i % 2 == 1
            ]
            num_align = 0
            for i, a in enumerate(target_align):
                if a.strip():
                    for _a in a.strip().split():
                        num_align += 1
                        tree[int(_a) - 1].alignment.append(i + 1)
            # Keep only mid-sized trees that are sufficiently aligned.
            if not (3 < len(tree) < 20) or len(tree) >= 2 * num_align:
                continue
            trees.append(tree)
            tree_i += 1
            print("\r%dth tree appended" % tree_i, end='')
            if 0 <= max_num_trees == len(trees):
                break
    print()
    idxes = [i for i in range(len(trees))]
    losses = []
    for e in range(epoch):
        print_message('Epoch %d start' % (e + 1))
        random.shuffle(idxes)
        total_loss = 0
        batch_loss = 0
        for i, idx in enumerate(idxes):
            if (i + 1) % 10 == 0:
                print_message('%dth tree processing...' % (i + 1))
            tree = trees[idx]
            ground_truth_score, tree_loss, gt_node_list = max_gt_score(
                tree, model, vocab_dict)
            cand_score, num_diff = max_cand_score(tree, model, vocab_dict,
                                                  gt_node_list)
            print('gt score: %.4f' % ground_truth_score.data,
                  'cand score: %.4f' % cand_score.data,
                  'diff: %d' % num_diff)
            # Structured-margin style loss: candidate score plus the number of
            # differing nodes should not exceed the ground-truth score.
            loss = chainF.squared_error(
                cand_score + num_diff, ground_truth_score) + tree_loss.reshape(
                    (1, 1))
            print('tree loss: %.4f' % loss.data)
            batch_loss += loss
            total_loss += loss.data
            if (i + 1) % bs == 0:
                batch_loss /= bs
                model.cleargrads()
                batch_loss.backward()
                optim.update()
                batch_loss = 0
        # Flush the remaining partial minibatch, if any.
        if (i + 1) % bs != 0:
            batch_loss /= ((i + 1) % bs)
            model.cleargrads()
            batch_loss.backward()
            optim.update()
        print_message('loss: %.4f' % total_loss)
        losses.append(total_loss)
    plt.clf()
    # NOTE(review): reshape((10,)) assumes exactly 10 epochs — confirm.
    plt.plot(np.array([(i + 1) for i in range(len(losses))]),
             np.asarray(losses).reshape((10, )))
    plt.xlabel("epoch")
    plt.ylabel("loss")
    plt.show()
    return model
def update_core(self):
    """One GAN updater iteration for class-conditional style transfer.

    Steps: build the batch, generate x->y (and the cyclic y->x when enabled),
    optionally anneal learning rates, update the discriminator with the
    configured adversarial loss ('hinge' / 'ls' / 'wgan-gp') plus style and
    content classification losses, then update the generator with adversarial,
    smoothness, classification, equality, and cyclic-reconstruction terms.
    Periodically saves preview arrays and a label log.
    """
    self._iter += 1
    opt_gen = self.get_optimizer('gen')
    opt_dis = self.get_optimizer('dis')
    ## Create batch
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    # get data (each field gains a channel axis and is cast to float32)
    x_batch, x_class_labels, y_batch, y_class_labels, cont_id = [
        self.xp.expand_dims(self.xp.array(b), axis=1).astype("f")
        for b in zip(*batch)]
    x_data = self.converter(x_batch, self.device)
    y_data = self.converter(y_batch, self.device)
    x_labels = self.converter(x_class_labels.transpose(0,3,1,2), self.device)
    y_labels = self.converter(y_class_labels.transpose(0,3,1,2), self.device)
    cont_label = self.converter(cont_id.transpose(0,3,1,2), self.device)
    ## Forward
    x = Variable(x_data)
    y_data = Variable(y_data)
    x_labels = Variable(x_labels)
    y_labels = Variable(y_labels)
    x_y = self.gen(x, F.concat((x_labels, y_labels), axis=1))
    if self._lam_g_rec > 0:
        # Cyclic reconstruction x -> y -> x (labels swapped).
        x_y_x = self.gen(x_y, F.concat((y_labels, x_labels), axis=1))
    ## Annealing learning rate
    if self._learning_rate_anneal > 0 and self._iter % self._learning_rate_anneal_interval == 0:
        opt_gen.alpha *= 1.0 - self._learning_rate_anneal
        opt_dis.alpha *= 1.0 - self._learning_rate_anneal
    #============================
    # ## update Discriminator
    # #============================
    d_x_y = self.dis(x_y)
    d_y = self.dis(y_data)
    # mask for adv and equal loss (if x_class == y_class then 0 else 1)
    isDiffTargetAndInput = self.xp.sum(
        x_labels[:,:,0,0].data != y_labels[:,:,0,0].data,
        axis=1).astype(float) * 0.5 if self._lam_g_eq > 0 else self.xp.zeros(batchsize)
    # weight for adv and equal loss
    # w_adv have weight only for data which x_class != y_class
    w_adv = isDiffTargetAndInput * float(batchsize) / (self.xp.sum(isDiffTargetAndInput.astype(float)) + 1e-6)
    # w_eq have weight only for data which x_class = y_class
    w_eq = (1.0 - isDiffTargetAndInput) * float(batchsize) / (self.xp.sum(1.0 - isDiffTargetAndInput.astype(float)) + 1e-6)
    # style-class loss and content-class loss (discriminator output layout:
    # [:, 0] adversarial score, then style-class logits, then content logits)
    loss_dis_style = F.average(loss_class(d_y[:,1:1+y_labels.shape[1]], y_labels))
    loss_dis_cont = F.average(loss_class(d_y[:,-cont_label.shape[1]:], cont_label))
    # adv loss
    if self.loss_type == 'hinge':
        loss_dis_adv_fake = F.average(w_adv * loss_hinge_dis_fake(d_x_y))
        loss_dis_adv_real = F.average(w_adv * loss_hinge_dis_real(d_y))
        loss_dis_adv = loss_dis_adv_fake + loss_dis_adv_real
        loss_dis = self._lam_d_ad * loss_dis_adv + self._lam_d_style * loss_dis_style + self._lam_d_cont * loss_dis_cont
    elif self.loss_type == 'ls':
        loss_dis_adv_fake = F.average(w_adv * loss_ls_dis_fake(d_x_y))
        loss_dis_adv_real = F.average(w_adv * loss_ls_dis_real(d_y))
        loss_dis_adv = loss_dis_adv_fake + loss_dis_adv_real
        loss_dis = self._lam_d_ad * loss_dis_adv + self._lam_d_style * loss_dis_style + self._lam_d_cont * loss_dis_cont
    elif self.loss_type == 'wgan-gp':
        loss_dis_adv = F.average(d_x_y[:,0] - d_y[:,0])
        # calcurate GP (gradient penalty on interpolated samples)
        epsilon = self.xp.random.rand(batchsize,1,1,1).astype("f")
        y_hat = Variable(epsilon * x_y.data + (1-epsilon) * y_data .data)
        d_y_hat = self.dis(y_hat)
        g_d, = chainer.grad([w_adv * d_y_hat[:,:1]], [y_hat],
                            enable_double_backprop=True)
        g_d_norm = F.sqrt(F.batch_l2_norm_squared(g_d) + 1e-6)
        loss_dis_gp = F.mean_squared_error(g_d_norm, self.xp.ones_like(g_d_norm.data))
        loss_dis_drift = F.average(d_y[:,0]*d_y[:,0])
        loss_dis = self._lam_d_ad * loss_dis_adv + self._lam_d_style * loss_dis_style + self._lam_d_cont * loss_dis_cont + self._lam_d_gp * loss_dis_gp + self._lam_d_drift * loss_dis_drift
        chainer.report({'loss_gp': self._lam_d_gp*loss_dis_gp}, self.dis)
        chainer.report({'loss_drift': self._lam_d_drift*loss_dis_drift}, self.dis)
    else:
        print(f'invalid loss type!!! ({self.loss_type})')
        assert False
    chainer.report({'loss_adv': self._lam_d_ad*loss_dis_adv}, self.dis)
    chainer.report({'loss_style': self._lam_d_style*loss_dis_style}, self.dis)
    chainer.report({'loss_cont': self._lam_d_cont*loss_dis_cont}, self.dis)
    self.dis.cleargrads()
    loss_dis.backward()
    opt_dis.update()
    #============================
    # ## update Generator
    # #============================
    d_x_y2 = self.dis(x_y)
    # adv loss
    if self.loss_type == 'hinge':
        loss_gen_adv = F.average(w_adv * loss_hinge_gen(d_x_y2))
    elif self.loss_type == 'ls':
        loss_gen_adv = F.average(w_adv * loss_ls_gen(d_x_y2))
    elif self.loss_type == 'wgan-gp':
        loss_gen_adv = F.average(w_adv * -d_x_y2[:,0])
    else:
        print(f'invalid loss type!!! ({self.loss_type})')
        assert False
    # equal loss (identity when source and target classes match)
    if self.criterion == 'l2':
        loss_gen_equal = F.average(w_eq * F.average(F.squared_error(x_y, x), axis=(1,2,3)))
    elif self.criterion == 'l1':
        loss_gen_equal = F.average(w_eq * F.average(F.absolute_error(x_y, x), axis=(1,2,3)))
    # smoothness loss (difference of adjacent rows along axis 2)
    loss_gen_sm = F.mean_absolute_error(x_y[:,:,1:,:], x_y[:,:,:-1,:])
    # style-class loss and content-class loss
    loss_gen_style = F.average(loss_class(d_x_y2[:,1:1+y_labels.shape[1]], y_labels))
    loss_gen_cont = F.average(loss_class(d_x_y2[:,-cont_label.shape[1]:], cont_label))
    # cyclic loss
    if self.criterion == 'l2':
        loss_rec = F.mean_squared_error(x_y_x, x) if self._lam_g_rec > 0 else 0
    elif self.criterion == 'l1':
        loss_rec = F.mean_absolute_error(x_y_x, x) if self._lam_g_rec > 0 else 0
    loss_gen = self._lam_g_ad * loss_gen_adv + self._lam_g_sm * loss_gen_sm + self._lam_g_style * loss_gen_style + self._lam_g_cont * loss_gen_cont + self._lam_g_rec * loss_rec
    if self.cfg.train.class_equal:
        loss_gen += self._lam_g_eq * loss_gen_equal
    # Train the generator extra steps when the discriminator is far ahead.
    if loss_dis_adv.data < 0.5 * loss_gen_adv.data:
        n_gen = 5
    else:
        n_gen = 1
    for _ in range(n_gen):
        self.gen.cleargrads()
        loss_gen.backward()
        opt_gen.update()
    chainer.report({'loss_rec': loss_rec * self._lam_g_rec}, self.gen)
    if self.cfg.train.class_equal:
        chainer.report({'loss_eq': loss_gen_equal * self._lam_g_eq}, self.gen)
    chainer.report({'loss_sm': loss_gen_sm * self._lam_g_sm}, self.gen)
    chainer.report({'loss_adv': loss_gen_adv * self._lam_g_ad}, self.gen)
    chainer.report({'loss_style': loss_gen_style * self._lam_g_style}, self.gen)
    chainer.report({'loss_cont': loss_gen_cont * self._lam_g_cont}, self.gen)
    chainer.report({'lr': opt_gen.alpha})
    # save preview
    if self._iter % self.preview_interval == 0:
        save_path = os.path.join(self.save_dir, 'preview')
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        x = chainer.backends.cuda.to_cpu(x.data)
        x_y = chainer.backends.cuda.to_cpu(x_y.data)
        x_y_x = chainer.backends.cuda.to_cpu(x_y_x.data)
        y_data = chainer.backends.cuda.to_cpu(y_data.data)
        data_list = np.concatenate([x, x_y, x_y_x, y_data], axis=1)
        np.save(os.path.join(save_path, f'iter_{self._iter:04d}'), data_list)
        # save x_class, y_class and cont_id of preview
        with open(os.path.join(save_path, 'preview.txt'), 'a') as f:
            source = np.where(chainer.backends.cuda.to_cpu(x_class_labels)==1)[-1]
            target = np.where(chainer.backends.cuda.to_cpu(y_class_labels)==1)[-1]
            cont = np.where(chainer.backends.cuda.to_cpu(cont_id)==1)[-1]
            f.write(f'iter {self._iter:04d}\n')
            for b in range(batchsize):
                f.write(f'{b}: {source[b]} -> {target[b]} ({cont[b]})\n')
            f.write('\n')
def main():
    """Train a DRAW-style generative model (GRU or LSTM core) with data-parallel
    ChainerMN workers.

    Side effects: creates ``args.snapshot_directory``, loads ``.npy`` image files
    from ``args.dataset_path``, and periodically serializes the model. Relies on
    module-level ``args`` and project helpers (``GRUModel``/``LSTMModel``,
    ``AdamOptimizer``, ``draw``, ``printr``, ``to_gpu``, ``cf``).
    """
    # Was: try: os.mkdir(...) except: pass — the bare except silently swallowed
    # every error (e.g. missing parent dir), not just "already exists".
    os.makedirs(args.snapshot_directory, exist_ok=True)

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    cuda.get_device(device).use()

    # Shard the dataset files across MPI ranks: rotate the sorted file list by
    # rank * subset_size, then take the first subset_size entries.
    images = []
    files = os.listdir(args.dataset_path)
    files.sort()
    subset_size = int(math.ceil(len(files) / comm.size))
    files = deque(files)
    files.rotate(-subset_size * comm.rank)
    files = list(files)[:subset_size]
    for filename in files:
        image = np.load(os.path.join(args.dataset_path, filename))
        # Scale raw 8-bit pixel values into [0, 1).
        image = image / 256
        images.append(image)
    print(comm.rank, files)

    # NHWC -> NCHW, as Chainer convolutions expect.
    images = np.vstack(images)
    images = images.transpose((0, 3, 1, 2)).astype(np.float32)

    # Only the training split is used below; the dev split is reserved.
    train_dev_split = 0.9
    num_images = images.shape[0]
    num_train_images = int(num_images * train_dev_split)
    images_train = images[:num_train_images]

    # To avoid OpenMPI bug
    # multiprocessing.set_start_method("forkserver")
    # p = multiprocessing.Process(target=print, args=("", ))
    # p.start()
    # p.join()

    hyperparams = HyperParameters()
    hyperparams.chz_channels = args.chz_channels
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.generator_downsampler_channels = args.generator_downsampler_channels
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.inference_downsampler_channels = args.inference_downsampler_channels
    hyperparams.batch_normalization_enabled = args.enable_batch_normalization
    hyperparams.use_gru = args.use_gru
    hyperparams.no_backprop_diff_xr = args.no_backprop_diff_xr
    if comm.rank == 0:
        # Only the master rank writes hyperparameters and logs.
        hyperparams.save(args.snapshot_directory)
        hyperparams.print()

    if args.use_gru:
        model = GRUModel(hyperparams, snapshot_directory=args.snapshot_directory)
    else:
        model = LSTMModel(hyperparams, snapshot_directory=args.snapshot_directory)
    model.to_gpu()

    optimizer = AdamOptimizer(
        model.parameters,
        lr_i=args.initial_lr,
        lr_f=args.final_lr,
        beta_1=args.adam_beta1,
        communicator=comm)
    if comm.rank == 0:
        optimizer.print()

    num_pixels = images.shape[1] * images.shape[2] * images.shape[3]

    dataset = draw.data.Dataset(images_train)
    iterator = draw.data.Iterator(dataset, batch_size=args.batch_size)

    num_updates = 0

    for iteration in range(args.training_steps):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        start_time = time.time()

        for batch_index, data_indices in enumerate(iterator):
            x = dataset[data_indices]
            # Uniform dequantization noise in [0, 1/256).
            x += np.random.uniform(0, 1 / 256, size=x.shape)
            x = to_gpu(x)

            z_t_param_array, x_param, r_t_array = \
                model.sample_z_and_x_params_from_posterior(x)

            # KL divergence between posterior and prior, summed over all
            # generation steps.
            loss_kld = 0
            for params in z_t_param_array:
                mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                kld = draw.nn.functions.gaussian_kl_divergence(
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                loss_kld += cf.sum(kld)

            # Auxiliary sum-of-squares reconstruction loss over the
            # intermediate canvases r_t.
            loss_sse = 0
            for r_t in r_t_array:
                loss_sse += cf.sum(cf.squared_error(r_t, x))

            mu_x, ln_var_x = x_param
            loss_nll = cf.gaussian_nll(x, mu_x, ln_var_x)

            # Normalize each term by the batch size before weighting.
            loss_nll /= args.batch_size
            loss_kld /= args.batch_size
            loss_sse /= args.batch_size
            loss = (args.loss_beta * loss_nll + loss_kld
                    + args.loss_alpha * loss_sse)

            model.cleargrads()
            loss.backward(loss_scale=optimizer.loss_scale())
            optimizer.update(num_updates, loss_value=float(loss.array))

            num_updates += 1
            mean_kld += float(loss_kld.data)
            mean_nll += float(loss_nll.data)
            mean_mse += float(loss_sse.data) / num_pixels / (
                hyperparams.generator_generation_steps - 1)

            printr(
                "Iteration {}: Batch {} / {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e}"
                .format(
                    iteration + 1, batch_index + 1, len(iterator),
                    float(loss_nll.data) / num_pixels + math.log(256.0),
                    float(loss_sse.data) / num_pixels /
                    (hyperparams.generator_generation_steps - 1),
                    float(loss_kld.data), optimizer.learning_rate))

            # Periodic mid-epoch checkpoint on the master rank.
            if comm.rank == 0 and batch_index > 0 and batch_index % 100 == 0:
                model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            print(
                "\r\033[2KIteration {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e} - elapsed_time: {:.3f} min"
                .format(
                    iteration + 1,
                    mean_nll / len(iterator) / num_pixels + math.log(256.0),
                    mean_mse / len(iterator), mean_kld / len(iterator),
                    optimizer.learning_rate, elapsed_time / 60))
def relativeEuclideanDistance(self, x, y):
    """Return the per-sample Euclidean distance between x and y, divided by
    the per-sample Euclidean norm of x (shape: (batch, 1))."""
    # ||x - y||_2 for each row, kept as a column vector via keepdims.
    dist_xy = F.sqrt(F.sum(F.squared_error(x, y), axis=1, keepdims=True))
    # ||x||_2 for each row, reshaped into a matching column vector.
    norm_x = F.sqrt(F.batch_l2_norm_squared(x))
    norm_x = F.reshape(norm_x, (-1, 1))
    return dist_xy / norm_x
def compute_logit_loss(a, b):
    """Elementwise squared error between the logit transforms of a and b."""
    logit_a = logit(a, F)
    logit_b = logit(b, F)
    return F.squared_error(logit_a, logit_b)