def compute_loss(self, xc, yc):
    '''
    Compute the losses.

    :param xc: tensors, one batch of network inputs
    :param yc: tensors, the ground-truth annotations for the inputs
    :return: dict, the individual losses and the predictions
    '''
    y_pos, y_cos, y_sin, y_width = yc
    pos_pred, cos_pred, sin_pred, width_pred = self.forward(xc)

    p_loss = F.mse_loss(pos_pred, y_pos)
    cos_loss = F.mse_loss(cos_pred, y_cos)
    sin_loss = F.mse_loss(sin_pred, y_sin)
    width_loss = F.mse_loss(width_pred, y_width)

    return {
        'loss': p_loss + cos_loss + sin_loss + width_loss,
        'losses': {
            'p_loss': p_loss,
            'cos_loss': cos_loss,
            'sin_loss': sin_loss,
            'width_loss': width_loss
        },
        'pred': {
            'pos': pos_pred,
            'cos': cos_pred,
            'sin': sin_pred,
            'width': width_pred
        }
    }

def train():
    global epoch
    total_reward = 0
    # Reset the environment state
    state = env.reset()
    while True:
        action = actor.select_action(state)
        next_state, reward, done, info = env.step(action)
        env.render()
        rpm.append((state, action, reward, next_state, float(done)))
        state = next_state
        total_reward += reward
        if done:
            break
        if len(rpm) > batch_size:
            # Sample a batch of training data
            batch_state, batch_action, batch_reward, batch_next_state, batch_done = rpm.sample(
                batch_size)

            # Compute the losses
            expected_Q = Q_net(batch_state, batch_action)
            expected_value = critic(batch_state)
            new_action, log_prob, z, mean, log_std = actor.get_action(batch_state)
            target_value = target_critic(batch_next_state)
            next_q_value = batch_reward + (1 - batch_done) * gamma * target_value
            Q_loss = F.mse_loss(expected_Q, next_q_value.detach())

            expected_new_Q = Q_net(batch_state, new_action)
            next_value = expected_new_Q - log_prob
            value_loss = F.mse_loss(expected_value, next_value.detach())

            log_prob_target = expected_new_Q - expected_value
            policy_loss = (log_prob * (log_prob - log_prob_target).detach()).mean()

            Q_optimizer.clear_grad()
            Q_loss.backward()
            Q_optimizer.step()

            critic_optimizer.clear_grad()
            value_loss.backward()
            critic_optimizer.step()

            actor_optimizer.clear_grad()
            policy_loss.backward()
            actor_optimizer.step()

            # Softly update the target model parameters every fixed number of training steps
            if epoch % 200 == 0:
                for target_param, param in zip(target_critic.parameters(),
                                               critic.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) +
                                           param * ratio)
            epoch += 1
    return total_reward

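# NOTE: `rpm` used above is a replay memory that is not shown here. Below is a
# minimal sketch of a compatible buffer, assuming `sample` should return
# stacked float32 paddle tensors with rewards/dones shaped [batch, 1]; the
# class name ReplayMemory and these details are assumptions, not the original
# implementation.
import random
from collections import deque

import numpy as np
import paddle


class ReplayMemory:
    def __init__(self, max_size):
        self.buffer = deque(maxlen=max_size)

    def append(self, exp):
        # exp = (state, action, reward, next_state, done)
        self.buffer.append(exp)

    def sample(self, batch_size):
        batch = random.sample(list(self.buffer), batch_size)
        state, action, reward, next_state, done = zip(*batch)

        def to_tensor(x, col=False):
            t = paddle.to_tensor(np.array(x), dtype='float32')
            return t.reshape([-1, 1]) if col else t

        return (to_tensor(state), to_tensor(action),
                to_tensor(reward, col=True), to_tensor(next_state),
                to_tensor(done, col=True))

    def __len__(self):
        return len(self.buffer)
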
def forward(self, input):
    dtype = input.dtype
    flatten = input.reshape([-1, self.dim])
    # Squared Euclidean distance between each input vector and each codebook entry
    dist = (flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten.transpose([0, 1]).matmul(self.embed)
            + self.embed.pow(2).sum(0, keepdim=True))
    # Assign each vector to its nearest codebook entry
    embed_ind = (-dist).argmax(1)
    embed_onehot = F.one_hot(embed_ind, self.n_embed).astype(dtype)
    embed_ind = embed_ind.reshape(input.shape[:-1])
    quantize = F.embedding(embed_ind, self.embed.transpose([1, 0]), padding_idx=-1)

    if self.training:
        # Update the codebook with an exponential moving average of the assignments
        embed_onehot_sum = embed_onehot.sum(0)
        embed_sum = flatten.transpose([1, 0]).matmul(embed_onehot)
        if dist_fn.get_world_size() > 1:
            dist_fn.all_reduce(embed_onehot_sum)
            dist_fn.all_reduce(embed_sum)
        ema_inplace(self.cluster_size, embed_onehot_sum, self.decay)
        ema_inplace(self.embed_avg, embed_sum, self.decay)
        cluster_size = laplace_smoothing(
            self.cluster_size, self.n_embed, self.eps) * self.cluster_size.sum()
        embed_normalized = self.embed_avg / cluster_size.unsqueeze(0)
        self.embed[:] = embed_normalized

    # Commitment loss, then the straight-through estimator for the quantized output
    loss = F.mse_loss(quantize.detach(), input) * self.commitment
    quantize = input + (quantize - input).detach()

    return quantize, embed_ind, loss

def train_step(self, state_batch, mcts_probs, winner_batch, lr=0.002):
    """perform a training step"""
    # wrap in Variable
    state_batch = paddle.to_tensor(state_batch)
    mcts_probs = paddle.to_tensor(mcts_probs)
    winner_batch = paddle.to_tensor(winner_batch)

    # zero the parameter gradients
    self.optimizer.clear_gradients()
    # set learning rate
    self.optimizer.set_lr(lr)

    # forward
    log_act_probs, value = self.policy_value_net(state_batch)
    # define the loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
    # Note: the L2 penalty is incorporated in optimizer
    value = paddle.reshape(x=value, shape=[-1])
    value_loss = F.mse_loss(input=value, label=winner_batch)
    policy_loss = -paddle.mean(
        paddle.sum(mcts_probs * log_act_probs, axis=1))
    loss = value_loss + policy_loss
    # backward and optimize
    loss.backward()
    self.optimizer.minimize(loss)
    # calc policy entropy, for monitoring only
    entropy = -paddle.mean(
        paddle.sum(paddle.exp(log_act_probs) * log_act_probs, axis=1))
    return loss.numpy(), entropy.numpy()[0]

def p_loss(self, model, x_0, t, noise=None):
    if noise is None:
        noise = paddle.randn(x_0.shape)
    x_noise = self.q_sample(x_0, t, noise)
    x_recon = model(x_noise, t)
    return F.mse_loss(x_recon, noise)

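# NOTE: `self.q_sample` called above is not shown. A minimal sketch of the
# standard DDPM forward (noising) step it presumably implements, assuming the
# sampler precomputes `self.sqrt_alphas_cumprod` and
# `self.sqrt_one_minus_alphas_cumprod` as 1-D tensors over timesteps
# (hypothetical buffer names):
def q_sample(self, x_0, t, noise):
    # x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise
    shape = [x_0.shape[0]] + [1] * (len(x_0.shape) - 1)
    sqrt_alpha_bar = paddle.gather(self.sqrt_alphas_cumprod, t).reshape(shape)
    sqrt_one_minus = paddle.gather(self.sqrt_one_minus_alphas_cumprod,
                                   t).reshape(shape)
    return sqrt_alpha_bar * x_0 + sqrt_one_minus * noise
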
def _critic_learn(self, obs, action, reward, next_obs, terminal):
    with paddle.no_grad():
        next_action, next_log_pro = self.sample(next_obs)
        q1_next, q2_next = self.target_model.critic_model(
            next_obs, next_action)
        target_Q = paddle.minimum(q1_next, q2_next) - self.alpha * next_log_pro
        terminal = paddle.cast(terminal, dtype='float32')
        target_Q = reward + self.gamma * (1. - terminal) * target_Q
    cur_q1, cur_q2 = self.model.critic_model(obs, action)

    critic_loss = F.mse_loss(cur_q1, target_Q) + F.mse_loss(
        cur_q2, target_Q)

    self.critic_optimizer.clear_grad()
    critic_loss.backward()
    self.critic_optimizer.step()
    return critic_loss

def forward(self, fstudent, fteacher):
    loss_all = 0.0
    for fs, ft in zip(fstudent, fteacher):
        h = fs.shape[2]
        loss = F.mse_loss(fs, ft)
        cnt = 1.0
        tot = 1.0
        # Add MSE terms at progressively coarser pooled resolutions with decaying weights
        for l in [4, 2, 1]:
            if l >= h:
                continue
            if self.mode == "max":
                tmpfs = F.adaptive_max_pool2d(fs, (l, l))
                tmpft = F.adaptive_max_pool2d(ft, (l, l))
            else:
                tmpfs = F.adaptive_avg_pool2d(fs, (l, l))
                tmpft = F.adaptive_avg_pool2d(ft, (l, l))
            cnt /= 2.0
            loss += F.mse_loss(tmpfs, tmpft) * cnt
            tot += cnt
        loss = loss / tot
        loss_all = loss_all + loss
    return loss_all

def get_loss(self, model, batch_data, pred_dict, train=True, flag=0):
    n_support_train = self.args.n_shot_train
    n_support_test = self.args.n_shot_test
    n_query = self.args.n_query
    if not train:
        losses_adapt = self.criterion(
            pred_dict['s_logits'].reshape((2 * n_support_test * n_query, 2)),
            paddle.expand(batch_data['s_label'],
                          [n_query, n_support_test * 2]).reshape(
                              (1, 2 * n_support_test * n_query)).squeeze(0))
    else:
        if flag:
            losses_adapt = self.criterion(
                pred_dict['s_logits'].reshape((2 * n_support_train * n_query, 2)),
                paddle.expand(batch_data['s_label'],
                              [n_query, n_support_train * 2]).reshape(
                                  (1, 2 * n_support_train * n_query)).squeeze(0))
        else:
            losses_adapt = self.criterion(pred_dict['q_logits'],
                                          batch_data['q_label'])

    if paddle.isnan(losses_adapt).any() or paddle.isinf(losses_adapt).any():
        print('!!!!!!!!!!!!!!!!!!! Nan value for supervised CE loss',
              losses_adapt)
        print(pred_dict['s_logits'])
        losses_adapt = paddle.zeros_like(losses_adapt)

    if self.args.reg_adj > 0:
        n_support = batch_data['s_label'].shape[0]
        adj = pred_dict['adj'][-1]
        if train:
            if flag:
                s_label = paddle.expand(
                    batch_data['s_label'],
                    [n_query, batch_data['s_label'].shape[0]])
                n_d = n_query * n_support
                label_edge = model.layers.label2edge(s_label).reshape((n_d, -1))
                pred_edge = adj[:, :, :-1, :-1].reshape((n_d, -1))
            else:
                s_label = paddle.expand(
                    batch_data['s_label'],
                    [n_query, batch_data['s_label'].shape[0]])
                q_label = batch_data['q_label'].unsqueeze(1)
                total_label = paddle.concat([s_label, q_label], 1)
                label_edge = model.layers.label2edge(total_label)[:, :, -1, :-1]
                pred_edge = adj[:, :, -1, :-1]
        else:
            s_label = batch_data['s_label'].unsqueeze(0)
            n_d = n_support * self.args.rel_edge
            label_edge = model.layers.label2edge(s_label).reshape((n_d, -1))
            pred_edge = adj[:, :, :n_support, :n_support].mean(0).reshape(
                (n_d, -1))
        adj_loss_val = F.mse_loss(pred_edge, label_edge)
        if paddle.isnan(adj_loss_val).any() or paddle.isinf(adj_loss_val).any():
            print('!!!!!!!!!!!!!!!!!!! Nan value for adjacency loss',
                  adj_loss_val)
            adj_loss_val = paddle.zeros_like(adj_loss_val)

        losses_adapt += self.args.reg_adj * adj_loss_val

    return losses_adapt

def get_loss(self, model, batch_data, pred_dict, train=True):
    if not train and self.update_s_q:
        losses_adapt = self.criterion(pred_dict['s_logits'],
                                      batch_data['s_label'])
    else:
        losses_adapt = self.criterion(pred_dict['logits'],
                                      batch_data['label'])

    if paddle.isnan(losses_adapt).any() or paddle.isinf(losses_adapt).any():
        print('!!!!!!!!!!!!!!!!!!! Nan value for supervised CE loss',
              losses_adapt)
        print(pred_dict['s_logits'])
        losses_adapt = paddle.zeros_like(losses_adapt)

    if self.args.reg_adj > 0:
        n_support = batch_data['s_label'].shape[0]
        adj = pred_dict['adj'][-1]
        if train:
            n_query = batch_data['q_label'].shape[0]
            s_label = paddle.expand(
                batch_data['s_label'],
                [n_query, batch_data['s_label'].shape[0]])
            q_label = batch_data['q_label'].unsqueeze(1)
            total_label = paddle.concat([s_label, q_label], 1)
            n_d = n_query * self.args.rel_edge * (n_support + 1)
            label_edge = model.layers.label2edge(total_label).reshape(
                (n_d, -1))
            pred_edge = adj.reshape((n_d, -1))
        else:
            s_label = batch_data['s_label'].unsqueeze(0)
            n_d = n_support * self.args.rel_edge
            label_edge = model.layers.label2edge(s_label).reshape((n_d, -1))
            pred_edge = adj[:, :, :n_support, :n_support].mean(0).reshape(
                (n_d, -1))
        adj_loss_val = F.mse_loss(pred_edge, label_edge)
        if paddle.isnan(adj_loss_val).any() or paddle.isinf(
                adj_loss_val).any():
            print('!!!!!!!!!!!!!!!!!!! Nan value for adjacency loss',
                  adj_loss_val)
            adj_loss_val = paddle.zeros_like(adj_loss_val)

        losses_adapt += self.args.reg_adj * adj_loss_val

    return losses_adapt

def forward(self, predicts, batch):
    structure_probs = predicts['structure_probs']
    structure_targets = batch[1].astype("int64")
    structure_targets = structure_targets[:, 1:]
    if len(batch) == 6:
        structure_mask = batch[5].astype("int64")
        structure_mask = structure_mask[:, 1:]
        structure_mask = paddle.reshape(structure_mask, [-1])
    structure_probs = paddle.reshape(structure_probs,
                                     [-1, structure_probs.shape[-1]])
    structure_targets = paddle.reshape(structure_targets, [-1])
    structure_loss = self.loss_func(structure_probs, structure_targets)

    if len(batch) == 6:
        structure_loss = structure_loss * structure_mask

    # structure_loss = paddle.sum(structure_loss) * self.structure_weight
    structure_loss = paddle.mean(structure_loss) * self.structure_weight

    loc_preds = predicts['loc_preds']
    loc_targets = batch[2].astype("float32")
    loc_targets_mask = batch[4].astype("float32")
    loc_targets = loc_targets[:, 1:, :]
    loc_targets_mask = loc_targets_mask[:, 1:, :]
    loc_loss = F.mse_loss(loc_preds * loc_targets_mask,
                          loc_targets) * self.loc_weight
    if self.use_giou:
        loc_loss_giou = self.giou_loss(loc_preds * loc_targets_mask,
                                       loc_targets) * self.giou_weight
        total_loss = structure_loss + loc_loss + loc_loss_giou
        return {
            'loss': total_loss,
            "structure_loss": structure_loss,
            "loc_loss": loc_loss,
            "loc_loss_giou": loc_loss_giou
        }
    else:
        total_loss = structure_loss + loc_loss
        return {
            'loss': total_loss,
            "structure_loss": structure_loss,
            "loc_loss": loc_loss
        }

def _critic_learn(self, obs, action, reward, next_obs, terminal):
    with paddle.no_grad():
        # Compute the target Q value
        target_Q = self.target_model.critic_model(
            next_obs, self.target_model.actor_model(next_obs))
        terminal = paddle.cast(terminal, dtype='float32')
        target_Q = reward + ((1. - terminal) * self.gamma * target_Q)

    # Get current Q estimate
    current_Q = self.model.critic_model(obs, action)

    # Compute critic loss
    critic_loss = F.mse_loss(current_Q, target_Q)

    # Optimize the critic
    self.critic_optimizer.clear_grad()
    critic_loss.backward()
    self.critic_optimizer.step()
    return critic_loss

def train(self, replay_buffer, batch=64):
    # Sample a batch from the replay buffer
    state, action, next_state, reward, done = replay_buffer.sample(batch)

    # Compute the target network Q value
    q_target = self.critic_target(next_state, self.actor_target(next_state))
    q_target = reward + ((1 - done) * self.gamma * q_target).detach()

    # Compute the current network Q value
    q_eval = self.critic(state, action)

    # Critic (value network) loss
    critic_loss = F.mse_loss(q_eval, q_target)
    # print(critic_loss)

    # Backpropagate and update the critic parameters
    self.critic_optimizer.clear_grad()
    critic_loss.backward()
    self.critic_optimizer.step()

    # Actor (policy network) loss
    actor_loss = -self.critic(state, self.actor(state)).mean()
    # print(actor_loss)

    # Backpropagate and update the actor parameters
    self.actor_optimizer.clear_grad()
    actor_loss.backward()
    self.actor_optimizer.step()

    # Softly update the target network parameters
    for param, target_param in zip(self.critic.parameters(),
                                   self.critic_target.parameters()):
        target_param.set_value(target_param * (1.0 - self.tau) +
                               param * self.tau)

    for param, target_param in zip(self.actor.parameters(),
                                   self.actor_target.parameters()):
        target_param.set_value(target_param * (1.0 - self.tau) +
                               param * self.tau)

def train(model):
    print('start training ... ')
    # Switch the model to training mode
    model.train()
    EPOCH_NUM = 500
    train_num = 0
    optimizer = paddle.optimizer.SGD(learning_rate=0.001,
                                     parameters=model.parameters())
    for epoch_id in range(EPOCH_NUM):
        # Shuffle the training data before each epoch
        np.random.shuffle(train_data)
        # Split the training data into mini-batches of BATCH_SIZE (20) records each
        mini_batches = [
            train_data[k:k + BATCH_SIZE]
            for k in range(0, len(train_data), BATCH_SIZE)
        ]
        for batch_id, data in enumerate(mini_batches):
            features_np = np.array(data[:, :13], np.float32)
            labels_np = np.array(data[:, -1:], np.float32)
            features = paddle.to_tensor(features_np)
            labels = paddle.to_tensor(labels_np)
            # Forward pass
            y_pred = model(features)
            cost = F.mse_loss(y_pred, label=labels)
            train_cost = cost.numpy()[0]
            # Backward pass
            cost.backward()
            # Minimize the loss and update the parameters
            optimizer.step()
            # Clear the gradients
            optimizer.clear_grad()

            if batch_id % 30 == 0 and epoch_id % 50 == 0:
                print("Pass:%d,Cost:%0.5f" % (epoch_id, train_cost))

            train_num = train_num + BATCH_SIZE
            train_nums.append(train_num)
            train_costs.append(train_cost)

def run(self,
        image,
        need_align=False,
        start_lr=0.1,
        final_lr=0.025,
        latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],  # for ffhq (0~17)
        step=100,
        mse_weight=1,
        pre_latent=None):

    if need_align:
        src_img = run_alignment(image)
    else:
        src_img = Image.open(image).convert("RGB")

    generator = self.generator
    generator.train()

    percept = LPIPS(net='vgg')
    # on PaddlePaddle, lpips's default eval mode means no gradients.
    percept.train()

    n_mean_latent = 4096

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.Transpose(),
        transforms.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5]),
    ])

    imgs = paddle.to_tensor(transform(src_img)).unsqueeze(0)

    if pre_latent is None:
        with paddle.no_grad():
            noise_sample = paddle.randn(
                (n_mean_latent, generator.style_dim))
            latent_out = generator.style(noise_sample)

            latent_mean = latent_out.mean(0)

        latent_in = latent_mean.detach().clone().unsqueeze(0).tile(
            (imgs.shape[0], 1))
        latent_in = latent_in.unsqueeze(1).tile(
            (1, generator.n_latent, 1)).detach()
    else:
        latent_in = paddle.to_tensor(np.load(pre_latent)).unsqueeze(0)

    var_levels = list(latent_level)
    const_levels = [
        i for i in range(generator.n_latent) if i not in var_levels
    ]
    assert len(var_levels) > 0
    if len(const_levels) > 0:
        latent_fix = latent_in.index_select(paddle.to_tensor(const_levels),
                                            1).detach().clone()
        latent_in = latent_in.index_select(paddle.to_tensor(var_levels),
                                           1).detach().clone()

    latent_in.stop_gradient = False

    optimizer = optim.Adam(parameters=[latent_in], learning_rate=start_lr)

    pbar = tqdm(range(step))

    for i in pbar:
        t = i / step
        lr = get_lr(t, step, start_lr, final_lr)
        optimizer.set_lr(lr)

        if len(const_levels) > 0:
            latent_dict = {}
            for idx, idx2 in enumerate(var_levels):
                latent_dict[idx2] = latent_in[:, idx:idx + 1]
            for idx, idx2 in enumerate(const_levels):
                latent_dict[idx2] = (latent_fix[:, idx:idx + 1]).detach()
            latent_list = []
            for idx in range(generator.n_latent):
                latent_list.append(latent_dict[idx])
            latent_n = paddle.concat(latent_list, 1)
        else:
            latent_n = latent_in

        img_gen, _ = generator([latent_n],
                               input_is_latent=True,
                               randomize_noise=False)

        batch, channel, height, width = img_gen.shape
        if height > 256:
            factor = height // 256
            img_gen = img_gen.reshape((batch, channel, height // factor,
                                       factor, width // factor, factor))
            img_gen = img_gen.mean([3, 5])

        p_loss = percept(img_gen, imgs).sum()
        mse_loss = F.mse_loss(img_gen, imgs)
        loss = p_loss + mse_weight * mse_loss

        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

        pbar.set_description(
            (f"perceptual: {p_loss.numpy()[0]:.4f}; "
             f"mse: {mse_loss.numpy()[0]:.4f}; lr: {lr:.4f}"))

    img_gen, _ = generator([latent_n],
                           input_is_latent=True,
                           randomize_noise=False)

    dst_img = make_image(img_gen)[0]
    dst_latent = latent_n.numpy()[0]

    os.makedirs(self.output_path, exist_ok=True)
    save_src_path = os.path.join(self.output_path, 'src.fitting.png')
    cv2.imwrite(save_src_path,
                cv2.cvtColor(np.asarray(src_img), cv2.COLOR_RGB2BGR))
    save_dst_path = os.path.join(self.output_path, 'dst.fitting.png')
    cv2.imwrite(save_dst_path, cv2.cvtColor(dst_img, cv2.COLOR_RGB2BGR))
    save_npy_path = os.path.join(self.output_path, 'dst.fitting.npy')
    np.save(save_npy_path, dst_latent)

    return np.asarray(src_img), dst_img, dst_latent

def train():
    global epoch
    total_reward = 0
    # Reset the environment state
    state = env.reset()
    while True:
        action = actor.select_action(state)
        noisy = paddle.normal(
            0, exploration_noise,
            shape=[env.action_space.shape[0]]).clip(env.action_space.low,
                                                    env.action_space.high)
        action = (action + noisy).clip(env.action_space.low,
                                       env.action_space.high).numpy()
        next_state, reward, done, info = env.step(action)
        env.render()
        rpm.append((state, action, reward, next_state, float(done)))
        state = next_state
        total_reward += reward
        if done:
            break
        if len(rpm) > batch_size:
            # Sample a batch of training data
            batch_state, batch_action, batch_reward, batch_next_state, batch_done = rpm.sample(
                batch_size)

            # Compute the critic losses (clipped double Q targets)
            best_v_1 = target_critic_1(batch_next_state,
                                       target_actor(batch_next_state))
            best_v_2 = target_critic_2(batch_next_state,
                                       target_actor(batch_next_state))
            best_v = paddle.min(paddle.concat([best_v_1, best_v_2], axis=1),
                                axis=1,
                                keepdim=True)
            best_v = batch_reward + (gamma * best_v * (1 - batch_done)).detach()

            current_v_1 = critic_1(batch_state, batch_action)
            critic_loss = F.mse_loss(current_v_1, best_v)
            critic_1_optimizer.clear_grad()
            critic_loss.backward()
            critic_1_optimizer.step()

            current_v_2 = critic_2(batch_state, batch_action)
            critic_loss = F.mse_loss(current_v_2, best_v)
            critic_2_optimizer.clear_grad()
            critic_loss.backward()
            critic_2_optimizer.step()

            # Delayed policy update
            if epoch % policy_delay == 0:
                actor_loss = -critic_1(batch_state, actor(batch_state)).mean()
                actor_optimizer.clear_grad()
                actor_loss.backward()
                actor_optimizer.step()

            # Softly update the target model parameters every fixed number of training steps
            if epoch % 200 == 0:
                for target_param, param in zip(target_actor.parameters(),
                                               actor.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) +
                                           param * ratio)
                for target_param, param in zip(target_critic_1.parameters(),
                                               critic_1.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) +
                                           param * ratio)
                for target_param, param in zip(target_critic_2.parameters(),
                                               critic_2.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) +
                                           param * ratio)
            epoch += 1
    return total_reward

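# NOTE: the three soft-update loops above repeat the same Polyak averaging
# pattern. A small helper (not part of the original code) that is equivalent:
def soft_update(target_model, source_model, ratio):
    # target_param <- (1 - ratio) * target_param + ratio * param
    for target_param, param in zip(target_model.parameters(),
                                   source_model.parameters()):
        target_param.set_value(target_param * (1.0 - ratio) + param * ratio)


# Usage, equivalent to the update block above:
# soft_update(target_actor, actor, ratio)
# soft_update(target_critic_1, critic_1, ratio)
# soft_update(target_critic_2, critic_2, ratio)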