Example #1
    def compute_loss(self, xc, yc):
        '''
        :function  : compute the training losses
        :param xc  : tensors, one batch of network inputs
        :param yc  : tensors, the ground-truth labels for those inputs
        :return    : dict, the individual losses and the predictions
        '''
        y_pos, y_cos, y_sin, y_width = yc
        pos_pred, cos_pred, sin_pred, width_pred = self.forward(xc)
        p_loss = F.mse_loss(pos_pred, y_pos)
        cos_loss = F.mse_loss(cos_pred, y_cos)
        sin_loss = F.mse_loss(sin_pred, y_sin)
        width_loss = F.mse_loss(width_pred, y_width)

        return {
            'loss': p_loss + cos_loss + sin_loss + width_loss,
            'losses': {
                'p_loss': p_loss,
                'cos_loss': cos_loss,
                'sin_loss': sin_loss,
                'width_loss': width_loss
            },
            'pred': {
                'pos': pos_pred,
                'cos': cos_pred,
                'sin': sin_pred,
                'width': width_pred
            }
        }
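A minimal sketch of how a compute_loss method like this is typically driven from a training loop. The model, optimizer, and train_loader names below are illustrative stand-ins, not part of the example above:

# Hypothetical training loop around compute_loss (all names illustrative).
for xc, yc in train_loader:          # yc = (y_pos, y_cos, y_sin, y_width)
    out = model.compute_loss(xc, yc)
    loss = out['loss']               # sum of the four MSE terms
    optimizer.clear_grad()
    loss.backward()
    optimizer.step()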
Example #2
def train():
    global epoch
    total_reward = 0
    # Reset the environment state
    state = env.reset()
    while True:
        action = actor.select_action(state)

        next_state, reward, done, info = env.step(action)
        env.render()
        rpm.append((state, action, reward, next_state, float(done)))

        state = next_state
        total_reward += reward
        if done:
            break

        if len(rpm) > batch_size:
            # Sample a batch of training data
            batch_state, batch_action, batch_reward, batch_next_state, batch_done = rpm.sample(batch_size)
            # Compute the losses
            expected_Q = Q_net(batch_state, batch_action)
            expected_value = critic(batch_state)
            new_action, log_prob, z, mean, log_std = actor.get_action(batch_state)

            target_value = target_critic(batch_next_state)
            next_q_value = batch_reward + (1 - batch_done) * gamma * target_value
            Q_loss = F.mse_loss(expected_Q, next_q_value.detach())

            expected_new_Q = Q_net(batch_state, new_action)
            next_value = expected_new_Q - log_prob
            value_loss = F.mse_loss(expected_value, next_value.detach())

            log_prob_target = expected_new_Q - expected_value
            policy_loss = (log_prob * (log_prob - log_prob_target).detach()).mean()

            Q_loss.backward()
            Q_optimizer.step()
            Q_optimizer.clear_grad()

            value_loss.backward()
            critic_optimizer.step()
            critic_optimizer.clear_grad()

            policy_loss.backward()
            actor_optimizer.step()
            actor_optimizer.clear_grad()
            # Update the target model parameters every fixed number of training steps
            if epoch % 200 == 0:
                for target_param, param in zip(target_critic.parameters(), critic.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) + param * ratio)
            epoch += 1

    return total_reward
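The loop above assumes a replay buffer rpm exposing append, sample, and len. A minimal sketch of such a buffer, under the assumption that sample should return batched float32 Paddle tensors, might look like:

import random
from collections import deque

import numpy as np
import paddle


class ReplayMemory:
    # Sketch of a replay buffer; not the original implementation.
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def append(self, transition):
        # transition = (state, action, reward, next_state, done)
        self.buffer.append(transition)

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        fields = [np.stack(f).astype('float32') for f in zip(*batch)]
        state, action, reward, next_state, done = map(paddle.to_tensor, fields)
        return (state, action, reward.unsqueeze(-1), next_state,
                done.unsqueeze(-1))

    def __len__(self):
        return len(self.buffer)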
Example #3
    def forward(self, input):
        dtype = input.dtype
        # Flatten to (N, dim) and compute squared distances to every
        # codebook vector: ||x||^2 - 2 x·e + ||e||^2
        flatten = input.reshape([-1, self.dim])
        dist = (flatten.pow(2).sum(1, keepdim=True) -
                2 * flatten.matmul(self.embed) +
                self.embed.pow(2).sum(0, keepdim=True))
        # Index of the nearest codebook entry for each vector
        embed_ind = (-dist).argmax(1)
        embed_onehot = F.one_hot(embed_ind, self.n_embed).astype(dtype)
        embed_ind = embed_ind.reshape(input.shape[:-1])
        quantize = F.embedding(embed_ind,
                               self.embed.transpose([1, 0]),
                               padding_idx=-1)

        if self.training:
            # Accumulate statistics for the EMA codebook update
            embed_onehot_sum = embed_onehot.sum(0)
            embed_sum = flatten.transpose([1, 0]).matmul(embed_onehot)

            if dist_fn.get_world_size() > 1:
                dist_fn.all_reduce(embed_onehot_sum)
                dist_fn.all_reduce(embed_sum)

            ema_inplace(self.cluster_size, embed_onehot_sum, self.decay)
            ema_inplace(self.embed_avg, embed_sum, self.decay)
            cluster_size = laplace_smoothing(
                self.cluster_size, self.n_embed,
                self.eps) * self.cluster_size.sum()
            embed_normalized = self.embed_avg / cluster_size.unsqueeze(0)
            self.embed[:] = embed_normalized

        # Commitment loss plus the straight-through gradient estimator
        loss = F.mse_loss(quantize.detach(), input) * self.commitment
        quantize = input + (quantize - input).detach()
        return quantize, embed_ind, loss
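The forward pass relies on ema_inplace and laplace_smoothing helpers that are not shown here. Their conventional definitions in EMA-codebook VQ-VAE implementations are roughly:

def ema_inplace(moving_avg, new, decay):
    # In-place exponential moving average: avg <- decay*avg + (1-decay)*new
    moving_avg[:] = moving_avg * decay + new * (1 - decay)


def laplace_smoothing(x, n_categories, eps):
    # Additive (Laplace) smoothing so that no cluster size is exactly zero
    return (x + eps) / (x.sum() + n_categories * eps)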
Example #4
    def train_step(self, state_batch, mcts_probs, winner_batch, lr=0.002):
        """perform a training step"""
        # wrap in Variable
        state_batch = paddle.to_tensor(state_batch)
        mcts_probs = paddle.to_tensor(mcts_probs)
        winner_batch = paddle.to_tensor(winner_batch)

        # zero the parameter gradients
        self.optimizer.clear_gradients()
        # set learning rate
        self.optimizer.set_lr(lr)

        # forward
        log_act_probs, value = self.policy_value_net(state_batch)
        # define the loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        # Note: the L2 penalty is incorporated in optimizer
        value = paddle.reshape(x=value, shape=[-1])
        value_loss = F.mse_loss(input=value, label=winner_batch)
        policy_loss = -paddle.mean(paddle.sum(mcts_probs*log_act_probs, axis=1))
        loss = value_loss + policy_loss
        # backward and optimize
        loss.backward()
        self.optimizer.minimize(loss)
        # calc policy entropy, for monitoring only
        entropy = -paddle.mean(
                paddle.sum(paddle.exp(log_act_probs) * log_act_probs, axis=1)
                )
        return loss.numpy(), entropy.numpy()[0]
Example #5
    def p_loss(self, model, x_0, t, noise=None):
        if noise is None:
            noise = paddle.randn(x_0.shape)

        x_noise = self.q_sample(x_0, t, noise)
        x_recon = model(x_noise, t)

        return F.mse_loss(x_recon, noise)
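p_loss depends on a q_sample method implementing the diffusion forward process. A sketch of the standard DDPM formulation, assuming precomputed sqrt_alphas_cumprod and sqrt_one_minus_alphas_cumprod buffers on the sampler (those buffer names are assumptions, not from the source), is:

    def q_sample(self, x_0, t, noise):
        # x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise
        shape = [-1] + [1] * (len(x_0.shape) - 1)
        a = self.sqrt_alphas_cumprod.index_select(t, 0).reshape(shape)
        b = self.sqrt_one_minus_alphas_cumprod.index_select(t, 0).reshape(shape)
        return a * x_0 + b * noise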
Example #6
    def _critic_learn(self, obs, action, reward, next_obs, terminal):
        with paddle.no_grad():
            next_action, next_log_pro = self.sample(next_obs)
            q1_next, q2_next = self.target_model.critic_model(
                next_obs, next_action)
            target_Q = paddle.minimum(q1_next,
                                      q2_next) - self.alpha * next_log_pro
            terminal = paddle.cast(terminal, dtype='float32')
            target_Q = reward + self.gamma * (1. - terminal) * target_Q
        cur_q1, cur_q2 = self.model.critic_model(obs, action)

        critic_loss = F.mse_loss(cur_q1, target_Q) + F.mse_loss(
            cur_q2, target_Q)

        self.critic_optimizer.clear_grad()
        critic_loss.backward()
        self.critic_optimizer.step()
        return critic_loss
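_critic_learn bootstraps from self.target_model, which is normally kept in sync with a soft update. A hedged sketch of that companion step (the method name and decay value are illustrative, not from the source):

    def sync_target(self, decay=0.995):
        # Polyak averaging of target parameters toward the online model
        for t_param, param in zip(self.target_model.parameters(),
                                  self.model.parameters()):
            t_param.set_value(t_param * decay + param * (1.0 - decay))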
Example #7
    def forward(self, fstudent, fteacher):
        loss_all = 0.0
        for fs, ft in zip(fstudent, fteacher):
            h = fs.shape[2]
            loss = F.mse_loss(fs, ft)
            cnt = 1.0
            tot = 1.0
            for l in [4, 2, 1]:
                if l >= h:
                    continue
                if self.mode == "max":
                    tmpfs = F.adaptive_max_pool2d(fs, (l, l))
                    tmpft = F.adaptive_max_pool2d(ft, (l, l))
                else:
                    tmpfs = F.adaptive_avg_pool2d(fs, (l, l))
                    tmpft = F.adaptive_avg_pool2d(ft, (l, l))

                cnt /= 2.0
                loss += F.mse_loss(tmpfs, tmpft) * cnt
                tot += cnt
            loss = loss / tot
            loss_all = loss_all + loss
        return loss_all
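A short usage sketch: the module expects two equal-length lists of feature maps, student first. The review_kd instance name and the feature shapes below are illustrative only:

# Hypothetical multi-scale features from student/teacher backbones
fstudent = [paddle.randn([2, 64, 8, 8]), paddle.randn([2, 128, 4, 4])]
fteacher = [paddle.randn([2, 64, 8, 8]), paddle.randn([2, 128, 4, 4])]
kd_loss = review_kd(fstudent, fteacher)  # scalar distillation loss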
Example #8
    def get_loss(self, model, batch_data, pred_dict, train=True, flag=0):
        n_support_train = self.args.n_shot_train
        n_support_test = self.args.n_shot_test
        n_query = self.args.n_query
        if not train:
            losses_adapt = self.criterion(
                pred_dict['s_logits'].reshape(
                    (2 * n_support_test * n_query, 2)),
                paddle.expand(batch_data['s_label'],
                              [n_query, n_support_test * 2]).reshape(
                                  (2 * n_support_test * n_query,)))
        else:
            if flag:
                losses_adapt = self.criterion(
                    pred_dict['s_logits'].reshape(
                        (2 * n_support_train * n_query, 2)),
                    paddle.expand(batch_data['s_label'],
                                  [n_query, n_support_train * 2]).reshape(
                                      (2 * n_support_train * n_query,)))
            else:
                losses_adapt = self.criterion(pred_dict['q_logits'],
                                              batch_data['q_label'])

        if paddle.isnan(losses_adapt).any() or paddle.isinf(losses_adapt).any():
            print('!!!!!!!!!!!!!!!!!!! Nan value for supervised CE loss', losses_adapt)
            print(pred_dict['s_logits'])
            losses_adapt = paddle.zeros_like(losses_adapt)

        if self.args.reg_adj > 0:
            n_support = batch_data['s_label'].shape[0]
            adj = pred_dict['adj'][-1]
            if train:
                if flag:
                    s_label = paddle.expand(batch_data['s_label'], [n_query,batch_data['s_label'].shape[0]])
                    n_d = n_query * n_support
                    label_edge = model.layers.label2edge(s_label).reshape((n_d, -1))
                    pred_edge = adj[:,:,:-1,:-1].reshape((n_d, -1))
                else:
                    s_label = paddle.expand(batch_data['s_label'], [n_query,batch_data['s_label'].shape[0]])
                    q_label = batch_data['q_label'].unsqueeze(1)
                    total_label = paddle.concat([s_label, q_label], 1)
                    label_edge = model.layers.label2edge(total_label)[:,:,-1,:-1]
                    pred_edge = adj[:,:,-1,:-1]
            else:
                s_label = batch_data['s_label'].unsqueeze(0)
                n_d = n_support * self.args.rel_edge
                label_edge = model.layers.label2edge(s_label).reshape((n_d, -1))
                pred_edge = adj[:, :, :n_support, :n_support].mean(0).reshape((n_d, -1))
            adj_loss_val = F.mse_loss(pred_edge, label_edge)
            if paddle.isnan(adj_loss_val).any() or paddle.isinf(adj_loss_val).any():
                print('!!!!!!!!!!!!!!!!!!!  Nan value for adjacency loss', adj_loss_val)
                adj_loss_val = paddle.zeros_like(adj_loss_val)

            losses_adapt += self.args.reg_adj * adj_loss_val

        return losses_adapt
Example #9
    def get_loss(self, model, batch_data, pred_dict, train=True):
        if not train and self.update_s_q:
            losses_adapt = self.criterion(pred_dict['s_logits'],
                                          batch_data['s_label'])
        else:
            losses_adapt = self.criterion(pred_dict['logits'],
                                          batch_data['label'])

        if paddle.isnan(losses_adapt).any() or paddle.isinf(
                losses_adapt).any():
            print('!!!!!!!!!!!!!!!!!!! Nan value for supervised CE loss',
                  losses_adapt)
            print(pred_dict['s_logits'])
            losses_adapt = paddle.zeros_like(losses_adapt)

        if self.args.reg_adj > 0:
            n_support = batch_data['s_label'].shape[0]
            adj = pred_dict['adj'][-1]
            if train:
                n_query = batch_data['q_label'].shape[0]
                s_label = paddle.expand(
                    batch_data['s_label'],
                    [n_query, batch_data['s_label'].shape[0]])
                q_label = batch_data['q_label'].unsqueeze(1)
                total_label = paddle.concat([s_label, q_label], 1)
                n_d = n_query * self.args.rel_edge * (n_support + 1)
                label_edge = model.layers.label2edge(total_label).reshape(
                    (n_d, -1))
                pred_edge = adj.reshape((n_d, -1))
            else:
                s_label = batch_data['s_label'].unsqueeze(0)
                n_d = n_support * self.args.rel_edge
                label_edge = model.layers.label2edge(s_label).reshape(
                    (n_d, -1))
                pred_edge = adj[:, :, :n_support, :n_support].mean(0).reshape(
                    (n_d, -1))
            adj_loss_val = F.mse_loss(pred_edge, label_edge)
            if paddle.isnan(adj_loss_val).any() or paddle.isinf(
                    adj_loss_val).any():
                print('!!!!!!!!!!!!!!!!!!!  Nan value for adjacency loss',
                      adj_loss_val)
                adj_loss_val = paddle.zeros_like(adj_loss_val)

            losses_adapt += self.args.reg_adj * adj_loss_val

        return losses_adapt
Example #10
    def forward(self, predicts, batch):
        structure_probs = predicts['structure_probs']
        structure_targets = batch[1].astype("int64")
        structure_targets = structure_targets[:, 1:]
        if len(batch) == 6:
            structure_mask = batch[5].astype("int64")
            structure_mask = structure_mask[:, 1:]
            structure_mask = paddle.reshape(structure_mask, [-1])
        structure_probs = paddle.reshape(structure_probs,
                                         [-1, structure_probs.shape[-1]])
        structure_targets = paddle.reshape(structure_targets, [-1])
        structure_loss = self.loss_func(structure_probs, structure_targets)

        if len(batch) == 6:
            structure_loss = structure_loss * structure_mask

#         structure_loss = paddle.sum(structure_loss) * self.structure_weight
        structure_loss = paddle.mean(structure_loss) * self.structure_weight

        loc_preds = predicts['loc_preds']
        loc_targets = batch[2].astype("float32")
        loc_targets_mask = batch[4].astype("float32")
        loc_targets = loc_targets[:, 1:, :]
        loc_targets_mask = loc_targets_mask[:, 1:, :]
        loc_loss = F.mse_loss(loc_preds * loc_targets_mask,
                              loc_targets) * self.loc_weight
        if self.use_giou:
            loc_loss_giou = self.giou_loss(loc_preds * loc_targets_mask,
                                           loc_targets) * self.giou_weight
            total_loss = structure_loss + loc_loss + loc_loss_giou
            return {
                'loss': total_loss,
                "structure_loss": structure_loss,
                "loc_loss": loc_loss,
                "loc_loss_giou": loc_loss_giou
            }
        else:
            total_loss = structure_loss + loc_loss
            return {
                'loss': total_loss,
                "structure_loss": structure_loss,
                "loc_loss": loc_loss
            }
Example #11
    def _critic_learn(self, obs, action, reward, next_obs, terminal):
        with paddle.no_grad():
            # Compute the target Q value
            target_Q = self.target_model.critic_model(
                next_obs, self.target_model.actor_model(next_obs))
            terminal = paddle.cast(terminal, dtype='float32')
            target_Q = reward + ((1. - terminal) * self.gamma * target_Q)

        # Get current Q estimate
        current_Q = self.model.critic_model(obs, action)

        # Compute critic loss
        critic_loss = F.mse_loss(current_Q, target_Q)

        # Optimize the critic
        self.critic_optimizer.clear_grad()
        critic_loss.backward()
        self.critic_optimizer.step()
        return critic_loss
Example #12
    def train(self, replay_buffer, batch=64):
        # Sample from the replay buffer
        state, action, next_state, reward, done = replay_buffer.sample(batch)

        # Compute the target network's Q value
        q_target = self.critic_target(next_state,
                                      self.actor_target(next_state))
        q_target = reward + ((1 - done) * self.gamma * q_target).detach()

        # Compute the current network's Q value
        q_eval = self.critic(state, action)

        # Compute the critic (value network) loss
        critic_loss = F.mse_loss(q_eval, q_target)
        # print(critic_loss)

        # Backpropagate and update the network parameters
        self.critic_optimizer.clear_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # Compute the actor (policy network) loss
        actor_loss = -self.critic(state, self.actor(state)).mean()
        # print(actor_loss)

        # Backpropagate and update the network parameters
        self.actor_optimizer.clear_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Soft-update the target network parameters
        for param, target_param in zip(self.critic.parameters(),
                                       self.critic_target.parameters()):
            target_param.set_value(target_param * (1.0 - self.tau) +
                                   param * self.tau)
        for param, target_param in zip(self.actor.parameters(),
                                       self.actor_target.parameters()):
            target_param.set_value(target_param * (1.0 - self.tau) +
                                   param * self.tau)
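A hedged sketch of how a train method like this is typically invoked once enough transitions have been collected; the agent, env, replay_buffer, and max_steps names are illustrative, not from the source:

state = env.reset()
for step in range(max_steps):
    action = agent.actor(paddle.to_tensor(state)).numpy()
    next_state, reward, done, _ = env.step(action)
    # Buffer stores (state, action, next_state, reward, done) to match sample()
    replay_buffer.append((state, action, next_state, reward, float(done)))
    state = env.reset() if done else next_state
    if len(replay_buffer) > 64:
        agent.train(replay_buffer, batch=64)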
Example #13
def train(model):
    print('start training ... ')
    # Switch the model to training mode
    model.train()
    EPOCH_NUM = 500
    train_num = 0
    optimizer = paddle.optimizer.SGD(learning_rate=0.001,
                                     parameters=model.parameters())
    for epoch_id in range(EPOCH_NUM):
        # Shuffle the training data before each epoch
        np.random.shuffle(train_data)
        # Split the training data into mini-batches of 20 records each
        mini_batches = [
            train_data[k:k + BATCH_SIZE]
            for k in range(0, len(train_data), BATCH_SIZE)
        ]
        for batch_id, data in enumerate(mini_batches):
            features_np = np.array(data[:, :13], np.float32)
            labels_np = np.array(data[:, -1:], np.float32)
            features = paddle.to_tensor(features_np)
            labels = paddle.to_tensor(labels_np)
            # Forward pass
            y_pred = model(features)
            cost = F.mse_loss(y_pred, label=labels)
            train_cost = cost.numpy()[0]
            # Backward pass
            cost.backward()
            # Minimize the loss and update the parameters
            optimizer.step()
            # Clear the gradients
            optimizer.clear_grad()

            if batch_id % 30 == 0 and epoch_id % 50 == 0:
                print("Pass:%d,Cost:%0.5f" % (epoch_id, train_cost))

            train_num = train_num + BATCH_SIZE
            train_nums.append(train_num)
            train_costs.append(train_cost)
Example #14
    def run(
            self,
            image,
            need_align=False,
            start_lr=0.1,
            final_lr=0.025,
            latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                          11],  # for ffhq (0~17)
            step=100,
            mse_weight=1,
            pre_latent=None):

        if need_align:
            src_img = run_alignment(image)
        else:
            src_img = Image.open(image).convert("RGB")

        generator = self.generator
        generator.train()

        percept = LPIPS(net='vgg')
        # on PaddlePaddle, lpips's default eval mode means no gradients.
        percept.train()

        n_mean_latent = 4096

        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.Transpose(),
            transforms.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5]),
        ])

        imgs = paddle.to_tensor(transform(src_img)).unsqueeze(0)

        if pre_latent is None:
            with paddle.no_grad():
                noise_sample = paddle.randn(
                    (n_mean_latent, generator.style_dim))
                latent_out = generator.style(noise_sample)

                latent_mean = latent_out.mean(0)

            latent_in = latent_mean.detach().clone().unsqueeze(0).tile(
                (imgs.shape[0], 1))
            latent_in = latent_in.unsqueeze(1).tile(
                (1, generator.n_latent, 1)).detach()

        else:
            latent_in = paddle.to_tensor(np.load(pre_latent)).unsqueeze(0)

        var_levels = list(latent_level)
        const_levels = [
            i for i in range(generator.n_latent) if i not in var_levels
        ]
        assert len(var_levels) > 0
        if len(const_levels) > 0:
            latent_fix = latent_in.index_select(paddle.to_tensor(const_levels),
                                                1).detach().clone()
            latent_in = latent_in.index_select(paddle.to_tensor(var_levels),
                                               1).detach().clone()

        latent_in.stop_gradient = False

        optimizer = optim.Adam(parameters=[latent_in], learning_rate=start_lr)

        pbar = tqdm(range(step))

        for i in pbar:
            t = i / step
            lr = get_lr(t, step, start_lr, final_lr)
            optimizer.set_lr(lr)

            if len(const_levels) > 0:
                latent_dict = {}
                for idx, idx2 in enumerate(var_levels):
                    latent_dict[idx2] = latent_in[:, idx:idx + 1]
                for idx, idx2 in enumerate(const_levels):
                    latent_dict[idx2] = (latent_fix[:, idx:idx + 1]).detach()
                latent_list = []
                for idx in range(generator.n_latent):
                    latent_list.append(latent_dict[idx])
                latent_n = paddle.concat(latent_list, 1)
            else:
                latent_n = latent_in

            img_gen, _ = generator([latent_n],
                                   input_is_latent=True,
                                   randomize_noise=False)

            batch, channel, height, width = img_gen.shape

            if height > 256:
                factor = height // 256

                img_gen = img_gen.reshape((batch, channel, height // factor,
                                           factor, width // factor, factor))
                img_gen = img_gen.mean([3, 5])

            p_loss = percept(img_gen, imgs).sum()
            mse_loss = F.mse_loss(img_gen, imgs)
            loss = p_loss + mse_weight * mse_loss

            optimizer.clear_grad()
            loss.backward()
            optimizer.step()

            pbar.set_description(
                (f"perceptual: {p_loss.numpy()[0]:.4f}; "
                 f"mse: {mse_loss.numpy()[0]:.4f}; lr: {lr:.4f}"))

        img_gen, _ = generator([latent_n],
                               input_is_latent=True,
                               randomize_noise=False)
        dst_img = make_image(img_gen)[0]
        dst_latent = latent_n.numpy()[0]

        os.makedirs(self.output_path, exist_ok=True)
        save_src_path = os.path.join(self.output_path, 'src.fitting.png')
        cv2.imwrite(save_src_path,
                    cv2.cvtColor(np.asarray(src_img), cv2.COLOR_RGB2BGR))
        save_dst_path = os.path.join(self.output_path, 'dst.fitting.png')
        cv2.imwrite(save_dst_path, cv2.cvtColor(dst_img, cv2.COLOR_RGB2BGR))
        save_npy_path = os.path.join(self.output_path, 'dst.fitting.npy')
        np.save(save_npy_path, dst_latent)

        return np.asarray(src_img), dst_img, dst_latent
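The fitting loop calls a get_lr(t, step, start_lr, final_lr) schedule that is not shown. A plausible sketch, assuming a simple linear ramp from start_lr down to final_lr (the original schedule may instead use warmup or cosine decay), is:

def get_lr(t, step, start_lr, final_lr):
    # Linear interpolation in t = i / step (assumption, not the original)
    return start_lr + (final_lr - start_lr) * t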
Example #15
def train():
    global epoch
    total_reward = 0
    # Reset the environment state
    state = env.reset()
    while True:
        action = actor.select_action(state)
        noisy = paddle.normal(0,
                              exploration_noise,
                              shape=[env.action_space.shape[0]
                                     ]).clip(env.action_space.low,
                                             env.action_space.high)
        action = (action + noisy).clip(env.action_space.low,
                                       env.action_space.high).numpy()

        next_state, reward, done, info = env.step(action)
        env.render()
        rpm.append((state, action, reward, next_state, float(done)))

        state = next_state
        total_reward += reward
        if done:
            break

        if len(rpm) > batch_size:
            # Sample a batch of training data
            batch_state, batch_action, batch_reward, batch_next_state, batch_done = rpm.sample(
                batch_size)
            # Compute the losses
            best_v_1 = target_critic_1(batch_next_state,
                                       target_actor(batch_next_state))
            best_v_2 = target_critic_2(batch_next_state,
                                       target_actor(batch_next_state))
            best_v = paddle.min(paddle.concat([best_v_1, best_v_2], axis=1),
                                axis=1,
                                keepdim=True)
            best_v = batch_reward + (gamma * best_v *
                                     (1 - batch_done)).detach()

            current_v_1 = critic_1(batch_state, batch_action)
            critic_loss = F.mse_loss(current_v_1, best_v)
            critic_1_optimizer.clear_grad()
            critic_loss.backward()
            critic_1_optimizer.step()

            current_v_2 = critic_2(batch_state, batch_action)
            critic_loss = F.mse_loss(current_v_2, best_v)
            critic_2_optimizer.clear_grad()
            critic_loss.backward()
            critic_2_optimizer.step()

            if epoch % policy_delay == 0:
                actor_loss = -critic_1(batch_state, actor(batch_state)).mean()
                actor_optimizer.clear_grad()
                actor_loss.backward()
                actor_optimizer.step()

            # Update the target model parameters every fixed number of training steps
            if epoch % 200 == 0:
                for target_param, param in zip(target_actor.parameters(),
                                               actor.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) +
                                           param * ratio)
                for target_param, param in zip(target_critic_1.parameters(),
                                               critic_1.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) +
                                           param * ratio)
                for target_param, param in zip(target_critic_2.parameters(),
                                               critic_2.parameters()):
                    target_param.set_value(target_param * (1.0 - ratio) +
                                           param * ratio)
            epoch += 1

    return total_reward