Example #1
def finish_episode():
    R = 0
    policy_loss = []
    value_loss = []
    returns = []
    # accumulate discounted returns, walking the reward list backwards
    for r in policy.rewards[::-1]:
        R = r + gamma * R
        returns.insert(0, R)
    returns = paddle.to_tensor(returns)
    returns = (returns - returns.mean()) / (returns.std() + eps)
    for (log_prob, value), R in zip(policy.saved_log_probs, returns):
        advantage = R - value

        policy_loss.append(-log_prob * advantage)
        value_loss.append(F.smooth_l1_loss(value.reshape([-1]), R.reshape([-1])))

    optimizer.clear_grad()
    policy_loss = paddle.concat(policy_loss).sum()
    value_loss = paddle.concat(value_loss).sum()
    loss = policy_loss + value_loss

    loss.backward()
    optimizer.step()
    del policy.rewards[:]
    del policy.saved_log_probs[:]
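These examples all build on paddle.nn.functional.smooth_l1_loss, which applies a Huber-style penalty: quadratic for small residuals, linear beyond the delta threshold. A minimal self-contained check of that behavior, with made-up values and the default delta=1.0 (a sketch for orientation, not part of any example on this page):

import paddle
import paddle.nn.functional as F

pred = paddle.to_tensor([0.5, 2.0, -1.0], dtype='float32')
target = paddle.zeros([3], dtype='float32')

# with delta=1.0: 0.5 * d**2 for |d| < 1, |d| - 0.5 otherwise
loss = F.smooth_l1_loss(pred, target, reduction='mean', delta=1.0)
print(float(loss))  # mean of [0.125, 1.5, 0.5] ~= 0.7083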
Example #2
    def forward(self, features, im_info, boxes=None):
        # prediction
        pred_cls_score_list = []
        pred_bbox_offsets_list = []
        for x in features:
            t = F.relu(self.rpn_conv(x))
            pred_cls_score_list.append(self.rpn_cls_score(t))
            pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t))
        # get anchors
        all_anchors_list = []
        # stride: 64,32,16,8,4 p6->p2
        base_stride = 4
        off_stride = 2**(len(features) - 1)  # 16
        for fm in features:
            layer_anchors = self.anchors_generator(fm, base_stride, off_stride)
            off_stride = off_stride // 2
            all_anchors_list.append(layer_anchors)
        # sample from the predictions
        rpn_rois = find_top_rpn_proposals(self.training,
                                          pred_bbox_offsets_list,
                                          pred_cls_score_list,
                                          all_anchors_list, im_info)
        rpn_rois = rpn_rois.cast('float32')
        if self.training:
            rpn_labels, rpn_bbox_targets = fpn_anchor_target(
                boxes, im_info, all_anchors_list)
            pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
                pred_cls_score_list, pred_bbox_offsets_list)
            # rpn loss
            valid_masks = rpn_labels >= 0
            valid_inds = paddle.nonzero(valid_masks).flatten()
            # binary cross-entropy between softmaxed objectness scores and
            # one-hot labels (rows of a 2x2 identity) over the valid anchors
            objectness_loss = F.binary_cross_entropy(
                F.softmax(paddle.gather(pred_cls_score, valid_inds)),
                paddle.gather(paddle.eye(2),
                              paddle.gather(rpn_labels, valid_inds)),
                reduction='none')

            pos_masks = rpn_labels > 0
            pos_inds = paddle.nonzero(pos_masks).flatten()
            localization_loss = F.smooth_l1_loss(
                paddle.gather(pred_bbox_offsets, pos_inds),
                paddle.gather(rpn_bbox_targets, pos_inds),
                reduction='none',
                delta=config.rpn_smooth_l1_beta)
            normalizer = 1 / valid_masks.cast('float32').sum()
            loss_rpn_cls = objectness_loss.sum() * normalizer
            loss_rpn_loc = localization_loss.sum() * normalizer
            loss_dict = {}
            loss_dict['loss_rpn_cls'] = loss_rpn_cls
            loss_dict['loss_rpn_loc'] = loss_rpn_loc
            return rpn_rois, loss_dict
        else:
            return rpn_rois
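The paddle.gather / paddle.nonzero pairing used for valid_masks and pos_masks above is Paddle's substitute for boolean indexing. A minimal sketch of the pattern on toy tensors (purely illustrative):

import paddle

x = paddle.to_tensor([10., 20., 30., 40.])
mask = paddle.to_tensor([True, False, True, False])

# nonzero returns the indices of True entries; gather picks those rows
picked = paddle.gather(x, paddle.nonzero(mask).flatten())
print(picked.numpy())  # [10. 30.]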
Example #3
    def forward(self, student, teacher):
        bs = student.shape[0]
        student = student.reshape([bs, -1])
        teacher = teacher.reshape([bs, -1])

        t_d = pdist(teacher, squared=False, eps=self.eps)
        mean_td = t_d.mean()
        t_d = t_d / (mean_td + self.eps)

        d = pdist(student, squared=False, eps=self.eps)
        mean_d = d.mean()
        d = d / (mean_d + self.eps)

        loss = F.smooth_l1_loss(d, t_d, reduction="mean")
        return loss
Example #4
    def forward(self, confidence, predicted_locations, labels, gt_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            locations (batch_size, num_priors, 4): predicted locations.
            labels (batch_size, num_priors): real labels of all the priors.
            boxes (batch_size, num_priors, 4): real boxes corresponding all the priors.
        """
        num_classes = confidence.shape[2]
        with paddle.no_grad():
            # negative log-probability of the background class, used to
            # rank candidates for hard negative mining
            loss = -F.log_softmax(confidence, 2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels,
                                                  self.neg_pos_ratio)

        confidence = paddle.concat([
            confidence[:, :, 0].masked_select(mask).reshape([-1, 1]),
            confidence[:, :, 1].masked_select(mask).reshape([-1, 1])
        ],
                                   axis=1)
        classification_loss = F.cross_entropy(confidence.reshape(
            [-1, num_classes]),
                                              labels.masked_select(mask),
                                              reduction='sum')
        pos_mask = labels > 0
        predicted_locations = predicted_locations.masked_select(
            paddle.concat([
                pos_mask.reshape(pos_mask.shape + [1]),
                pos_mask.reshape(pos_mask.shape + [1]),
                pos_mask.reshape(pos_mask.shape + [1]),
                pos_mask.reshape(pos_mask.shape + [1])
            ],
                          axis=2)).reshape([-1, 4])
        gt_locations = gt_locations.masked_select(
            paddle.concat([
                pos_mask.reshape(pos_mask.shape + [1]),
                pos_mask.reshape(pos_mask.shape + [1]),
                pos_mask.reshape(pos_mask.shape + [1]),
                pos_mask.reshape(pos_mask.shape + [1])
            ],
                          axis=2)).reshape([-1, 4])
        smooth_l1_loss = F.smooth_l1_loss(predicted_locations,
                                          gt_locations.cast('float32'),
                                          reduction='sum')  # smooth_l1_loss
        # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum')  #l2 loss
        num_pos = gt_locations.shape[0]
        return smooth_l1_loss / num_pos, classification_loss / num_pos
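The four-way concat of pos_mask above only broadcasts the (batch_size, num_priors) mask over the four box coordinates. An equivalent and arguably clearer formulation uses tile; this is a sketch on toy tensors, not the original author's code:

import paddle

# toy shapes: batch_size=2, num_priors=3 (purely illustrative)
pos_mask = paddle.to_tensor([[True, False, True], [False, True, False]])
locations = paddle.rand([2, 3, 4])

loc_mask = pos_mask.unsqueeze(-1).tile([1, 1, 4])  # (2, 3, 4)
selected = locations.masked_select(loc_mask).reshape([-1, 4])
print(selected.shape)  # [3, 4]: one row per positive prior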
Example #5
def train(model, data_loader, optimizer, lr_scheduler, epoch, LOG):

    stages = 4
    losses = [AverageMeter() for _ in range(stages)]
    length_loader = len(data_loader)

    model.train()

    for batch_id, data in enumerate(data_loader()):
        left_img, right_img, gt = data

        mask = gt > 0
        gt_mask = paddle.masked_select(gt, mask)

        outputs = model(left_img, right_img)
        outputs = [paddle.squeeze(output) for output in outputs]

        tem_stage_loss = []
        for index in range(stages):
            temp_loss = args.loss_weights[index] * F.smooth_l1_loss(
                paddle.masked_select(outputs[index], mask),
                gt_mask,
                reduction='mean')
            tem_stage_loss.append(temp_loss)
            losses[index].update(
                float(temp_loss.numpy() / args.loss_weights[index]))

        sum_loss = paddle.add_n(tem_stage_loss)
        sum_loss.backward()
        optimizer.step()
        optimizer.clear_grad()

        if batch_id % 5 == 0:
            info_str = [
                'Stage {} = {:.2f}({:.2f})'.format(x, losses[x].val,
                                                   losses[x].avg)
                for x in range(stages)
            ]
            info_str = '\t'.join(info_str)
            info_str = 'Train Epoch{} [{}/{}]  lr:{:.5f}\t{}'.format(
                epoch, batch_id, length_loader, optimizer.get_lr(), info_str)
            LOG.info(info_str)

    lr_scheduler.step()

    info_str = '\t'.join(
        ['Stage {} = {:.2f}'.format(x, losses[x].avg) for x in range(stages)])
    LOG.info('Average train loss: ' + info_str)
Example #6
    def forward(self, student, teacher):
        # reshape for feature map distillation
        bs = student.shape[0]
        student = student.reshape([bs, -1])
        teacher = teacher.reshape([bs, -1])

        td = (teacher.unsqueeze(0) - teacher.unsqueeze(1))
        norm_td = F.normalize(td, p=2, axis=2)
        t_angle = paddle.bmm(norm_td, norm_td.transpose([0, 2, 1])).reshape(
            [-1, 1])

        sd = (student.unsqueeze(0) - student.unsqueeze(1))
        norm_sd = F.normalize(sd, p=2, axis=2)
        s_angle = paddle.bmm(norm_sd, norm_sd.transpose([0, 2, 1])).reshape(
            [-1, 1])
        loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean')
        return loss
Example #7
    def det_loss(self, p_det, anchor, t_conf, t_box):
        pshape = paddle.shape(p_det)
        pshape.stop_gradient = True
        nB, nGh, nGw = pshape[0], pshape[-2], pshape[-1]
        nA = len(anchor)
        p_det = paddle.reshape(
            p_det, [nB, nA, self.num_classes + 5, nGh, nGw]).transpose(
                (0, 1, 3, 4, 2))

        # 1. loss_conf: cross_entropy
        p_conf = p_det[:, :, :, :, 4:6]
        p_conf_flatten = paddle.reshape(p_conf, [-1, 2])
        t_conf_flatten = t_conf.flatten()
        t_conf_flatten = paddle.cast(t_conf_flatten, dtype="int64")
        t_conf_flatten.stop_gradient = True
        loss_conf = F.cross_entropy(p_conf_flatten,
                                    t_conf_flatten,
                                    ignore_index=-1,
                                    reduction='mean')
        loss_conf.stop_gradient = False

        # 2. loss_box: smooth_l1_loss
        p_box = p_det[:, :, :, :, :4]
        p_box_flatten = paddle.reshape(p_box, [-1, 4])
        t_box_flatten = paddle.reshape(t_box, [-1, 4])
        fg_inds = paddle.nonzero(t_conf_flatten > 0).flatten()
        if fg_inds.numel() > 0:
            reg_delta = paddle.gather(p_box_flatten, fg_inds)
            reg_target = paddle.gather(t_box_flatten, fg_inds)
        else:
            # no foreground anchors: fall back to dummy zero tensors so the
            # box loss is well-defined (and evaluates to zero)
            reg_delta = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
            reg_delta.stop_gradient = False
            reg_target = paddle.to_tensor([0, 0, 0, 0], dtype='float32')
        reg_target.stop_gradient = True
        loss_box = F.smooth_l1_loss(reg_delta,
                                    reg_target,
                                    reduction='mean',
                                    delta=1.0)
        loss_box.stop_gradient = False

        return loss_conf, loss_box
Example #8
    def forward(self, student, teacher):
        # GAP to reduce memory
        if self.avgpool is not None:
            # NxC1xH1xW1 -> NxC1x1x1
            student = self.avgpool(student)
            # NxC2xH2xW2 -> NxC2x1x1
            teacher = self.avgpool(teacher)

        bs = student.shape[0]
        student = student.reshape([bs, -1])
        teacher = teacher.reshape([bs, -1])

        t_d = pdist(teacher, squared=False)
        mean_td = t_d.mean()
        t_d = t_d / (mean_td + self.eps)

        d = pdist(student, squared=False)
        mean_d = d.mean()
        d = d / (mean_d + self.eps)

        loss = F.smooth_l1_loss(d, t_d, reduction="mean")
        return loss
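Examples #3 and #8 both call a pdist helper that the listing does not show. A minimal Paddle sketch of such a pairwise-distance function, following the usual relational-knowledge-distillation formulation (the original implementation may differ in details):

import paddle

def pdist(e, squared=False, eps=1e-12):
    # pairwise distances between rows of e, via
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b
    e_square = e.pow(2).sum(axis=1)
    prod = paddle.mm(e, e.t())
    res = (e_square.unsqueeze(1) + e_square.unsqueeze(0) - 2 * prod).clip(min=eps)
    if not squared:
        res = res.sqrt()
    # zero the diagonal (each row's distance to itself)
    res = res * (1.0 - paddle.eye(e.shape[0]))
    return res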
Example #9
    def forward(self, input, label, conf):
        x_emb = self.embedding(input)
        fc = self.lin_a(x_emb)
        mask = conf > 0
        mask = paddle.cast(mask, dtype="int64")
        mask.stop_gradient = True
        emb_mask = mask.max(1).flatten()
        emb_mask_inds = paddle.nonzero(emb_mask > 0).flatten()
        emb_mask_inds.stop_gradient = True

        if emb_mask_inds.numel() == 0:
            # no positive samples: use a zero "phony" loss so the graph
            # still produces a differentiable output
            loss_box = self.phony * 0
        else:
            projection = self.lin_b(fc)
            projection = paddle.reshape(projection, shape=[-1, 1])
            output = paddle.gather(projection, emb_mask_inds)
            target = paddle.gather(label, emb_mask_inds)
            loss_box = F.smooth_l1_loss(output,
                                        target,
                                        reduction='sum',
                                        delta=1.0)
            loss_box = loss_box / len(conf)

        return loss_box
Example #10
    def forward(self, student, teacher):
        # GAP to reduce memory
        if self.avgpool is not None:
            # NxC1xH1xW1 -> NxC1x1x1
            student = self.avgpool(student)
            # NxC2xH2xW2 -> NxC2x1x1
            teacher = self.avgpool(teacher)

        # reshape for feature map distillation
        bs = student.shape[0]
        student = student.reshape([bs, -1])
        teacher = teacher.reshape([bs, -1])

        td = (teacher.unsqueeze(0) - teacher.unsqueeze(1))
        norm_td = F.normalize(td, p=2, axis=2)
        t_angle = paddle.bmm(norm_td, norm_td.transpose([0, 2,
                                                         1])).reshape([-1, 1])

        sd = (student.unsqueeze(0) - student.unsqueeze(1))
        norm_sd = F.normalize(sd, p=2, axis=2)
        s_angle = paddle.bmm(norm_sd, norm_sd.transpose([0, 2,
                                                         1])).reshape([-1, 1])
        loss = F.smooth_l1_loss(s_angle, t_angle, reduction='mean')
        return loss
Example #11
    def forward(self, boxes, scores, gt_bbox, gt_label, prior_boxes):
        boxes = paddle.concat(boxes, axis=1)
        scores = paddle.concat(scores, axis=1)
        gt_label = gt_label.unsqueeze(-1).astype('int64')
        prior_boxes = paddle.concat(prior_boxes, axis=0)
        bg_index = scores.shape[-1] - 1

        # Match bbox and get targets.
        targets_bbox, targets_label = \
            self._bipartite_match_for_batch(gt_bbox, gt_label, prior_boxes, bg_index)
        targets_bbox.stop_gradient = True
        targets_label.stop_gradient = True

        # Compute regression loss.
        # Select positive samples.
        bbox_mask = (targets_label != bg_index).astype(boxes.dtype)
        loc_loss = bbox_mask * F.smooth_l1_loss(
            boxes, targets_bbox, reduction='none')
        loc_loss = loc_loss.sum() * self.loc_loss_weight

        # Compute confidence loss.
        conf_loss = F.softmax_with_cross_entropy(scores, targets_label)
        # Mining hard examples.
        label_mask = self._mine_hard_example(conf_loss.squeeze(-1),
                                             targets_label.squeeze(-1),
                                             bg_index)
        conf_loss = conf_loss * label_mask.unsqueeze(-1).astype(
            conf_loss.dtype)
        conf_loss = conf_loss.sum() * self.conf_loss_weight

        # Compute overall weighted loss.
        normalizer = (targets_label != bg_index).astype('float32').sum().clip(
            min=1)
        loss = (conf_loss + loc_loss) / (normalizer + 1e-9)

        return loss
Example #12
def train(args):
    # Train on the GPU when available
    if paddle.is_compiled_with_cuda():
        paddle.set_device("gpu:0")
    # Create multi-process game environments
    envs = MultipleEnvironments(args.game, args.num_processes)
    # Fix the random seed for reproducible initialization
    paddle.seed(123)
    # Create the model
    model = Model(envs.num_states, envs.num_actions)
    # Load a pretrained model if one is given
    if args.trained_model is not None:
        model.load_dict(paddle.load(args.trained_model))
    # Create the directory for saving models
    if not os.path.isdir(args.saved_path):
        os.makedirs(args.saved_path)
    paddle.save(model.state_dict(),
                "{}/model_{}.pdparams".format(args.saved_path, args.game))
    # Run game evaluation in a separate process
    mp = _mp.get_context("spawn")
    process = mp.Process(target=eval,
                         args=(args, envs.num_states, envs.num_actions))
    process.start()
    # Create the optimizer (with gradient clipping)
    clip_grad = paddle.nn.ClipGradByNorm(clip_norm=0.5)
    optimizer = paddle.optimizer.Adam(parameters=model.parameters(),
                                      learning_rate=args.lr,
                                      grad_clip=clip_grad)
    # Reset the game in every worker process
    [agent_conn.send(("reset", None)) for agent_conn in envs.agent_conns]
    # Fetch the initial game frames
    curr_states = [agent_conn.recv() for agent_conn in envs.agent_conns]
    curr_states = paddle.to_tensor(np.concatenate(curr_states, 0),
                                   dtype='float32')
    curr_episode = 0
    while True:
        curr_episode += 1
        old_log_policies, actions, values, states, rewards, dones = [], [], [], [], [], []
        for _ in range(args.num_local_steps):
            states.append(curr_states)
            # Run the model
            logits, value = model(curr_states)
            # Compute the probability of each action
            policy = F.softmax(logits)
            # Sample actions according to the policy distribution
            old_m = Categorical(policy)
            action = old_m.sample([1]).squeeze()
            # Record the predictions
            actions.append(action)
            values.append(value.squeeze())
            # Log-probability of the sampled actions
            old_log_policy = old_m.log_prob(paddle.unsqueeze(action, axis=1))
            old_log_policy = paddle.squeeze(old_log_policy)
            old_log_policies.append(old_log_policy)
            # Send an action to each worker process
            [
                agent_conn.send(("step", int(act[0])))
                for agent_conn, act in zip(envs.agent_conns, action)
            ]
            # Collect the step results from all worker processes
            state, reward, done, info = zip(
                *[agent_conn.recv() for agent_conn in envs.agent_conns])
            # Convert the new states to a tensor
            state = paddle.to_tensor(np.concatenate(state, 0), dtype='float32')
            # Convert rewards and done flags to tensors
            reward = paddle.to_tensor(reward, dtype='float32')
            done = paddle.to_tensor(done, dtype='float32')
            # Record the rollout data
            rewards.append(reward)
            dones.append(done)
            curr_states = state
        # Bootstrap the value estimate from the final states
        _, next_value, = model(curr_states)
        next_value = next_value.squeeze()
        old_log_policies = paddle.concat(old_log_policies).detach().squeeze()
        actions = paddle.concat(actions).squeeze()
        values = paddle.concat(values).squeeze().detach()
        states = paddle.concat(states).squeeze()

        gae = 0.0
        R = []
        # generalized advantage estimation, computed backwards over the rollout
        for value, reward, done in list(zip(values, rewards, dones))[::-1]:
            gae = gae * args.gamma * args.tau
            gae = gae + reward + args.gamma * next_value.detach() * (
                1.0 - done) - value.detach()
            next_value = value
            R.append(gae + value)
        R = R[::-1]
        R = paddle.concat(R).detach()
        advantages = R - values
        for i in range(args.num_epochs):
            indice = paddle.randperm(args.num_local_steps * args.num_processes)
            for j in range(args.batch_size):
                batch_indices = indice[int(j * (
                    args.num_local_steps * args.num_processes / args.batch_size
                )):int((j + 1) * (args.num_local_steps * args.num_processes /
                                  args.batch_size))]
                # Run the model on the sampled states
                logits, value = model(paddle.gather(states, batch_indices))
                # Compute the probability of each action
                new_policy = F.softmax(logits)
                # Log-probabilities of the stored actions under the new policy
                new_m = Categorical(new_policy)
                new_log_policy = new_m.log_prob(
                    paddle.unsqueeze(paddle.gather(actions, batch_indices),
                                     axis=1))
                new_log_policy = paddle.squeeze(new_log_policy)
                # PPO clipped surrogate (actor) loss
                ratio = paddle.exp(
                    new_log_policy -
                    paddle.gather(old_log_policies, batch_indices))
                advantage = paddle.gather(advantages, batch_indices)
                actor_loss = paddle.clip(ratio, 1.0 - args.epsilon,
                                         1.0 + args.epsilon) * advantage
                actor_loss = paddle.concat([
                    paddle.unsqueeze(ratio * advantage, axis=0),
                    paddle.unsqueeze(actor_loss, axis=0)
                ])
                actor_loss = -paddle.mean(paddle.min(actor_loss, axis=0))
                # Critic loss: smooth L1 between returns and predicted values
                critic_loss = F.smooth_l1_loss(paddle.gather(R, batch_indices),
                                               value.squeeze())
                entropy_loss = paddle.mean(new_m.entropy())
                # Total loss
                total_loss = actor_loss + critic_loss - args.beta * entropy_loss
                # Backpropagate and update
                total_loss.backward()
                optimizer.step()
                optimizer.clear_grad()
            paddle.save(
                model.state_dict(),
                "{}/model_{}.pdparams".format(args.saved_path, args.game))
        print("Episode: {}. Total loss: {:.4f}".format(curr_episode,
                                                       total_loss.numpy()[0]))
Example #13
    def forward(self, boxes, scores, gt_box, gt_class, anchors):
        boxes = paddle.concat(boxes, axis=1)
        scores = paddle.concat(scores, axis=1)
        prior_boxes = paddle.concat(anchors, axis=0)
        gt_label = gt_class.unsqueeze(-1)
        batch_size, num_priors, num_classes = scores.shape

        def _reshape_to_2d(x):
            return paddle.flatten(x, start_axis=2)

        # 1. Find matched bounding box by prior box.
        #   1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
        #   1.2 Compute matched bounding box by bipartite matching algorithm.
        matched_indices = []
        matched_dist = []
        for i in range(gt_box.shape[0]):
            iou = iou_similarity(gt_box[i], prior_boxes)
            matched_indice, matched_d = bipartite_match(
                iou, self.match_type, self.overlap_threshold)
            matched_indices.append(matched_indice)
            matched_dist.append(matched_d)
        matched_indices = paddle.concat(matched_indices, axis=0)
        matched_indices.stop_gradient = True
        matched_dist = paddle.concat(matched_dist, axis=0)
        matched_dist.stop_gradient = True

        # 2. Compute confidence for mining hard examples
        # 2.1. Get the target label based on matched indices
        target_label, _ = self._label_target_assign(gt_label, matched_indices)
        confidence = _reshape_to_2d(scores)
        # 2.2. Compute confidence loss.
        # Reshape confidence to 2D tensor.
        target_label = _reshape_to_2d(target_label).astype('int64')
        conf_loss = F.softmax_with_cross_entropy(confidence, target_label)
        conf_loss = paddle.reshape(conf_loss, [batch_size, num_priors])

        # 3. Mining hard examples
        neg_mask = self._mine_hard_example(conf_loss,
                                           matched_indices,
                                           matched_dist,
                                           neg_pos_ratio=self.neg_pos_ratio,
                                           neg_overlap=self.neg_overlap)

        # 4. Assign classification and regression targets
        # 4.1. Encoded bbox according to the prior boxes.
        prior_box_var = paddle.to_tensor(
            np.array([0.1, 0.1, 0.2, 0.2],
                     dtype='float32')).reshape([1, 4]).expand_as(prior_boxes)
        encoded_bbox = []
        for i in range(gt_box.shape[0]):
            encoded_bbox.append(
                box_coder(prior_box=prior_boxes,
                          prior_box_var=prior_box_var,
                          target_box=gt_box[i],
                          code_type='encode_center_size'))
        encoded_bbox = paddle.stack(encoded_bbox, axis=0)
        # 4.2. Assign regression targets
        target_bbox, target_loc_weight = self._bbox_target_assign(
            encoded_bbox, matched_indices)
        # 4.3. Assign classification targets
        target_label, target_conf_weight = self._label_target_assign(
            gt_label, matched_indices, neg_mask=neg_mask)

        # 5. Compute loss.
        # 5.1 Compute confidence loss.
        target_label = _reshape_to_2d(target_label).astype('int64')
        conf_loss = F.softmax_with_cross_entropy(confidence, target_label)

        target_conf_weight = _reshape_to_2d(target_conf_weight)
        conf_loss = conf_loss * target_conf_weight * self.conf_loss_weight

        # 5.2 Compute regression loss.
        location = _reshape_to_2d(boxes)
        target_bbox = _reshape_to_2d(target_bbox)

        loc_loss = F.smooth_l1_loss(location, target_bbox, reduction='none')
        loc_loss = paddle.sum(loc_loss, axis=-1, keepdim=True)
        target_loc_weight = _reshape_to_2d(target_loc_weight)
        loc_loss = loc_loss * target_loc_weight * self.loc_loss_weight

        # 5.3 Compute overall weighted loss.
        loss = conf_loss + loc_loss
        loss = paddle.reshape(loss, [batch_size, num_priors])
        loss = paddle.sum(loss, axis=1, keepdim=True)
        normalizer = paddle.sum(target_loc_weight)
        loss = paddle.sum(loss / normalizer)

        return loss
Example #14
def compute_res_loss(output, target):
    # return F.smooth_l1_loss(output, target, reduction='elementwise_mean')
    return F.smooth_l1_loss(output, target, reduction='mean')
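For completeness, calling this helper on dummy tensors (shapes chosen arbitrarily for illustration):

import paddle

output = paddle.rand([2, 3, 8, 8])
target = paddle.rand([2, 3, 8, 8])
loss = compute_res_loss(output, target)
print(float(loss))  # scalar: mean smooth-L1 over all elements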