Code example #1
    def forward(self, out: Dict[str, torch.Tensor],
                inp: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Evaluation forward: decode the predicted boxes against the anchors
        and score the box at the best-attended anchor.
        """

        annot = inp['annot']
        att_box = out['att_out']
        reg_box = out['bbx_out']
        feat_sizes = out['feat_sizes']
        num_f_out = out['num_f_out']

        device = att_box.device

        if len(num_f_out) > 1:
            num_f_out = int(num_f_out[0].item())
        else:
            num_f_out = int(num_f_out.item())

        # Keep only the feature sizes actually produced
        # (num_f_out corrects for DataParallel replication)
        feat_sizes = feat_sizes[:num_f_out, :]

        # Anchors are computed once and cached since the input size is fixed
        if self.anchs is None:
            anchs = self.get_anchors(feat_sizes)
            anchs = anchs.to(device)
            self.anchs = anchs
        else:
            anchs = self.anchs

        att_box_sigmoid = torch.sigmoid(att_box).squeeze(-1)
        att_box_best, att_box_best_ids = att_box_sigmoid.max(1)
        # self.att_box_best = att_box_best

        ious1 = IoU_values(annot, anchs)
        gt_mask, expected_best_ids = ious1.max(1)

        actual_bbox = reg_params_to_bbox(anchs, reg_box)

        best_possible_result, _ = self.get_eval_result(actual_bbox, annot,
                                                       expected_best_ids)

        msk = None
        actual_result, pred_boxes = self.get_eval_result(
            actual_bbox, annot, att_box_best_ids, msk)

        out_dict = {}
        out_dict['Acc'] = actual_result
        out_dict['MaxPos'] = best_possible_result
        out_dict['idxs'] = inp['idxs']

        # Map boxes from [-1, 1] to [0, 1], rescale to the image size,
        # and convert (x1, y1, x2, y2) -> (y1, x1, y2, x2)
        reshaped_boxes = x1y1x2y2_to_y1x1y2x2(
            reshape((pred_boxes + 1) / 2, inp['img_size']))
        out_dict['pred_boxes'] = reshaped_boxes
        out_dict['pred_scores'] = att_box_best
        # orig_annot = inp['orig_annot']
        # Sanity check
        # iou1 = (torch.diag(IoU_values(reshaped_boxes, orig_annot))
        #         >= self.acc_iou_threshold).float().mean()
        # assert actual_result.item() == iou1.item()
        return out_dict
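
Note: helpers such as reg_params_to_bbox and IoU_values are not shown here. As
a point of reference only, a minimal sketch of the standard RetinaNet-style
anchor-offset decode that reg_params_to_bbox presumably performs (assuming
anchors and offsets in (cx, cy, w, h) parametrization; decode_boxes is a
hypothetical stand-in, not the codebase's function):

import torch

def decode_boxes(anchors: torch.Tensor, reg_params: torch.Tensor) -> torch.Tensor:
    # anchors: (A, 4) as (cx, cy, w, h); reg_params: (A, 4) as (dx, dy, dw, dh)
    cxcy = anchors[:, :2] + reg_params[:, :2] * anchors[:, 2:]
    wh = anchors[:, 2:] * torch.exp(reg_params[:, 2:])
    # convert (cx, cy, w, h) -> (x1, y1, x2, y2)
    return torch.cat([cxcy - wh / 2, cxcy + wh / 2], dim=1)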
Code example #2
    def forward(self, out: Dict[str, torch.Tensor],
                inp: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        out: att_box, reg_box, feat_sizes, num_f_out, att_maps
        inp: annot -- gt box in (r1c1r2c2) form
        """
        annot = inp['annot']
        att_box = out['att_out']
        reg_box = out['bbx_out']
        feat_sizes = out['feat_sizes']
        num_f_out = out['num_f_out']
        att_maps = out['att_maps']

        if self.use_att_loss:
            # self.loss_keys.append('att_ls')
            iou_annot_stage_0 = inp['iou_annot_stage_0']
            iou_annot_stage_1 = inp['iou_annot_stage_1']
            iou_annot_stage_2 = inp['iou_annot_stage_2']
            att_loss = self.att_losses(
                att_maps[0], iou_annot_stage_0) + self.att_losses(
                    att_maps[1], iou_annot_stage_1) + self.att_losses(
                        att_maps[2], iou_annot_stage_2)
            att_loss = att_loss / 3.
        else:
            att_loss = torch.zeros([1]).to(att_box.device)
        device = att_box.device

        # get the correct number of output features
        # in the case of DataParallel
        if len(num_f_out) > 1:
            num_f_out = int(num_f_out[0].item())
        else:
            num_f_out = int(num_f_out.item())

        # Computes Anchors only once since size is kept fixed
        # Needs to be changed in case size is not fixed
        if self.anchs is None:
            feat_sizes = feat_sizes[:num_f_out, :]
            anchs = self.get_anchors(feat_sizes)
            anchs = anchs.to(device)
            self.anchs = anchs
        else:
            anchs = self.anchs
        matches = simple_match_anchors(
            anchs, annot, match_thr=self.cfg['matching_threshold'])
        bbx_mask = (matches >= 0)
        ious1 = IoU_values(annot, anchs)
        _, msk = ious1.max(1)

        # One-hot mask selecting the single best-overlapping anchor per image
        bbx_mask2 = torch.eye(anchs.size(0))[msk]
        bbx_mask2 = bbx_mask2 > 0
        bbx_mask2 = bbx_mask2.to(device)
        top1_mask = bbx_mask2

        if not self.use_multi:
            bbx_mask = bbx_mask2
        else:
            bbx_mask = bbx_mask | bbx_mask2

        # Regression loss, averaged over the anchors selected by bbx_mask
        gt_reg_params = bbox_to_reg_params(anchs, annot)
        box_l = self.box_loss(reg_box, gt_reg_params)
        box_l_relv = box_l.sum(dim=2) * bbx_mask.float()
        box_l_relv = box_l_relv.sum(dim=1) / bbx_mask.sum(dim=-1).float()
        box_loss = box_l_relv.mean()

        if torch.isinf(box_loss):
            # Likely a bug with the annotation box being very small
            import pdb
            pdb.set_trace()

        att_box = att_box.squeeze(-1)
        att_box_sigm = torch.sigmoid(att_box)

        if self.use_softmax:
            assert self.use_multi is False
            gt_ids = msk
            clas_loss = F.cross_entropy(att_box, gt_ids, reduction='none')
        else:
            if self.use_focal:
                # Focal-loss style weighting: (1 - p_t)^gamma * alpha_t
                encoded_tgt = bbx_mask.float()
                ps = att_box_sigm
                weights = encoded_tgt * (1 - ps) + (1 - encoded_tgt) * ps
                alphas = ((1 - encoded_tgt) * self.alpha + encoded_tgt *
                          (1 - self.alpha))
                weights.pow_(self.gamma).mul_(alphas)
                weights = weights.detach()
            else:
                weights = None

            clas_loss = F.binary_cross_entropy_with_logits(att_box,
                                                           bbx_mask.float(),
                                                           weight=weights,
                                                           reduction='none')

        # Normalize by the number of matched anchors
        clas_loss = clas_loss.sum() / bbx_mask.sum().float()

        if torch.isnan(box_loss) or torch.isnan(clas_loss):
            # Fall back to small constant losses so training can continue
            box_loss = box_loss.new_ones(box_loss.shape) * 0.01
            box_loss.requires_grad = True
            clas_loss = clas_loss.new_ones(clas_loss.shape)
            clas_loss.requires_grad = True

        if self.use_att_loss:
            out_loss = self.lamb_reg * box_loss + clas_loss + att_loss
        else:
            out_loss = self.lamb_reg * box_loss + clas_loss
        # + self.lamb_rel * rel_loss
        out_dict = {}
        out_dict['loss'] = out_loss
        out_dict['cls_ls'] = clas_loss
        out_dict['box_ls'] = box_loss
        if self.use_att_loss:
            out_dict['att_ls'] = att_loss
        # out_dict['rel_ls'] = rel_loss

        return out_dict
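
Note: the use_focal branch above is the focal-loss weighting of Lin et al.
(2017): the BCE term is scaled by (1 - p_t)^gamma * alpha_t. A minimal
standalone sketch of the same weighting (focal_bce_with_logits is a
hypothetical helper, not part of the codebase; note that, like the snippet,
it weights positives by (1 - alpha) and negatives by alpha):

import torch
import torch.nn.functional as F

def focal_bce_with_logits(logits: torch.Tensor, targets: torch.Tensor,
                          alpha: float = 0.25, gamma: float = 2.0) -> torch.Tensor:
    # targets: 0/1 floats, same shape as logits
    ps = torch.sigmoid(logits)
    # (1 - p_t): small when the prediction already matches the target
    weights = targets * (1 - ps) + (1 - targets) * ps
    alphas = (1 - targets) * alpha + targets * (1 - alpha)
    weights = (weights ** gamma * alphas).detach()
    return F.binary_cross_entropy_with_logits(
        logits, targets, weight=weights, reduction='none')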
Code example #3
    def forward(self, out: Dict[str, torch.Tensor],
                inp: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Evaluation forward that keeps the top-k candidate boxes and
        interactively visualizes the retained boxes.
        """
        annot = inp['annot']
        att_box = out['att_out']
        reg_box = out['bbx_out']
        feat_sizes = out['feat_sizes']
        num_f_out = out['num_f_out']

        device = att_box.device

        if len(num_f_out) > 1:
            num_f_out = int(num_f_out[0].item())
        else:
            num_f_out = int(num_f_out.item())

        feat_sizes = feat_sizes[:num_f_out, :]

        # Anchors are computed once and cached since the input size is fixed
        if self.anchs is None:
            anchs = self.get_anchors(feat_sizes)
            anchs = anchs.to(device)
            self.anchs = anchs
        else:
            anchs = self.anchs

        att_box_sigmoid = torch.sigmoid(att_box).squeeze(-1)
        # Keep the top-100 scoring anchors instead of only the best one
        topk_box = torch.topk(att_box_sigmoid, k=100)
        att_box_best, att_box_best_ids = topk_box.values, topk_box.indices

        ious1 = IoU_values(annot, anchs)
        #print("-->iou:", ious1.shape)
        gt_mask, expected_best_ids = ious1.max(1)

        actual_bbox = reg_params_to_bbox(anchs, reg_box)

        #print("\n ->actual_bbox", actual_bbox[0][att_box_best_ids[0].item()])
        best_possible_result, _ = self.get_eval_result(actual_bbox, annot,
                                                       expected_best_ids)

        #print("\n--> att_box_best_ids:", att_box_best_ids)

        msk = None

        # Evaluate the best-scoring box first
        actual_result, pred_box = self.get_eval_result(
            actual_bbox, annot, att_box_best_ids[:, 0], msk)

        top_boxes = x1y1x2y2_to_y1x1y2x2(
            reshape((pred_box + 1) / 2, inp['img_size']))
        top_scores = att_box_best[:, 0]

        # Initialize the output with the top-1 result so it is defined
        # even when no later candidate clears the score threshold
        out_dict = {}
        out_dict['Acc'] = actual_result
        out_dict['MaxPos'] = best_possible_result
        out_dict['idxs'] = inp['idxs']
        out_dict['pred_boxes'] = top_boxes
        out_dict['pred_scores'] = att_box_best

        for i in range(1, 100):
            # Stop once candidate scores drop below the threshold
            # (assumes batch size 1 so the comparison yields a scalar)
            if att_box_best[:, i] < 0.45:
                break

            actual_result, pred_boxes = self.get_eval_result(
                actual_bbox, annot, att_box_best_ids[:, i], msk)
            out_dict = {}
            out_dict['Acc'] = actual_result
            out_dict['MaxPos'] = best_possible_result
            out_dict['idxs'] = inp['idxs']

            reshaped_boxes = x1y1x2y2_to_y1x1y2x2(
                reshape((pred_boxes + 1) / 2, inp['img_size']))

            out_dict['pred_boxes'] = reshaped_boxes
            out_dict['pred_scores'] = att_box_best

            # Keep the candidate only if it does not overlap (IoU < 0.5)
            # any box already retained -- a greedy NMS-style dedup
            if (IoU_values(top_boxes, reshaped_boxes) < 0.5).all():
                top_boxes = torch.cat([top_boxes, reshaped_boxes], dim=0)
                top_scores = torch.cat([top_scores, att_box_best[:, i]],
                                       dim=0)
            #print("\n\n --> {}-th reshaped_box \n score:{} and \n box:{} iou:{}".format(i, att_box_best,reshaped_boxes, iou_sc.max(1)))
            # orig_annot = inp['orig_annot']
            # Sanity check
            # iou1 = (torch.diag(IoU_values(reshaped_boxes, orig_annot))
            #         >= self.acc_iou_threshold).float().mean()
            # assert actual_result.item() == iou1.item()

        print("Best bounding boxes--------->\n\n", top_boxes, top_scores)
        import cv2
        import pandas as pd
        import subprocess  # needed for the "display" call below

        # Show each retained box and ask the user to confirm
        test_dat = pd.read_csv("data/referit/csv_dir/test.csv")
        img_count = 0
        for ind in range(len(test_dat)):
            bb_data = top_boxes.cpu().numpy()
            for box_num in range(0, bb_data.shape[0]):
                filename = test_dat.iloc[ind]['img_id']
                # Hard-coded demo image; the dataset images live at
                # 'data/referit/saiapr_tc12_images/{}'.format(filename)
                img_path = r'input/{}'.format('img1.jpeg')
                img = cv2.imread(img_path)
                img = cv2.cvtColor(img, 0)
                x1 = round(bb_data[box_num, 0])
                y1 = round(bb_data[box_num, 1])
                x2 = round(bb_data[box_num, 2])
                y2 = round(bb_data[box_num, 3])
                img = cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 20)
                img_count = img_count + 1
                nameToRename = "output/imgR" + str(img_count) + '.jpeg'
                cv2.imwrite(nameToRename, img)
                p = subprocess.Popen(["display", nameToRename])
                if bb_data.shape[0] > 1:
                    found_obj = input("Do you mean this?: ")
                    '''
                    r = sr.Recognizer()
                    with sr.Microphone() as source:
                        print("Say something!")
                        audio = r.listen(source)

                    # Speech recognition using Google Speech Recognition
                    try:
                        # for testing purposes, we're just using the default API key
                        # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                        # instead of `r.recognize_google(audio)`
                        #print("You said: " + r.recognize_google(audio))
                        found_obj = r.recognize_google(audio)
                        print(found_obj)
                    except sr.UnknownValueError:
                        print("Google Speech Recognition could not understand audio")
                    except sr.RequestError as e:
                        print("Could not request results from Google Speech Recognition service; {0}".format(e))
                        '''
                    p.kill()
                    if "y" == found_obj[0].lower():
                        break
                    else:
                        continue
                else:
                    print("Thank you for your confirmation")

        return out_dict
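
Note: the candidate loop above keeps a new box only when its IoU with every
already-kept box is below 0.5, i.e. a greedy NMS-style deduplication over
score-sorted candidates. A minimal standalone sketch of that idea
(pairwise_iou and dedup_boxes are hypothetical helpers standing in for
IoU_values and the inline loop):

import torch

def pairwise_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    # boxes in (x1, y1, x2, y2); returns an (N, M) IoU matrix
    tl = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])
    br = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])
    inter = (br - tl).clamp(min=0).prod(dim=2)
    area1 = (boxes1[:, 2:] - boxes1[:, :2]).prod(dim=1)
    area2 = (boxes2[:, 2:] - boxes2[:, :2]).prod(dim=1)
    return inter / (area1[:, None] + area2[None, :] - inter)

def dedup_boxes(boxes: torch.Tensor, iou_thr: float = 0.5) -> torch.Tensor:
    # boxes assumed sorted by descending score; keep a box only if it
    # overlaps none of the boxes kept so far
    kept = boxes[:1]
    for b in boxes[1:]:
        if (pairwise_iou(kept, b[None]) < iou_thr).all():
            kept = torch.cat([kept, b[None]], dim=0)
    return kept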