def forward(self, out: Dict[str, torch.Tensor],
            inp: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    annot = inp['annot']
    att_box = out['att_out']
    reg_box = out['bbx_out']
    feat_sizes = out['feat_sizes']
    num_f_out = out['num_f_out']
    device = att_box.device

    # Get the correct number of output features
    # (num_f_out is replicated under DataParallel).
    if len(num_f_out) > 1:
        num_f_out = int(num_f_out[0].item())
    else:
        num_f_out = int(num_f_out.item())

    # Compute anchors only once since the input size is kept fixed.
    if self.anchs is None:
        feat_sizes = feat_sizes[:num_f_out, :]
        anchs = self.get_anchors(feat_sizes)
        anchs = anchs.to(device)
        self.anchs = anchs
    else:
        anchs = self.anchs

    # Highest-scoring anchor per image.
    att_box_sigmoid = torch.sigmoid(att_box).squeeze(-1)
    att_box_best, att_box_best_ids = att_box_sigmoid.max(1)

    # Upper bound on accuracy: always pick the anchor with the
    # highest IoU against the ground-truth box.
    ious1 = IoU_values(annot, anchs)
    gt_mask, expected_best_ids = ious1.max(1)
    actual_bbox = reg_params_to_bbox(anchs, reg_box)
    best_possible_result, _ = self.get_eval_result(
        actual_bbox, annot, expected_best_ids)

    msk = None
    actual_result, pred_boxes = self.get_eval_result(
        actual_bbox, annot, att_box_best_ids, msk)

    out_dict = {}
    out_dict['Acc'] = actual_result
    out_dict['MaxPos'] = best_possible_result
    out_dict['idxs'] = inp['idxs']

    # Map boxes from [-1, 1] back to image coordinates (y1x1y2x2 form).
    reshaped_boxes = x1y1x2y2_to_y1x1y2x2(
        reshape((pred_boxes + 1) / 2, (inp['img_size'])))
    out_dict['pred_boxes'] = reshaped_boxes
    out_dict['pred_scores'] = att_box_best

    # Sanity check (kept for reference):
    # orig_annot = inp['orig_annot']
    # iou1 = (torch.diag(IoU_values(reshaped_boxes, orig_annot))
    #         >= self.acc_iou_threshold).float().mean()
    # assert actual_result.item() == iou1.item()
    return out_dict
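# The evaluator above relies on `reg_params_to_bbox` to turn per-anchor
# regression outputs into boxes. As a reference, here is a minimal sketch of
# the usual RetinaNet-style center/size decoding; the exact parameterization
# used by `reg_params_to_bbox` in this repo may differ, so treat this as an
# illustrative assumption rather than the actual implementation.
def decode_boxes_sketch(anchors: torch.Tensor,
                        reg_params: torch.Tensor) -> torch.Tensor:
    # anchors:    (A, 4) in x1y1x2y2 form.
    # reg_params: (B, A, 4) predicted (dcx, dcy, dw, dh) offsets.
    widths = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]
    ctr_x = anchors[:, 0] + 0.5 * widths
    ctr_y = anchors[:, 1] + 0.5 * heights
    # Shift the anchor center by the predicted fraction of its size,
    # and scale width/height exponentially.
    pred_ctr_x = reg_params[..., 0] * widths + ctr_x
    pred_ctr_y = reg_params[..., 1] * heights + ctr_y
    pred_w = torch.exp(reg_params[..., 2]) * widths
    pred_h = torch.exp(reg_params[..., 3]) * heights
    return torch.stack([pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h], dim=-1)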
def forward(self, out: Dict[str, torch.Tensor],
            inp: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    """
    out: att_out, bbx_out, feat_sizes, num_f_out, att_maps
    inp: annot is the gt box (r1c1r2c2 form)
    """
    annot = inp['annot']
    att_box = out['att_out']
    reg_box = out['bbx_out']
    feat_sizes = out['feat_sizes']
    num_f_out = out['num_f_out']
    att_maps = out['att_maps']

    # Optional per-stage attention supervision against the IoU maps.
    if self.use_att_loss:
        iou_annot_stage_0 = inp['iou_annot_stage_0']
        iou_annot_stage_1 = inp['iou_annot_stage_1']
        iou_annot_stage_2 = inp['iou_annot_stage_2']
        att_loss = (self.att_losses(att_maps[0], iou_annot_stage_0)
                    + self.att_losses(att_maps[1], iou_annot_stage_1)
                    + self.att_losses(att_maps[2], iou_annot_stage_2)) / 3.
    else:
        att_loss = torch.zeros([1]).to(att_box.device)

    device = att_box.device

    # Get the correct number of output features
    # (num_f_out is replicated in the case of DataParallel).
    if len(num_f_out) > 1:
        num_f_out = int(num_f_out[0].item())
    else:
        num_f_out = int(num_f_out.item())

    # Compute anchors only once since the input size is kept fixed.
    # Needs to be changed in case the size is not fixed.
    if self.anchs is None:
        feat_sizes = feat_sizes[:num_f_out, :]
        anchs = self.get_anchors(feat_sizes)
        anchs = anchs.to(device)
        self.anchs = anchs
    else:
        anchs = self.anchs

    # Positive anchors: those matched by IoU threshold ...
    matches = simple_match_anchors(
        anchs, annot, match_thr=self.cfg['matching_threshold'])
    bbx_mask = (matches >= 0)

    # ... plus the single best-IoU anchor per image, so every
    # ground-truth box has at least one positive.
    ious1 = IoU_values(annot, anchs)
    _, msk = ious1.max(1)
    bbx_mask2 = torch.eye(anchs.size(0))[msk]
    bbx_mask2 = (bbx_mask2 > 0).to(device)
    top1_mask = bbx_mask2
    if not self.use_multi:
        bbx_mask = bbx_mask2
    else:
        bbx_mask = bbx_mask | bbx_mask2

    # Regression loss, averaged over the positive anchors.
    gt_reg_params = bbox_to_reg_params(anchs, annot)
    box_l = self.box_loss(reg_box, gt_reg_params)
    box_l_relv = box_l.sum(dim=2) * bbx_mask.float()
    box_l_relv = box_l_relv.sum(dim=1) / bbx_mask.sum(dim=-1).float()
    box_loss = box_l_relv.mean()

    if torch.isinf(box_loss):
        # Likely a bug with the annot box being very small
        import pdb
        pdb.set_trace()

    att_box = att_box.squeeze(-1)
    att_box_sigm = torch.sigmoid(att_box)

    if self.use_softmax:
        assert self.use_multi is False
        gt_ids = msk
        clas_loss = F.cross_entropy(att_box, gt_ids, reduction='none')
    else:
        if self.use_focal:
            # Focal-loss style reweighting of the per-anchor BCE terms.
            encoded_tgt = bbx_mask.float()
            ps = att_box_sigm
            weights = encoded_tgt * (1 - ps) + (1 - encoded_tgt) * ps
            alphas = ((1 - encoded_tgt) * self.alpha
                      + encoded_tgt * (1 - self.alpha))
            weights.pow_(self.gamma).mul_(alphas)
            weights = weights.detach()
        else:
            weights = None
        clas_loss = F.binary_cross_entropy_with_logits(
            att_box, bbx_mask.float(), weight=weights, reduction='none')

    clas_loss = clas_loss.sum() / bbx_mask.sum()

    if torch.isnan(box_loss) or torch.isnan(clas_loss):
        # Replace NaN losses with small constants so training can continue.
        box_loss = box_loss.new_ones(box_loss.shape) * 0.01
        box_loss.requires_grad = True
        clas_loss = clas_loss.new_ones(clas_loss.shape)
        clas_loss.requires_grad = True

    if self.use_att_loss:
        out_loss = self.lamb_reg * box_loss + clas_loss + att_loss
    else:
        out_loss = self.lamb_reg * box_loss + clas_loss

    out_dict = {}
    out_dict['loss'] = out_loss
    out_dict['cls_ls'] = clas_loss
    out_dict['box_ls'] = box_loss
    if self.use_att_loss:
        out_dict['att_ls'] = att_loss
    return out_dict
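# For readability, the focal reweighting computed inline above can be read as
# a standalone helper. This is a direct restatement of that code, not a new
# technique; note that with this convention `alpha` weights the negatives
# (positives get 1 - alpha).
def focal_bce_weights_sketch(logits: torch.Tensor,
                             targets: torch.Tensor,
                             alpha: float,
                             gamma: float) -> torch.Tensor:
    # pt is the probability mass on the wrong side of each target:
    # (1 - p) for positives, p for negatives.
    ps = torch.sigmoid(logits)
    pt = targets * (1 - ps) + (1 - targets) * ps
    alphas = (1 - targets) * alpha + targets * (1 - alpha)
    # Detach so the weights act as constants in the backward pass.
    return (pt.pow(gamma) * alphas).detach()

# Usage mirrors the loss above:
#   w = focal_bce_weights_sketch(att_box, bbx_mask.float(),
#                                self.alpha, self.gamma)
#   clas_loss = F.binary_cross_entropy_with_logits(
#       att_box, bbx_mask.float(), weight=w, reduction='none')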
def forward(self, out: Dict[str, torch.Tensor],
            inp: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
    annot = inp['annot']
    att_box = out['att_out']
    reg_box = out['bbx_out']
    feat_sizes = out['feat_sizes']
    num_f_out = out['num_f_out']
    device = att_box.device

    # Get the correct number of output features
    # (num_f_out is replicated under DataParallel).
    if len(num_f_out) > 1:
        num_f_out = int(num_f_out[0].item())
    else:
        num_f_out = int(num_f_out.item())

    # Compute anchors only once since the input size is kept fixed.
    if self.anchs is None:
        feat_sizes = feat_sizes[:num_f_out, :]
        anchs = self.get_anchors(feat_sizes)
        anchs = anchs.to(device)
        self.anchs = anchs
    else:
        anchs = self.anchs

    att_box_sigmoid = torch.sigmoid(att_box).squeeze(-1)
    # Take the top-100 scoring anchors instead of only the single best.
    topk_box = torch.topk(att_box_sigmoid, k=100)
    att_box_best, att_box_best_ids = topk_box.values, topk_box.indices

    # Upper bound on accuracy: always pick the anchor with the
    # highest IoU against the ground-truth box.
    ious1 = IoU_values(annot, anchs)
    gt_mask, expected_best_ids = ious1.max(1)
    actual_bbox = reg_params_to_bbox(anchs, reg_box)
    best_possible_result, _ = self.get_eval_result(
        actual_bbox, annot, expected_best_ids)

    msk = None
    # The top-1 prediction drives the reported metrics.
    actual_result, pred_boxes = self.get_eval_result(
        actual_bbox, annot, att_box_best_ids[:, 0], msk)
    # Map boxes from [-1, 1] back to image coordinates (y1x1y2x2 form).
    top_boxes = x1y1x2y2_to_y1x1y2x2(
        reshape((pred_boxes + 1) / 2, (inp['img_size'])))
    top_scores = att_box_best[:, 0]

    out_dict = {}
    out_dict['Acc'] = actual_result
    out_dict['MaxPos'] = best_possible_result
    out_dict['idxs'] = inp['idxs']
    out_dict['pred_boxes'] = top_boxes
    out_dict['pred_scores'] = att_box_best

    # Greedily collect further candidates: walk down the top-100 list,
    # stop once the score drops below 0.45, and keep a box only if its
    # IoU with every box kept so far is below 0.5.
    # Note: the scalar comparison below assumes batch size 1.
    for i in range(1, 100):
        if att_box_best[0, i].item() < 0.45:
            break
        _, pred_boxes_i = self.get_eval_result(
            actual_bbox, annot, att_box_best_ids[:, i], msk)
        reshaped_boxes = x1y1x2y2_to_y1x1y2x2(
            reshape((pred_boxes_i + 1) / 2, (inp['img_size'])))
        if (IoU_values(top_boxes, reshaped_boxes) < 0.5).all():
            top_boxes = torch.cat([top_boxes, reshaped_boxes], dim=0)
            top_scores = torch.cat([top_scores, att_box_best[:, i]], dim=0)

    # Sanity check (kept for reference):
    # orig_annot = inp['orig_annot']
    # iou1 = (torch.diag(IoU_values(top_boxes, orig_annot))
    #         >= self.acc_iou_threshold).float().mean()
    # assert actual_result.item() == iou1.item()

    print("Best bounding boxes--------->\n\n", top_boxes, top_scores)

    # Interactive demo: draw each candidate box, show it, and ask the
    # user to confirm. These imports belong at module level.
    import subprocess
    import cv2
    import pandas as pd

    test_dat = pd.read_csv("data/referit/csv_dir/test.csv")
    img_count = 0
    for ind in range(len(test_dat)):
        bb_data = top_boxes.cpu().numpy()
        for box_num in range(bb_data.shape[0]):
            filename = test_dat.iloc[ind]['img_id']
            # Hard-coded demo image; to use the actual test image instead:
            # img_path = r'data/referit/saiapr_tc12_images/{}'.format(filename)
            img_path = r'input/{}'.format('img1.jpeg')
            img = cv2.imread(img_path)
            x1 = int(round(bb_data[box_num, 0]))
            y1 = int(round(bb_data[box_num, 1]))
            x2 = int(round(bb_data[box_num, 2]))
            y2 = int(round(bb_data[box_num, 3]))
            img = cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 20)
            img_count += 1
            out_name = "output/imgR" + str(img_count) + '.jpeg'
            cv2.imwrite(out_name, img)
            # Requires ImageMagick's `display` utility.
            p = subprocess.Popen(["display", out_name])
            if bb_data.shape[0] > 1:
                found_obj = input("Do you mean this?: ")
                p.kill()
                if found_obj and found_obj[0].lower() == "y":
                    break
            else:
                p.kill()
                print("Thank you for your confirmation")
    return out_dict
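# The candidate-selection loop in the evaluator above is essentially greedy
# non-maximum suppression. A minimal standalone sketch, assuming (as the code
# above does) that IoU_values(a, b) returns a pairwise (len(a), len(b)) IoU
# matrix and that `boxes` are already sorted by descending score:
def greedy_dedup_sketch(boxes: torch.Tensor,
                        scores: torch.Tensor,
                        score_thr: float = 0.45,
                        iou_thr: float = 0.5):
    # Always keep the top-scoring box.
    keep_boxes = boxes[:1]
    keep_scores = scores[:1]
    for i in range(1, boxes.size(0)):
        if scores[i].item() < score_thr:
            break
        cand = boxes[i:i + 1]
        # Keep a candidate only if it overlaps every kept box
        # by less than iou_thr.
        if (IoU_values(keep_boxes, cand) < iou_thr).all():
            keep_boxes = torch.cat([keep_boxes, cand], dim=0)
            keep_scores = torch.cat([keep_scores, scores[i:i + 1]], dim=0)
    return keep_boxes, keep_scores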