def extract_regions(self, features, boxes, indices):
    """Crop one fixed-size feature patch per proposal box.

    Args:
        features: Feature map, sliced by this method as ``features[:, x, y]``.
            NOTE(review): presumably laid out as ``(C, Wp, Hp)`` -- confirm
            against the caller, a ``(C, H, W)`` map would need swapped slices.
        boxes: Normalised boxes laid out as ``(4, p)`` in ``x1, y1, x2, y2``
            order.
        indices: Per-box identifiers, filtered together with ``boxes`` by
            ``self.remove_tiny_boxes``.

    Returns:
        The crops stacked along a new first dimension, each resized to
        ``(self.X, self.Y)``; ``torch.empty(0)`` when no box survives.
    """
    # De-normalize to the feature map size. The multiplier is created on the
    # same device as the boxes so that non-CPU boxes do not trigger a device
    # mismatch in the multiplication below.
    multiplier = torch.tensor(
        [self.Wp, self.Hp, self.Wp, self.Hp], device=boxes.device
    ).view((4, 1))
    boxes = (boxes * multiplier).round().type(torch.int32)  # shape (4, p) (x1y1x2y2)

    # Clip boxes that are out of range (torchvision works on (p, 4) boxes).
    boxes = ops.clip_boxes_to_image(boxes.T, (self.Hp, self.Wp)).T

    # Remove tiny boxes.
    boxes, indices = self.remove_tiny_boxes(
        boxes, min_side=(self.Wp + self.Hp) // 20, idx=indices)

    regions = []
    for column in range(len(indices)):
        x_min, y_min, x_max, y_max = boxes[:, column]
        # The +1 makes the upper bound inclusive.
        cropped = features[:, x_min:x_max + 1, y_min:y_max + 1]
        # interpolate() needs a batch dimension; drop it again afterwards.
        cropped = F.interpolate(
            cropped.unsqueeze(0), (self.X, self.Y), mode="bilinear")[0]
        regions.append(cropped)

    return torch.stack(regions) if regions else torch.empty(0)
def _first_stage(self, imgs: torch.Tensor):
    """Run the proposal network over an image pyramid and collect candidates.

    Args:
        imgs: Image batch, unpacked as ``(batch, channels, height, width)``.

    Returns:
        ``(boxes, image_indices)`` -- integer ``x1, y1, x2, y2`` boxes clipped
        to the input image and, for every box, the batch index it belongs to --
        or ``None`` when no pyramid level scored above ``self.pNetThreshold``.
    """
    with EvalScope(self.pNet):
        _, _, h, w = imgs.shape
        scale = 12.0 / self.minSize  # This is initial scale
        min_l = min(h, w)
        b, s, i = [], [], []
        while min_l * scale >= 12.:
            # `imgs` is rebound here, so every level is resampled from the
            # previous level's output, always to a size derived from the
            # original (h, w).
            imgs = _nnf.interpolate(
                imgs, size=[int(h * scale), int(w * scale)], mode='area')
            reg, pro = self.pNet(imgs)
            pro = pro[:, 1]
            strd = 2. / scale
            cell = 12. / scale
            msk = torch.ge(pro, self.pNetThreshold)  # b, h, w
            if msk.any():
                indices = msk.nonzero()  # n, 3 <- (i, y, x)
                idx, rows, cols = indices[:, 0], indices[:, 1], indices[:, 2]
                pro = pro[msk]
                reg = reg.permute(0, 2, 3, 1)  # b, h, w, c <- (x1^, y1^, x2^, y2^)
                reg = reg[msk]
                # Map each firing cell back to input-image coordinates.
                x1, y1 = cols * strd, rows * strd
                x2, y2 = x1 + cell, y1 + cell
                bbs = torch.dstack([x1, y1, x2, y2]).squeeze(0)
                bbs = self._bb_reg(bbs, reg)
                nms_idx = batched_nms(bbs, pro, idx, self.nmsThreshold)
                b.append(bbs[nms_idx])
                s.append(pro[nms_idx])
                i.append(idx[nms_idx])
            scale = scale * self.factor

        if not b:
            return None

        b = torch.cat(b, dim=0)
        s = torch.cat(s, dim=0)
        i = torch.cat(i, dim=0)
        # Second NMS pass merges candidates across pyramid levels.
        nms_idx = batched_nms(b, s, i, self.nmsThreshold)
        # clip_boxes_to_image expects size=(height, width); (w, h) would clamp
        # x against the height and y against the width on non-square images.
        b = clip_boxes_to_image(b[nms_idx], size=(h, w)).int()
        i = i[nms_idx]
        return b, i
def __getitem__(self, index):
    """Load one sample and encode its per-point training targets.

    Returns:
        ``(img, self.points, targets)`` where ``targets`` is a dict with the
        keys ``'class'``, ``'distance'`` and ``'centerness'``.
    """
    img, target = tools.load_img_target(self, index)
    img_info = self.coco.loadImgs(self.ids[index])[0]
    iw, ih = img_info['width'], img_info['height']

    # Keep only the annotations accepted by tools.is_correct_instance.
    # (Mask decoding is currently disabled; only classes and boxes are used.)
    kept = [
        obj for obj in target
        if tools.is_correct_instance(obj, self.cat_idx_list, iw, ih)
    ]
    class_labels = [self.cat_to_label_map[obj['category_id']] for obj in kept]
    bbox_labels = [obj['bbox'] for obj in kept]
    mask_labels = []

    transformed = self.img_transform(
        image=img, bboxes=bbox_labels, class_labels=class_labels)
    img = tools.TENSOR_TRANSFORM(transformed['image'])
    class_labels = transformed['class_labels']
    bbox_labels = transformed['bboxes']

    if len(bbox_labels) == 0:
        # For any instance with classification label 0 (background), only the
        # classification loss is computed, without mask, centerness and bbox
        # losses. When an image has no instances, the value of the added bbox
        # therefore does not matter.
        mask_labels = [np.zeros((self.h, self.w))]
        bbox_labels = [[0., 0., 10., 10.]]
        class_labels = [0]

    class_labels = torch.as_tensor(class_labels)
    boxes_xywh = torch.as_tensor(bbox_labels, dtype=torch.float32)
    bbox_labels = cv_ops.box_convert(boxes_xywh, in_fmt='xywh', out_fmt='xyxy')
    # NOTE(review): clipping uses the original image size (ih, iw) although the
    # boxes have been through img_transform -- confirm the transform keeps the
    # image size, otherwise (self.h, self.w) may be intended.
    bbox_labels = cv_ops.clip_boxes_to_image(bbox_labels, (ih, iw))

    class_targets, distance_targets = self._encode_targets(
        class_labels, bbox_labels, None)
    centerness_targets = tools.encode_centerness_targets(distance_targets)

    targets = {
        'class': class_targets,
        'distance': distance_targets,
        'centerness': centerness_targets
    }
    return img, self.points, targets
def update(self, img: ImageT) -> np.ndarray:
    """Advance the tracker by one frame and return the updated target box.

    Crops a square instance region around the current target, runs
    ``self.model.inference`` on it with the stored kernels, decodes the
    regression output into boxes, and finally shifts and rescales
    ``self.target_bbox``.

    NOTE(review): ``response``, ``new_scale`` and ``disp_in_image`` are read
    below but never assigned in this body, while ``scores``, ``boxes`` and
    ``peak_response_pos`` are computed but never used. The method looks
    incomplete; unless those names exist as globals it raises ``NameError``.

    Args:
        img: The current frame.

    Returns:
        The result of ``self.target_bbox.as_xywh()`` after the update.
    """
    self.model.eval()

    # Square crop of the current instance side length, centred on the target.
    side_size = int(round(self.curr_instance_side_size))
    bbox = BBox.build_from_center_and_size(
        self.target_bbox.center, np.asarray((side_size, side_size)))
    instance_img = center_crop_and_resize(
        img, bbox, (self.cfg.instance_size, self.cfg.instance_size))

    # Optional hook that receives the extracted crop.
    if self.on_instance_img_extract:
        self.on_instance_img_extract(instance_img)

    instance_img = pil_to_tensor(instance_img).to(self.device)
    pred_reg, pred_cls = self.model.inference(instance_img, self.kernel_reg, self.kernel_cls)
    pred_reg = pred_reg.squeeze()
    pred_cls = pred_cls.squeeze()
    pred_cls = F.softmax(pred_cls, dim=1)
    pred_cls_max = pred_cls.argmax(dim=1)
    # TODO Store the range somewhere as it may be faster.
    scores = pred_cls[list(range(len(pred_cls))), pred_cls_max]
    scores[pred_cls_max == 0] = 0  # The 0-th position is the background.

    # TODO Think of modifying the regression predictions in place.
    # Decode anchor-relative offsets into centre/size boxes.
    xy_vals = pred_reg[:, :2] * self.anchors[:, 2:] + self.anchors[:, :2]
    wh_vals = torch.exp(pred_reg[:, 2:]) * self.anchors[:, 2:]
    boxes = torch.hstack((xy_vals, wh_vals))
    boxes = ops.box_convert(boxes, 'cxcywh', 'xyxy')
    boxes = ops.clip_boxes_to_image(
        boxes, (self.cfg.instance_size, self.cfg.instance_size))

    # NOTE(review): `response` is used on the right-hand side before any
    # assignment in this method.
    response = (1 - self.cfg.cosine_win_influence) * response + \
        self.cfg.cosine_win_influence * self.cosine_win

    # The assumption is that the peak response value is in the center of the
    # response map. Thus, we compute the change with respect to the center
    # and convert it back to the pixel coordinates in the image.
    peak_response_pos = np.asarray(
        np.unravel_index(response.argmax(), response.shape))

    # Update target scale.
    # NOTE(review): `new_scale` is not defined in the visible code.
    self.curr_instance_side_size *= new_scale

    # Change from [row, col] to [x, y] coordinates.
    # NOTE(review): `disp_in_image` is not defined in the visible code.
    self.target_bbox.shift(disp_in_image[::-1])
    self.target_bbox.rescale(new_scale, new_scale)

    return self.target_bbox.as_xywh()
def __getitem__(self, idx: int):
    """Build one synthetic sample: a random image plus random box targets.

    ``idx`` is accepted for the dataset protocol but does not influence the
    generated sample.

    Returns:
        ``(img, target)`` where ``target`` holds ``"boxes"`` (in
        ``self.box_fmt``) and ``"labels"``.
    """
    img = torch.rand(self.img_shape)

    raw_boxes = [self._random_bbox() for _ in range(self.num_boxes)]
    image_size = (self.img_shape[1], self.img_shape[2])
    boxes = ops.clip_boxes_to_image(
        torch.tensor(raw_boxes, dtype=torch.float32), image_size)

    # box_convert copes with in_fmt == out_fmt, so no special case is needed.
    converted_boxes = ops.box_convert(
        boxes, in_fmt="xyxy", out_fmt=self.box_fmt)

    labels = torch.randint(
        self.num_classes, (self.num_boxes, ), dtype=torch.long)

    target = {"boxes": converted_boxes, "labels": labels}
    return img, target
def multi_dim_boxes_clip(boxes: Union[Tensor, np.ndarray],
                         box_range: Tuple[int, int]) \
        -> Union[Tensor, np.ndarray]:
    """
    A multi-dim wrapper of `torchvision.ops.clip_boxes_to_image`.

    Args:
        boxes: boxes which will be clipped. The original size could be any
            format matching :math:`(*shape, 4)`.
        box_range: The width and height used to clip the boxes.

    Returns:
        new_boxes: boxes clipped to match the `box_range`, returned in the
            same container type (tensor or ndarray) as the input.
    """
    from_numpy = isinstance(boxes, np.ndarray)

    # Work on a copy so the caller's data is never touched.
    if from_numpy:
        working = torch.from_numpy(boxes).clone()
    else:
        working = boxes.clone()

    original_shape = working.size()
    # torchvision wants flat (N, 4) boxes and a (height, width) size, hence
    # the reshape and the reversed box_range.
    flat = working.reshape(-1, 4)
    clipped = clip_boxes_to_image(flat, box_range[::-1]).reshape(original_shape)

    if from_numpy:
        return clipped.numpy()
    return clipped
def __getitem__(self, idx: int):
    """Build one synthetic sample with a random image size and box count.

    Returns:
        ``(img, target)`` where ``target`` holds ``"image_id"`` (built from
        ``idx``), ``"boxes"`` (in ``self.box_fmt``, divided by the image
        width/height when ``self.normalize`` is set) and ``"labels"``.
    """
    # The order of the random draws below is kept fixed so that seeded runs
    # stay reproducible.
    h = random.randint(self.im_size_min, self.im_size_max)
    w = random.randint(self.im_size_min, self.im_size_max)
    img_shape = (3, h, w)
    img = torch.rand(img_shape)

    num_boxes = random.randint(1, self.num_boxes_max)
    labels = torch.randint(
        self.class_start, self.class_end, (num_boxes, ), dtype=torch.long)

    raw_boxes = [self._random_bbox(img_shape) for _ in range(num_boxes)]
    boxes = ops.clip_boxes_to_image(
        torch.tensor(raw_boxes, dtype=torch.float32), (h, w))
    # box_convert copes with in_fmt == out_fmt, so no special case is needed.
    boxes = ops.box_convert(boxes, in_fmt="xyxy", out_fmt=self.box_fmt)

    if self.normalize:
        scale = torch.tensor([w, h, w, h], dtype=torch.float32)
        boxes = boxes / scale

    target = {
        "image_id": torch.tensor([idx]),
        "boxes": boxes,
        "labels": labels,
    }
    return img, target
def compare(self, expected, actual, image=None, classifier=None):
    """Score how well the detected products match the expected planogram.

    Args:
        expected: Dict with ``'boxes'`` and ``'labels'``; may carry a prebuilt
            ``'graph'``.
        actual: Dict with the detected ``'boxes'`` and ``'labels'``.
        image: Optional image; when given, the reprojection threshold is 1 %
            of its shorter side and missing products can be re-classified.
        classifier: Optional classifier used for the second-chance
            re-classification of positions reported as missing.

    Returns:
        A score: ``0``/``1`` for the degenerate cases, otherwise the fraction
        of expected items found (or ``len(matching) / len(expected['boxes'])``
        when no homography could be computed).
    """
    reproj_threshold = 10 if image is None else min(image.shape[1:]) * 0.01

    if not len(actual['boxes']):
        # Nothing detected: perfect only if nothing was expected either.
        return 0 if len(expected['boxes']) else 1

    if 'graph' in expected:
        ge = expected['graph']
    else:
        ge = planograms.build_graph(
            expected['boxes'], expected['labels'], self.graph_threshold)
    ga = planograms.build_graph(
        actual['boxes'], actual['labels'], self.graph_threshold)

    matching = planograms.large_common_subgraph(ge, ga)  # TODO: Possibility to use Tonioni
    if not len(matching):
        return 0

    found, missing_indices, missing_positions, missing_labels = planograms.finalize_via_ransac(
        matching,
        expected['boxes'],
        actual['boxes'],
        expected['labels'],
        actual['labels'],
        reproj_threshold=reproj_threshold,
    )
    if found is None:  # --> couldn't calculate homography
        return len(matching) / len(expected['boxes'])

    can_reclassify = classifier is not None and image is not None
    if can_reclassify and len(missing_positions):
        missing_positions = tvops.clip_boxes_to_image(
            missing_positions, image.shape[1:])
        widths = missing_positions[:, 2] - missing_positions[:, 0]
        heights = missing_positions[:, 3] - missing_positions[:, 1]
        valid_positions = (widths > 1) & (heights > 1)

        if valid_positions.any():
            missing_indices = missing_indices[valid_positions]
            missing_positions = missing_positions[valid_positions]
            missing_labels = [
                l for l, v in zip(missing_labels, valid_positions) if v
            ]
            # Crop every supposedly missing position and classify it again.
            crops = [
                datautils.resize_for_classification(image[:, y1:y2, x1:x2])
                for x1, y1, x2, y2 in missing_positions.to(dtype=torch.long)
            ]
            reclass_labels = classifier.classify(torch.stack(crops))
            for idx, expected_label, actual_label in zip(
                    missing_indices, missing_labels, reclass_labels):
                if expected_label == actual_label[0]:
                    found[idx] = True

    return found.sum() / len(found)  # TODO: Also return which were actually missing
def _third_stage(
    self, imgs: torch.Tensor, r_bbs: torch.Tensor, r_idxs: torch.Tensor
) -> Optional[Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
    """Score the refined candidates with the output network and keep the best.

    Args:
        imgs: Image batch; its last two dimensions are used as the clipping
            size.
        r_bbs: Candidate boxes, one row per candidate.
        r_idxs: Batch index of each candidate.

    Returns:
        ``(boxes, image_indices, landmarks)`` for the candidates that pass the
        ``self.oNetThreshold`` score test and NMS, or ``None`` when no
        candidate passes the threshold.
    """
    _imgs = self._gather_rois(imgs, r_bbs, r_idxs, 48)
    with EvalScope(self.oNet):
        reg, lmk, pro = self.oNet(_imgs)

    mask = torch.ge(pro[:, 1], self.oNetThreshold)
    if not mask.any():
        return None

    # Restrict every per-candidate tensor to the candidates above threshold.
    # Landmarks are filtered too: the NMS indices below refer to positions in
    # the masked subset, so indexing the unmasked landmarks with them would
    # return landmarks of the wrong candidates.
    reg = reg[mask]
    lmk = lmk[mask]
    pro = pro[:, 1][mask]
    b = r_bbs[mask].type(torch.float32)
    i = r_idxs[mask]

    b = self._bb_reg(b, reg)
    j = batched_nms(b, pro, i, self.nmsThreshold)
    b = clip_boxes_to_image(b[j], size=imgs.shape[2:]).int()
    i = i[j]
    return b, i, lmk[j]
def eval_dihe(encoder, sampleset, testset, batch_size, num_workers, k=(1, ), verbose=True):
    """Evaluate top-k classification accuracy of ``encoder`` on ``testset``.

    Args:
        encoder: Embedding model; gradients are switched off for the
            evaluation and switched back on afterwards.
        sampleset: Reference samples handed to ``production.Classifier``.
        testset: Iterable of ``(img, target_anns, boxes)`` items.
            NOTE(review): the boxes are used directly as slice bounds, so they
            presumably hold integer coordinates -- confirm against the dataset.
        batch_size: Forwarded to the classifier.
        num_workers: Forwarded to the classifier.
        k: The ``k`` values for which top-k accuracy is reported.
        verbose: Print progress and a summary of the most frequent mistakes.

    Returns:
        Dict mapping each value in ``k`` to its accuracy. For an empty test
        set every accuracy is ``0.0`` instead of raising ZeroDivisionError.
    """
    if verbose:
        print('Preparing classifier...')
    encoder.requires_grad_(False)
    classifier = production.Classifier(
        encoder, sampleset, batch_size=batch_size, num_workers=num_workers, k=max(k))

    total = 0
    correct = {knn: 0 for knn in k}
    missed = {}             # annotation -> number of top-1 misses
    misclassification = {}  # annotation -> {wrong prediction -> count}
    total_per_ann = {}      # annotation -> number of occurrences

    if verbose:
        print('Eval start!')
    for i, (img, target_anns, boxes) in enumerate(testset):
        if verbose and i % 10 == 0:
            print(f'{i}...')
        boxes = tvops.clip_boxes_to_image(boxes, (img.shape[1], img.shape[2]))
        imgs = torch.stack([
            datautils.resize_for_classification(img[:, y1:y2, x1:x2])
            for x1, y1, x2, y2 in boxes
        ])
        pred_anns = classifier.classify(imgs)

        total += len(target_anns)
        for a1, a2 in zip(target_anns, pred_anns):
            total_per_ann[a1] = total_per_ann.get(a1, 0) + 1
            for knn in k:
                if a1 in a2[:knn]:
                    correct[knn] += 1
            if a1 != a2[0]:
                missed[a1] = missed.get(a1, 0) + 1
                confusions = misclassification.setdefault(a1, {})
                confusions[a2[0]] = confusions.get(a2[0], 0) + 1

    del classifier  # maybe this will solve memory problems caused by eval?
    encoder.requires_grad_(True)

    # Guard against an empty test set.
    accuracy = {knn: (c / total if total else 0.0) for knn, c in correct.items()}

    if verbose:
        print(
            f'Total annotations: {total}, Correctly guessed: {correct}, Accuracy: {accuracy}'
        )
        # Distinct loop names keep the `k` parameter from being rebound.
        most_missed = sorted(
            ((count / total_per_ann[ann], count, ann) for ann, count in missed.items()),
            reverse=True)[:10]
        print(
            f'Most missed: {", ".join(f"{a} ({n}, {p * 100} %)" for p, n, a in most_missed)}'
        )
        for _, miss_count, ann in most_missed[:3]:
            common_misclassifications = sorted(
                ((count / miss_count, count, wrong)
                 for wrong, count in misclassification[ann].items()),
                reverse=True)[:3]
            print(
                f'{ann}: Commonly mistaken for {", ".join(f"{a} ({n}, {p * 100} %)" for p, n, a in common_misclassifications)}'
            )
    return accuracy
def val_one_epoch(model, data_loader, coco_gt, dist_logger, epoch_idx, nms_cfg):
    """Run one validation pass and hand the detections to ``dist_logger``.

    For every image the predictions are ranked by class score times
    centerness, the best ``nms_pre`` are kept, clipped to the network input,
    thresholded by class score, de-duplicated with class-wise NMS, mapped
    back to the original image size and stored as ``xywh`` records.

    Args:
        model: Detector called as ``model(img)``; returns class, distance and
            centerness predictions.
        data_loader: Yields ``(img, data)`` where ``data`` provides
            ``'points'`` and ``'img_id'``.
        coco_gt: COCO ground-truth object used to look up image sizes.
        dist_logger: Collects and reports the predictions.
        epoch_idx: Epoch number forwarded to the tensorboard update.
        nms_cfg: Dict with ``'nms_pre'``, ``'cls_score_thr'`` and ``'iou_thr'``.
    """
    pred_instances = []
    nms_pre = nms_cfg['nms_pre']
    cls_score_thr = nms_cfg['cls_score_thr']
    iou_thr = nms_cfg['iou_thr']

    model.eval()
    processor = dist_logger.init_processor(data_loader)
    for img, data in processor:
        img = img.cuda(non_blocking=True)
        points = data['points'].cuda(non_blocking=True)
        img_info_list = coco_gt.loadImgs(data['img_id'].numpy())

        class_pred, distance_pred, centerness_pred = model(img)
        class_pred = class_pred.sigmoid()  # [B, num_points, num_classes]
        cls_pred_scores, cls_pred_indexes = class_pred.max(dim=-1)  # [B, num_points]
        bbox_pred = bbox_ops.convert_distance_to_bbox(points, distance_pred)  # [B, num_points, 4]
        centerness_pred = centerness_pred.sigmoid()  # [B, num_points]

        batch_size, _, _ = class_pred.shape
        _, _, ih, iw = img.shape
        for batch_idx in range(batch_size):
            b_cls_pred_scores = cls_pred_scores[batch_idx]  # [num_points]
            b_cls_pred_indexes = cls_pred_indexes[batch_idx]
            b_bbox_pred = bbox_pred[batch_idx, :]  # [num_points, 4]

            # Rank by score * centerness. k is clamped because topk raises
            # when asked for more elements than there are points.
            ranking = b_cls_pred_scores * centerness_pred[batch_idx]
            _, top_idx = ranking.topk(min(nms_pre, ranking.shape[-1]))  # [topk]
            top_class_pred_scores = b_cls_pred_scores[top_idx]
            top_class_pred_indexes = b_cls_pred_indexes[top_idx]
            nms_scores = ranking[top_idx]  # [topk]
            top_bbox_pred = cv_ops.clip_boxes_to_image(
                b_bbox_pred[top_idx, :], size=(ih, iw))  # [topk, 4]

            valid_mask = top_class_pred_scores > cls_score_thr
            valid_class_pred_scores = top_class_pred_scores[valid_mask]
            valid_class_pred_indexes = top_class_pred_indexes[valid_mask]
            valid_nms_scores = nms_scores[valid_mask]
            valid_bbox_pred = top_bbox_pred[valid_mask, :]

            keep_idx = cv_ops.batched_nms(
                valid_bbox_pred, valid_nms_scores, valid_class_pred_indexes, iou_thr)
            keep_class_pred_scores = valid_class_pred_scores[keep_idx]
            keep_class_pred_indexes = valid_class_pred_indexes[keep_idx]
            keep_bbox_pred = valid_bbox_pred[keep_idx, :]

            # Map boxes from network-input size back to the original image.
            oh = img_info_list[batch_idx]['height']
            ow = img_info_list[batch_idx]['width']
            keep_bbox_pred = bbox_ops.recover_bboxes(keep_bbox_pred, oh, ow, ih, iw)
            keep_bbox_pred = cv_ops.box_convert(
                keep_bbox_pred, in_fmt='xyxy', out_fmt='xywh')

            for cls_score, cls_idx, bbox in zip(
                    keep_class_pred_scores, keep_class_pred_indexes, keep_bbox_pred):
                pred_instances.append({
                    'image_id': int(data['img_id'][batch_idx]),
                    'category_id': int(cls_idx) + 1,
                    # Values are rounded to one decimal place.
                    'bbox': [float('%.1f' % coord) for coord in bbox.tolist()],
                    'score': float('%.1f' % cls_score)
                })

    dist_logger.save_pred_instances_local_rank(pred_instances)
    dist_logger.save_val_file()
    dist_logger.update_tensorboard_val_results(coco_gt, epoch_idx)
def val_one_epoch(model, data_loader, coco_gt, dist_logger, epoch_idx, nms_cfg):
    """Run one validation pass and evaluate the detections against COCO.

    For every image the predictions are ranked by class score times
    centerness, the best ``nms_pre`` are kept, clipped to the network input,
    thresholded by class score, de-duplicated with class-wise NMS, mapped
    back to the original image size and stored as ``xywh`` records.

    Args:
        model: Detector called as ``model(img, points)``; returns a dict with
            ``'class'``, ``'centerness'`` and ``'distance'``.
        data_loader: Yields ``(img, points, img_ids)`` batches.
        coco_gt: COCO ground-truth object used for image sizes, the
            label-to-category map and the final evaluation.
        dist_logger: Collects and evaluates the predictions.
        epoch_idx: Accepted for interface compatibility; not used here.
        nms_cfg: Dict with ``'nms_pre'``, ``'cls_score_thr'`` and ``'iou_thr'``.
    """
    pred_instances = []
    nms_pre = nms_cfg['nms_pre']
    cls_score_thr = nms_cfg['cls_score_thr']
    iou_thr = nms_cfg['iou_thr']
    _, _, label_to_cat_map = tools.get_cat_label_map(coco_gt, tools.COCO_CLASSES)

    model.eval()
    processor = tqdm.tqdm(data_loader, disable=not dist_logger.is_master_rank)
    for img, points, img_ids in processor:
        img = img.cuda(non_blocking=True)
        points = points.cuda(non_blocking=True)
        img_info_list = coco_gt.loadImgs(img_ids.numpy())

        pred = model(img, points)
        class_pred = pred['class'].sigmoid()  # [B, num_points, num_classes]
        centerness_pred = pred['centerness'].sigmoid()  # [B, num_points]
        bbox_pred = bbox_ops.convert_distance_to_bbox(points, pred['distance'])  # [B, num_points, 4]
        cls_pred_scores, cls_pred_indexes = class_pred.max(dim=-1)  # [B, num_points]

        batch_size, _, _ = class_pred.shape
        _, _, ih, iw = img.shape
        for batch_idx in range(batch_size):
            b_cls_pred_scores = cls_pred_scores[batch_idx]
            b_cls_pred_indexes = cls_pred_indexes[batch_idx]
            b_bbox_pred = bbox_pred[batch_idx, :]  # [num_points, 4]

            # Rank by score * centerness. k is clamped because topk raises
            # when asked for more elements than there are points.
            ranking = b_cls_pred_scores * centerness_pred[batch_idx]
            _, top_idx = ranking.topk(min(nms_pre, ranking.shape[-1]))
            top_class_pred_scores = b_cls_pred_scores[top_idx]
            top_class_pred_indexes = b_cls_pred_indexes[top_idx]
            nms_scores = ranking[top_idx]
            top_bbox_pred = cv_ops.clip_boxes_to_image(
                b_bbox_pred[top_idx, :], size=(ih, iw))  # [topk, 4]

            valid_mask = top_class_pred_scores > cls_score_thr
            valid_class_pred_scores = top_class_pred_scores[valid_mask]
            valid_class_pred_indexes = top_class_pred_indexes[valid_mask]
            valid_nms_scores = nms_scores[valid_mask]
            valid_bbox_pred = top_bbox_pred[valid_mask, :]

            keep_idx = cv_ops.batched_nms(
                valid_bbox_pred, valid_nms_scores, valid_class_pred_indexes, iou_thr)
            keep_class_pred_scores = valid_class_pred_scores[keep_idx]
            keep_class_pred_indexes = valid_class_pred_indexes[keep_idx]
            keep_bbox_pred = valid_bbox_pred[keep_idx, :]

            # Map boxes from network-input size back to the original image.
            oh = img_info_list[batch_idx]['height']
            ow = img_info_list[batch_idx]['width']
            keep_bbox_pred = bbox_ops.recover_bboxes(keep_bbox_pred, oh, ow, ih, iw)
            keep_bbox_pred = cv_ops.box_convert(
                keep_bbox_pred, in_fmt='xyxy', out_fmt='xywh')

            for cls_score, cls_idx, bbox in zip(
                    keep_class_pred_scores, keep_class_pred_indexes, keep_bbox_pred):
                pred_instances.append({
                    'image_id': int(img_ids[batch_idx]),
                    'category_id': label_to_cat_map[int(cls_idx) + 1],
                    # Values are rounded to one decimal place.
                    'bbox': [float('%.1f' % coord) for coord in bbox.tolist()],
                    'score': float('%.1f' % cls_score)
                })

    dist_logger.save_pred_instances_local_rank(pred_instances)
    dist_logger.save_val_file()
    dist_logger.evaluate(coco_gt)
def plot_planogram_eval(img_dir, test_imgs, test_annotations, planos, datatype, load_classifier_index, plano_idx, gln_state, dihe_state):
    '''
    Visualize planogram compliance evaluation steps.

    Does the proposed planogram compliance evaluation procedure step-by-step,
    plotting a visualization for each step.
    '''
    # Datasets: the planograms to evaluate, the classifier samples, and an
    # unresized copy used for drawing the rebuilt shelves.
    if datatype == 'gp':
        planoset = datautils.PlanogramTestSet(test_imgs, test_annotations, planos)
        sampleset = datautils.GroceryProductsDataset(img_dir, include_annotations=True)
        rebuildset = datautils.GroceryProductsDataset(img_dir, include_annotations=True, resize=False)
    else:
        planoset = datautils.InternalPlanoSet(planos)
        sampleset = datautils.InternalTrainSet(img_dir[0], include_annotations=True)
        rebuildset = datautils.InternalTrainSet(img_dir[0], include_annotations=True, resize=False)

    # Models: proposal generator and embedding encoder, both frozen.
    proposal_generator = proposals_eval.load_gln(gln_state, False)
    proposal_generator.requires_grad_(False)

    encoder = classification.macvgg_embedder(model='vgg16', pretrained=False).cuda()
    enc_state = torch.load(dihe_state)
    encoder.load_state_dict(
        enc_state[classification_training.EMBEDDER_STATE_DICT_KEY])
    encoder.eval()
    encoder.requires_grad_(False)
    del enc_state

    # Pick the requested planogram, or a random one when none was requested.
    if plano_idx is not None:
        datum = planoset[plano_idx]
    else:
        datum = random.choice(planoset)
    if datatype == 'gp':
        image, _, _, expected = datum
    else:
        image, expected = datum

    def paired_axes():
        # Side by side when the image is taller than wide, stacked otherwise.
        layout = (1, 2) if image.shape[2] < image.shape[1] else (2, 1)
        return plt.subplots(*layout, figsize=(12, 12))

    # Detect and classify the products visible in the image.
    generator = production.ProposalGenerator(proposal_generator)
    classifier = production.Classifier(
        encoder, sampleset, batch_size=8, load=load_classifier_index)
    boxes, images = generator.generate_proposals_and_images(image)
    boxes = boxes.detach().cpu()
    classes = [ann[0] for ann in classifier.classify(images)]
    actual = {'boxes': boxes, 'labels': classes}

    h, w = image.shape[1:]
    reproj_threshold = min(h, w) * 0.01
    maxy = boxes[:, 3].max().item()

    # Figure 1: detections next to the shelf rebuilt from them.
    _, (ax1, ax2) = paired_axes()
    utils.build_fig(image, detections=tvops.box_convert(boxes, 'xyxy', 'xywh'), ax=ax1)
    utils.build_rebuild(boxes, classes, rebuildset, maxy, ax=ax2)

    # Figure 2: expected vs. observed planogram graphs.
    _, (ax1, ax2) = paired_axes()
    if 'graph' in expected:
        ge = expected['graph']
    else:
        ge = planograms.build_graph(expected['boxes'], expected['labels'], 0.5)
    ga = planograms.build_graph(actual['boxes'], actual['labels'], 0.5)
    utils.build_rebuild(expected['boxes'], expected['labels'], rebuildset, ax=ax1)
    utils.draw_planograph(ge, expected['boxes'], ax=ax1, flip_y=True)
    utils.build_rebuild(boxes, classes, rebuildset, maxy, ax=ax2)
    utils.draw_planograph(ga, actual['boxes'], ax=ax2, flip_y=True)

    # Figure 3: the common subgraph found in both planograms.
    _, (ax1, ax2) = paired_axes()
    matching = planograms.large_common_subgraph(ge, ga)
    if len(matching):
        nodes_e, nodes_a = (list(l) for l in zip(*matching))
    else:
        nodes_e, nodes_a = [], []
    sge = ge.subgraph(nodes_e)
    sga = ga.subgraph(nodes_a)
    utils.build_rebuild(expected['boxes'], expected['labels'], rebuildset, ax=ax1)
    utils.draw_planograph(sge, expected['boxes'], ax=ax1, flip_y=True)
    utils.build_rebuild(boxes, classes, rebuildset, maxy, ax=ax2)
    utils.draw_planograph(sga, actual['boxes'], ax=ax2, flip_y=True)

    if not len(matching):
        plt.show()
        return

    # Project the expected layout into the image and collect what is missing.
    _found, found_actual, missing_indices, missing_positions, missing_labels = planograms.finalize_via_ransac(
        matching,
        expected['boxes'],
        actual['boxes'],
        expected['labels'],
        actual['labels'],
        reproj_threshold=reproj_threshold,
        return_matched_actual=True)

    # Drop missing positions that collapse after clipping to the image.
    missing_positions = tvops.clip_boxes_to_image(missing_positions, image.shape[1:])
    widths = missing_positions[:, 2] - missing_positions[:, 0]
    heights = missing_positions[:, 3] - missing_positions[:, 1]
    valid_positions = (widths > 1) & (heights > 1)
    missing_indices = missing_indices[valid_positions]
    missing_positions = missing_positions[valid_positions]
    missing_labels = [l for l, v in zip(missing_labels, valid_positions) if v]

    # Second chance: classify the crops at the missing positions again.
    if len(missing_positions) > 0:
        found_round2 = torch.full((len(missing_indices), ), False)
        missing_imgs = torch.stack([
            datautils.resize_for_classification(image[:, y1:y2, x1:x2])
            for x1, y1, x2, y2 in missing_positions.to(dtype=torch.long)
        ])
        reclass_labels = classifier.classify(missing_imgs)
        label_pairs = zip(missing_labels, reclass_labels)
        for position, (expected_label, actual_label) in enumerate(label_pairs):
            if expected_label == actual_label[0]:
                found_round2[position] = True

    # Final figure: matched detections, missing positions, and (highlighted)
    # the missing positions recovered by the second classification round.
    utils.build_fig(
        image,
        groundtruth=tvops.box_convert(actual['boxes'][found_actual], 'xyxy', 'xywh'),
        detections=tvops.box_convert(missing_positions, 'xyxy', 'xywh'),
    )
    if len(missing_positions) > 0:
        recovered = tvops.box_convert(missing_positions[found_round2], 'xyxy', 'xywh')
        utils.plot_boxes(recovered, color='yellow', hl_color='orange')
    plt.show()
def pipeline_demo(gln_state, dihe_state, dataset_folder, image_file, plano_file):
    '''
    Demonstrate the CVPCE pipeline.

    The dataset folder is expected to contain .png, .jpg and .jpeg files,
    one for each class, with the class label set as the filename.

    The planogram file should be a JSON file formatted in a manner such as

    \b
    [
        {
            "label": "class1",
            "box": [0, 0, 5, 5]
        },
        {
            "label": "class2",
            "box": [5, 0, 10, 5]
        }
    ]

    (in this case, the dataset folder should contain files for class1 and class2,
    such as class1.png and class2.jpg)
    '''

    # TODO: This shares a bunch of code with cvpce plot-planogram-eval; will want to refactor a bit at some point

    # Two axes: side by side when the image is taller than wide, stacked otherwise.
    def double_fig(img):
        return plt.subplots(1, 2, figsize=(
            12, 12)) if img.shape[2] < img.shape[1] else plt.subplots(
                2, 1, figsize=(12, 12))

    # Same folder twice: once for classification, once (train=False) for
    # drawing the rebuilt planograms.
    dataset = SimpleFolderSet(dataset_folder)
    rebuildset = SimpleFolderSet(dataset_folder, train=False)

    # Load the frozen proposal network and detect products in the image.
    state_dict = torch.load(gln_state)[MODEL_STATE_DICT_KEY]
    gln = proposals.gln().cuda()
    gln.load_state_dict(state_dict)
    gln.eval()
    gln.requires_grad_(False)
    generator = ProposalGenerator(gln)

    img = ttf.to_tensor(pil.Image.open(image_file))
    detections, images = generator.generate_proposals_and_images(img)

    # Load the frozen embedding encoder and classify every detection.
    encoder = classification.macvgg_embedder(model='vgg16',
                                             pretrained=False).cuda()
    enc_state = torch.load(dihe_state)
    encoder.load_state_dict(enc_state[EMBEDDER_STATE_DICT_KEY])
    encoder.eval()
    encoder.requires_grad_(False)
    classifier = Classifier(encoder, dataset)

    classes, embedding = classifier.classify(images, return_embedding=True)

    # Read the expected planogram from the JSON file.
    with open(plano_file) as pf:
        plano = json.load(pf)
    expected_boxes = torch.tensor([o['box'] for o in plano], dtype=torch.float)
    expected_labels = [o['label'] for o in plano]
    actual_boxes = detections.detach().cpu()
    actual_labels = [c[0] for c in classes]

    # Build both planogram graphs and find their common subgraph.
    ge = planograms.build_graph(expected_boxes, expected_labels, thresh_size=0.7)
    ga = planograms.build_graph(actual_boxes, actual_labels, thresh_size=0.7)
    matching = planograms.large_common_subgraph(ge, ga)
    nodes_e, nodes_a = (list(l) for l in zip(*matching)) if len(matching) else ([], [])
    sge = ge.subgraph(nodes_e)
    sga = ga.subgraph(nodes_a)

    # Reprojection threshold: 1 % of the shorter image side.
    h, w = img.shape[1:]
    reproj_threshold = min(h, w) * 0.01
    _, found_actual, expected_positions, missing_indices, missing_positions, missing_labels = planograms.finalize_via_ransac(
        matching,
        expected_boxes,
        actual_boxes,
        expected_labels,
        actual_labels,
        reproj_threshold=reproj_threshold,
        return_matched_actual=True,
        return_expected_positions=True)

    # Drop missing positions that collapse after clipping to the image.
    missing_positions = tvops.clip_boxes_to_image(missing_positions, img.shape[1:])
    valid_positions = (missing_positions[:, 2] - missing_positions[:, 0] > 1) & (missing_positions[:, 3] - missing_positions[:, 1] > 1)
    missing_indices = missing_indices[valid_positions]
    missing_positions = missing_positions[valid_positions]
    missing_labels = [l for l, v in zip(missing_labels, valid_positions) if v]

    # Second chance: classify the crops at the missing positions again.
    if len(missing_positions) > 0:
        found_round2 = torch.full((len(missing_indices), ), False)
        missing_imgs = torch.stack([
            resize_for_classification(img[:, y1:y2, x1:x2])
            for x1, y1, x2, y2 in missing_positions.to(dtype=torch.long)
        ])
        reclass_labels = classifier.classify(missing_imgs)
        for idx, (expected_label, actual_label) in enumerate(
                zip(missing_labels, reclass_labels)):
            if expected_label == actual_label[0]:
                found_round2[idx] = True

    # Figure: input image next to the expected planogram.
    _, (ax1, ax2) = double_fig(img)
    utils.build_fig(img, ax=ax1)
    utils.build_rebuild(expected_boxes, expected_labels, rebuildset, ax=ax2)
    ax1.set_title('Image to evaluate')
    ax2.set_title('Planogram')
    plt.show()

    # Figure: raw detections drawn on the image.
    utils.show(
        img, utils.recall_tensor(tvops.box_convert(detections, 'xyxy', 'xywh')))

    # Figures: embedding visualisations, without and with positives.
    data_imgs = torch.stack([i for i, _, _, _ in dataset])
    utils.show_demo_emb_fig(data_imgs, classifier.embedding, images, embedding, draw_positives=False)
    utils.show_demo_emb_fig(data_imgs, classifier.embedding, images, embedding)

    # Figure: image next to the planogram rebuilt from classified detections.
    # NOTE(review): this uses `detections` while the later figures use the
    # detached CPU copy `actual_boxes` -- confirm this is intentional.
    _, (ax1, ax2) = double_fig(img)
    utils.build_fig(img, ax=ax1)
    utils.build_rebuild(detections, actual_labels, rebuildset, ax=ax2)
    ax1.set_title('Image')
    ax2.set_title('Classified detections = "Observed planogram"')
    plt.show()

    # Figure: full expected vs. observed planogram graphs.
    _, (ax1, ax2) = double_fig(img)
    utils.build_rebuild(expected_boxes, expected_labels, rebuildset, ax=ax1)
    utils.draw_planograph(ge, expected_boxes, ax=ax1, flip_y=True)
    utils.build_rebuild(actual_boxes, actual_labels, rebuildset, ax=ax2)
    utils.draw_planograph(ga, actual_boxes, ax=ax2, flip_y=True)
    ax1.set_title('Expected planogram')
    ax2.set_title('Observed planogram')
    plt.show()

    # Figure: the common subgraph in both planograms.
    _, (ax1, ax2) = double_fig(img)
    utils.build_rebuild(expected_boxes, expected_labels, rebuildset, ax=ax1)
    utils.draw_planograph(sge, expected_boxes, ax=ax1, flip_y=True)
    utils.build_rebuild(actual_boxes, actual_labels, rebuildset, ax=ax2)
    utils.draw_planograph(sga, actual_boxes, ax=ax2, flip_y=True)
    ax1.set_title('Expected planogram')
    ax2.set_title('Observed planogram')
    plt.show()

    # Figure: expected positions projected into the image.
    utils.show(img, tvops.box_convert(expected_positions, 'xyxy', 'xywh'))

    # Final figure: matched detections, missing positions, and (highlighted)
    # the missing positions recovered by the second classification round.
    utils.build_fig(
        img,
        groundtruth=tvops.box_convert(actual_boxes[found_actual], 'xyxy', 'xywh'),
        detections=tvops.box_convert(missing_positions, 'xyxy', 'xywh'),
    )
    if len(missing_positions) > 0:
        utils.plot_boxes(tvops.box_convert(missing_positions[found_round2],
                                           'xyxy', 'xywh'),
                         color='yellow',
                         hl_color='orange')
    plt.show()