def calculate_accuracy_for_matching(match_res_file, topk_acc):
    max_k = max(topk_acc)
    correct_counts = [0] * max_k
    num_total = 0
    all_gt = []
    all_pred = []
    for parts in tsv_io.tsv_reader(match_res_file):
        num_total += 1
        query_fea_idx = parts[0]
        query_bbox = json.loads(parts[1])
        pred_labels = json.loads(parts[2])
        # calculate mAP
        all_gt.append([query_fea_idx, qd_common.json_dump([query_bbox])])
        all_pred.append([query_fea_idx, qd_common.json_dump(
            [{"class": pred_labels[0][0], "conf": pred_labels[0][1] / 1000.0,
              "rect": query_bbox["rect"]}])])
        # calculate top k accuracy
        gt_label = query_bbox["class"]
        for i in range(min(max_k, len(pred_labels))):
            cur_pred = pred_labels[i][0]
            if cur_pred == gt_label:
                correct_counts[i] += 1
                break

    map_report = match_res_file + ".eval.map"
    pred_file = match_res_file + ".pred"
    gt_file = match_res_file + ".gt"
    tsv_io.tsv_writer(all_pred, pred_file)
    tsv_io.tsv_writer(all_gt, gt_file)
    deteval(truth=gt_file, dets=pred_file, report_file=map_report)

    for i in range(1, len(correct_counts)):
        correct_counts[i] += correct_counts[i - 1]
    return [c / float(num_total) for c in correct_counts]
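# Usage sketch (illustrative only; the result-file path and top-k values below
# are hypothetical, not taken from this repo):
#
#     accs = calculate_accuracy_for_matching("output/match_result.tsv", topk_acc=[1, 5])
#     # accs is cumulative, so accs[0] is top-1 accuracy and accs[4] is top-5 accuracy.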
def __init__(self, tsvfile, labelmap, labelfile=None, transform=None,
             logger=None, for_test=False, enlarge_bbox=1.0, use_cache=True,
             is_debug=False):
    """ TSV dataset with cropped images from bboxes labels
    Params:
        tsvfile: image tsv file, columns are key, bboxes, b64_image_string
        labelmap: file of all categories
        labelfile: label tsv file, columns are key, bboxes
    """
    self.min_pixels = 3
    self.tsv = TSVFile(tsvfile)
    self.tsvfile = tsvfile
    self.labelfile = labelfile
    self.transform = transform
    self.label_to_idx = {}
    self.labels = []
    if labelmap:
        with open(labelmap, 'r') as fp:
            for i, line in enumerate(fp):
                l = line.rstrip('\n')
                assert (l not in self.label_to_idx)
                self.labels.append(l)
                self.label_to_idx[l] = i
    self.img_col = 2
    self.label_col = 1
    self.key_col = 0
    self.is_debug = is_debug
    self.logger = logger
    self._for_test = for_test
    self._enlarge_bbox = enlarge_bbox
    self._label_counts = None

    _cache_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), "cache")
    self._bbox_idx_file = os.path.join(
        _cache_dir, "{}.tsv".format(hash_sha1((
            tsvfile, labelfile if labelfile else "",
            str(for_test), str(enlarge_bbox)))))
    try:
        if not use_cache or not os.path.isfile(self._bbox_idx_file) \
                or worth_create(tsvfile, self._bbox_idx_file) \
                or (labelfile and worth_create(labelfile, self._bbox_idx_file)):
            _class_instance_idx = self._generate_class_instance_index_parallel()
            tsv_writer(_class_instance_idx, self._bbox_idx_file)
        self._bbox_idx_tsv = TSVFile(self._bbox_idx_file)
    except Exception as e:
        if os.path.isfile(self._bbox_idx_file):
            os.remove(self._bbox_idx_file)
        raise e
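# Construction sketch (illustrative; the class name CropClassTSVDataset and the
# file paths are assumptions, not confirmed by this snippet):
#
#     dataset = CropClassTSVDataset("data/train.tsv", "data/labelmap.txt",
#                                   labelfile="data/train.label.tsv",
#                                   transform=None, for_test=False)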
def convert_matching_format(infile, outfile):
    def gen_rows():
        for parts in tsv_reader(infile):
            assert (len(parts) == 3)
            pred_labels = [it.split(':') for it in parts[2].split(';')]
            pred_labels = [[label, int(score), ""] for label, score in pred_labels]
            yield parts[0], parts[1], json_dump(pred_labels)
    tsv_writer(gen_rows(), outfile)
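# Input/output sketch: each input row is expected to look like
#     key \t gt_bbox_json \t "label_a:987;label_b:321"
# and the third column is rewritten as a JSON list of
#     [["label_a", 987, ""], ["label_b", 321, ""]]
# (the labels and scores here are made-up examples).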
def _delf_feature_match(args):
    query_fea_rows, all_query_fea, all_index_fea, outfile, max_k = args
    # resume from last checkpoint
    last_cache = {}
    checkpoints = [outfile + ".tmp", outfile]
    for cache_file in checkpoints:
        if op.isfile(cache_file):
            for parts in tsv_io.tsv_reader(cache_file):
                if len(parts) == 3:
                    try:
                        json.loads(parts[1])
                        json.loads(parts[2])
                    except Exception:
                        continue
                    last_cache[int(parts[0])] = parts

    def gen_rows():
        for query_idx in query_fea_rows:
            print(query_idx)
            if query_idx in last_cache:
                yield last_cache[query_idx]
            else:
                query_fea = all_query_fea[query_idx]
                # score every index feature by its number of matched inliers
                scores = []
                for i in range(len(all_index_fea)):
                    index_fea = all_index_fea[i]
                    inliers, locations_1_to_use, locations_2_to_use = matcher.get_inliers(
                        query_fea['location_np_list'],
                        query_fea['descriptor_np_list'],
                        index_fea['location_np_list'],
                        index_fea['descriptor_np_list'])
                    if inliers is not None:
                        score = sum(inliers)
                    else:
                        score = 0
                    scores.append((i, score))
                scores = sorted(scores, key=lambda t: t[1], reverse=True)
                # keep the top max_k matched index images as predictions
                pred_labels = []
                for i, (matched_fea_idx, score) in enumerate(scores):
                    if i >= max_k:
                        break
                    cur_pred = get_bbox_from_fea(all_index_fea[matched_fea_idx])["class"]
                    pred_labels.append([cur_pred, score, matched_fea_idx])
                query_bbox = get_bbox_from_fea(query_fea)
                yield (str(query_idx), qd_common.json_dump(query_bbox),
                       qd_common.json_dump(pred_labels))

    tsv_io.tsv_writer(gen_rows(), outfile)
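# Dispatch sketch (assumption: callers shard the query rows across worker
# processes; the pool size, shard variable, and output names below are
# hypothetical):
#
#     from multiprocessing import Pool
#     tasks = [(rows, all_query_fea, all_index_fea,
#               "match_result.{}.tsv".format(i), 5)
#              for i, rows in enumerate(row_shards)]
#     with Pool(8) as p:
#         p.map(_delf_feature_match, tasks)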
def extract(self, input_path, output_path):
    '''extract features from single image without batch process.
    '''
    assert self.mode.lower() in ['pca', 'delf']
    batch_timer = AverageMeter()
    data_timer = AverageMeter()
    since = time.time()

    # dataloader.
    # dataset = ImageFolder(
    #     root=input_path,
    #     transform=transforms.ToTensor())
    bgr_normalize = transforms.Normalize(mean=[0.406, 0.456, 0.485],
                                         std=[0.225, 0.224, 0.229])
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(256),
        # transforms.CenterCrop(224),
        transforms.ToTensor(),
        bgr_normalize,
    ])
    if input_path.endswith('.yaml'):
        dataset = CropClassTSVDatasetYaml(input_path,
                                          session_name='test',
                                          transform=transform,
                                          enlarge_bbox=self.enlarge_bbox)
    else:
        raise NotImplementedError()
    self.dataloader = torch.utils.data.DataLoader(dataset=dataset,
                                                  batch_size=1,
                                                  shuffle=(not __DEBUG__),
                                                  num_workers=0)

    feature_maps = []
    if self.mode.lower() in ['pca']:
        for batch_idx, (inputs, fileinfos) in enumerate(self.dataloader):
            filename = json.dumps(fileinfos)
            # skip inputs with the wrong shape or an unusable size.
            if not (len(inputs.size()) == 4):
                if __DEBUG__:
                    print('wrong input dimension! ({},{})'.format(
                        filename, inputs.size()))
                continue
            if not (inputs.size(2) * inputs.size(3) <= 1200 * 1200):
                if __DEBUG__:
                    print('passed: image size too large! ({},{})'.format(
                        filename, inputs.size()))
                continue
            if not (inputs.size(2) >= 112 and inputs.size(3) >= 112):
                if __DEBUG__:
                    print('passed: image size too small! ({},{})'.format(
                        filename, inputs.size()))
                continue
            data_timer.update(time.time() - since)

            # prepare inputs
            if __is_cuda__():
                inputs = __cuda__(inputs)
            inputs = __to_var__(inputs)

            # get delf feature only for pca calculation.
            pca_feature = self.__extract_delf_feature__(
                inputs.data, filename, mode='pca')
            if pca_feature is not None:
                feature_maps.extend(pca_feature)
            batch_timer.update(time.time() - since)
            since = time.time()

            # progress
            log_msg = ('\n[Extract][Processing:({batch}/{size})] ' +
                       'eta: (data:{data:.3f}s),(batch:{bt:.3f}s)').format(
                batch=batch_idx + 1,
                size=len(self.dataloader),
                data=data_timer.val,
                bt=batch_timer.val)
            print(log_msg)
            print('\nnumber of selected features so far: {}'.format(
                len(feature_maps)))
            if len(feature_maps) >= 1000000:    # upper limit.
                break

            # free GPU cache every 10 batches.
            if batch_idx % 10 == 0:
                torch.cuda.empty_cache()
                if __DEBUG__:
                    print('GPU Memory flushed !!!!!!!!!')

        # train PCA.
        self.pca(feature_maps)
    else:
        assert self.mode.lower() in ['delf']
        feature_maps = []

        def gen_rows():
            since = time.time()
            for batch_idx, (inputs, fileinfos) in enumerate(self.dataloader):
                filename = json.dumps(fileinfos)
                # skip inputs with the wrong shape or an unusable size.
                if not (len(inputs.size()) == 4):
                    print('wrong input dimension! ({},{})'.format(
                        filename, inputs.size()))
                    continue
                if not (inputs.size(2) * inputs.size(3) <= 1200 * 1200):
                    print('passed: image size too large! ({},{})'.format(
                        filename, inputs.size()))
                    continue
                if not (inputs.size(2) >= 10 and inputs.size(3) >= 10):
                    print('passed: image size too small! ({},{})'.format(
                        filename, inputs.size()))
                    continue
                data_timer.update(time.time() - since)

                # prepare inputs
                if __is_cuda__():
                    inputs = __cuda__(inputs)
                # inputs = __to_var__(inputs)

                # get delf everything (score, feature, etc.)
                delf_feature = self.__extract_delf_feature__(
                    inputs.detach(), filename, mode='delf')
                if delf_feature is not None:
                    yield [self.delf_features2str(delf_feature)]

                # log.
                batch_timer.update(time.time() - since)
                since = time.time()
                log_msg = ('\n[Extract][Processing:({batch}/{size})] ' +
                           'eta: (data:{data:.3f}s),(batch:{bt:.3f}s)').format(
                    batch=batch_idx + 1,
                    size=len(self.dataloader),
                    data=data_timer.val,
                    bt=batch_timer.val)
                print(log_msg)

                # free GPU cache every 10 batches.
                if batch_idx % 10 == 0:
                    torch.cuda.empty_cache()
                    if __DEBUG__:
                        print('GPU Memory flushed !!!!!!!!!')

        # save DeLF features as tsv rows instead of a pickle file.
        # self.__save_delf_features_to_file__(feature_maps, output_path)
        tsv_writer(gen_rows(), output_path)
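# Usage sketch (illustrative; the DelfFeatureExtractor class name, its
# constructor arguments, and the file paths are assumptions, not defined in
# this snippet):
#
#     extractor = DelfFeatureExtractor(mode='delf', enlarge_bbox=1.0)
#     extractor.extract("data/index_images.yaml", "output/index_delf_features.tsv")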