Example #1
def calculate_accuracy_for_matching(match_res_file, topk_acc):
    """Compute cumulative top-k accuracies (k = 1..max(topk_acc)) and write a
    detection-style mAP report from a matching-result TSV whose rows are
    (query_fea_idx, query_bbox_json, pred_labels_json).
    """
    max_k = max(topk_acc)
    correct_counts = [0] * max_k
    num_total = 0
    all_gt = []
    all_pred = []
    for parts in tsv_io.tsv_reader(match_res_file):
        num_total += 1
        query_fea_idx = parts[0]
        query_bbox = json.loads(parts[1])
        pred_labels = json.loads(parts[2])
        # calculate mAP
        all_gt.append([query_fea_idx, qd_common.json_dump([query_bbox])])
        all_pred.append([query_fea_idx, qd_common.json_dump(
                [{"class": pred_labels[0][0], "conf": pred_labels[0][1]/1000.0, "rect": query_bbox["rect"]}])])
        # calculate top k accuracy
        gt_label = query_bbox["class"]
        for i in range(min(max_k, len(pred_labels))):
            cur_pred = pred_labels[i][0]
            if cur_pred == gt_label:
                correct_counts[i] += 1
                break

    map_report = match_res_file + ".eval.map"
    pred_file = match_res_file + ".pred"
    gt_file = match_res_file + ".gt"
    tsv_io.tsv_writer(all_pred, pred_file)
    tsv_io.tsv_writer(all_gt, gt_file)
    deteval(truth=gt_file, dets=pred_file, report_file=map_report)

    for i in range(1, len(correct_counts)):
        correct_counts[i] += correct_counts[i-1]

    return [c / float(num_total) for c in correct_counts]
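The top-k bookkeeping above records only the first rank at which the ground-truth label appears and then turns the per-rank counts into cumulative accuracies. A minimal, self-contained sketch of just that step (hypothetical labels, no dependency on the tsv_io/qd_common/deteval helpers):

def topk_accuracy(gt_labels, ranked_preds, max_k):
    # per rank i: number of queries whose first correct hit is at rank i
    correct_counts = [0] * max_k
    for gt, preds in zip(gt_labels, ranked_preds):
        for i, pred in enumerate(preds[:max_k]):
            if pred == gt:
                correct_counts[i] += 1
                break
    # cumulative sum: "first hit at rank i" becomes "hit within top (i+1)"
    for i in range(1, max_k):
        correct_counts[i] += correct_counts[i - 1]
    return [c / float(len(gt_labels)) for c in correct_counts]

# topk_accuracy(["cat", "dog"], [["cat", "dog"], ["cat", "dog"]], 2) -> [0.5, 1.0]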
Example #2
    def __init__(self,
                 tsvfile,
                 labelmap,
                 labelfile=None,
                 transform=None,
                 logger=None,
                 for_test=False,
                 enlarge_bbox=1.0,
                 use_cache=True,
                 is_debug=False):
        """ TSV dataset with cropped images from bboxes labels
        Params:
            tsvfile: image tsv file, columns are key, bboxes, b64_image_string
            labelmap: file of all categories
            labelfile: label tsv file, columns are key, bboxes
        """
        self.min_pixels = 3
        self.tsv = TSVFile(tsvfile)
        self.tsvfile = tsvfile
        self.labelfile = labelfile
        self.transform = transform
        self.label_to_idx = {}
        self.labels = []
        if labelmap:
            with open(labelmap, 'r') as fp:
                for i, line in enumerate(fp):
                    l = line.rstrip('\n')
                    assert (l not in self.label_to_idx)
                    self.labels.append(l)
                    self.label_to_idx[l] = i
        self.img_col = 2
        self.label_col = 1
        self.key_col = 0
        self.is_debug = is_debug
        self.logger = logger
        self._for_test = for_test
        self._enlarge_bbox = enlarge_bbox
        self._label_counts = None

        _cache_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                  "cache")
        self._bbox_idx_file = os.path.join(
            _cache_dir, "{}.tsv".format(
                hash_sha1((tsvfile, labelfile if labelfile else "",
                           str(for_test), str(enlarge_bbox)))))
        try:
            if not use_cache or not os.path.isfile(self._bbox_idx_file) or worth_create(tsvfile, self._bbox_idx_file) \
                    or (labelfile and worth_create(labelfile, self._bbox_idx_file)):
                _class_instance_idx = self._generate_class_instance_index_parallel(
                )
                tsv_writer(_class_instance_idx, self._bbox_idx_file)
            self._bbox_idx_tsv = TSVFile(self._bbox_idx_file)
        except Exception:
            # remove a possibly corrupted cache file before re-raising
            if os.path.isfile(self._bbox_idx_file):
                os.remove(self._bbox_idx_file)
            raise
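A hypothetical instantiation of the dataset above (the enclosing class name is not shown in the snippet, and the paths and labelmap layout below are placeholders):

# Hypothetical usage; CropClassTSVDataset is an assumed name for the enclosing class.
dataset = CropClassTSVDataset(
    tsvfile="data/train_images.tsv",    # columns: key, bboxes, b64_image_string
    labelmap="data/labelmap.txt",       # one category name per line
    labelfile="data/train_labels.tsv",  # columns: key, bboxes
    transform=None,
    for_test=False,
    enlarge_bbox=1.2,
)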
Example #3
def convert_matching_format(infile, outfile):
    """Convert column 2 from 'label:score' pairs joined by ';' into a JSON list
    of [label, int(score), ""] triples; columns 0 and 1 are kept as-is."""
    def gen_rows():
        for parts in tsv_reader(infile):
            assert (len(parts) == 3)
            pred_labels = [it.split(':') for it in parts[2].split(';')]
            pred_labels = [[label, int(score), ""]
                           for label, score in pred_labels]
            yield parts[0], parts[1], json_dump(pred_labels)

    tsv_writer(gen_rows(), outfile)
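For reference, the row format the converter above expects and produces (illustrative values only):

# Hypothetical input row (tab-separated): key, bbox JSON, 'label:score' pairs joined by ';'
#   img_001    {"class": "cat", "rect": [0, 0, 10, 10]}    cat:950;dog:120
# The corresponding output row keeps columns 0 and 1 and replaces column 2 with
#   json_dump([["cat", 950, ""], ["dog", 120, ""]])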
Example #4
def _delf_feature_match(args):
    """Match each query feature against all index features by counting geometric
    inliers (matcher.get_inliers), keep the top max_k candidates per query, and
    write them to outfile, resuming from any partially written results."""
    query_fea_rows, all_query_fea, all_index_fea, outfile, max_k = args

    # resume from last checkpoint
    last_cache = {}
    checkpoints = [outfile + ".tmp", outfile]
    for cache_file in checkpoints:
        if op.isfile(cache_file):
            for parts in tsv_io.tsv_reader(cache_file):
                if len(parts) == 3:
                    try:
                        json.loads(parts[1])
                        json.loads(parts[2])
                    except Exception:
                        continue
                    last_cache[int(parts[0])] = parts

    def gen_rows():
        for query_idx in query_fea_rows:
            print(query_idx)
            if query_idx in last_cache:
                yield last_cache[query_idx]
            else:
                query_fea = all_query_fea[query_idx]
                scores = []
                for i in range(len(all_index_fea)):
                    index_fea = all_index_fea[i]
                    inliers, locations_1_to_use, locations_2_to_use = matcher.get_inliers(
                            query_fea['location_np_list'],
                            query_fea['descriptor_np_list'],
                            index_fea['location_np_list'],
                            index_fea['descriptor_np_list'])
                    if inliers is not None:
                        score = sum(inliers)
                    else:
                        score = 0
                    scores.append((i, score))
                scores = sorted(scores, key=lambda t: t[1], reverse=True)
                # use top1 matching image
                pred_labels = []
                for i, (matched_fea_idx, score) in enumerate(scores):
                    if i >= max_k:
                        break
                    cur_pred = get_bbox_from_fea(all_index_fea[matched_fea_idx])["class"]
                    pred_labels.append([cur_pred, score, matched_fea_idx])

                query_bbox = get_bbox_from_fea(query_fea)
                yield str(query_idx), qd_common.json_dump(query_bbox), qd_common.json_dump(pred_labels)

    tsv_io.tsv_writer(gen_rows(), outfile)
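Because the worker above takes all of its inputs packed into one tuple, it is easy to fan out over several processes. A rough dispatch sketch (the interleaved chunking, pool size, and per-part output naming are assumptions, not part of the example):

from multiprocessing import Pool

def match_in_parallel(all_query_fea, all_index_fea, outfile_prefix, max_k=5, num_workers=4):
    # hypothetical fan-out; each worker resumes from and writes its own partial TSV
    args_list = [
        (list(range(i, len(all_query_fea), num_workers)),  # interleaved query rows
         all_query_fea, all_index_fea,
         "{}.part{}.tsv".format(outfile_prefix, i), max_k)
        for i in range(num_workers)
    ]
    with Pool(num_workers) as pool:
        pool.map(_delf_feature_match, args_list)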
Example #5
    def extract(self, input_path, output_path):
        '''Extract features from single images without batch processing.
        '''
        assert self.mode.lower() in ['pca', 'delf']
        batch_timer = AverageMeter()
        data_timer = AverageMeter()

        # dataloader.
        # dataset = ImageFolder(
        #     root = input_path,
        #     transform = transforms.ToTensor())
        bgr_normalize = transforms.Normalize(mean=[0.406, 0.456, 0.485],
                                             std=[0.225, 0.224, 0.229])
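        # These are the ImageNet channel statistics in reverse (BGR) order
        # (RGB mean 0.485/0.456/0.406, std 0.229/0.224/0.225), presumably
        # because the cropped images arrive in BGR channel order.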
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            # transforms.CenterCrop(224),
            transforms.ToTensor(),
            bgr_normalize,
        ])

        if input_path.endswith('.yaml'):
            dataset = CropClassTSVDatasetYaml(input_path,
                                              session_name='test',
                                              transform=transform,
                                              enlarge_bbox=self.enlarge_bbox)
        else:
            raise NotImplementedError()

        self.dataloader = torch.utils.data.DataLoader(dataset=dataset,
                                                      batch_size=1,
                                                      shuffle=(not __DEBUG__),
                                                      num_workers=0)
        feature_maps = []
        if self.mode.lower() in ['pca']:
            # bar = Bar('[{}]{}'.format(self.mode.upper(), self.title), max=len(self.dataloader))
            since = time.time()  # start timing the first batch's data loading
            for batch_idx, (inputs, fileinfos) in enumerate(self.dataloader):
                filename = json.dumps(fileinfos)
                # if filename != "https://s-media-cache-ak0.pinimg.com/600x315/44/02/42/440242443ac39b6f6d5a6f41bae2bd31.jpg":
                #     continue
                # image size upper limit.
                if not (len(inputs.size()) == 4):
                    if __DEBUG__:
                        print('wrong input dimension! ({},{})'.format(
                            filename, inputs.size()))
                    continue
                if not (inputs.size(2) * inputs.size(3) <= 1200 * 1200):
                    if __DEBUG__:
                        print('passed: image size too large! ({},{})'.format(
                            filename, inputs.size()))
                    continue
                if not (inputs.size(2) >= 112 and inputs.size(3) >= 112):
                    if __DEBUG__:
                        print('passed: image size too small! ({},{})'.format(
                            filename, inputs.size()))
                    continue

                data_timer.update(time.time() - since)
                # prepare inputs
                if __is_cuda__():
                    inputs = __cuda__(inputs)
                inputs = __to_var__(inputs)

                # get delf feature only for pca calculation.
                pca_feature = self.__extract_delf_feature__(inputs.data,
                                                            filename,
                                                            mode='pca')
                if pca_feature is not None:
                    feature_maps.extend(pca_feature)

                batch_timer.update(time.time() - since)
                since = time.time()

                # progress
                log_msg = ('\n[Extract][Processing:({batch}/{size})] '
                           'eta: (data:{data:.3f}s),(batch:{bt:.3f}s)').format(
                               batch=batch_idx + 1,
                               size=len(self.dataloader),
                               data=data_timer.val,
                               bt=batch_timer.val)
                # tt=bar.elapsed_td)
                print(log_msg)
                # bar.next()
                print('\nnumber of selected features so far: {}'.format(
                    len(feature_maps)))
                if len(feature_maps) >= 1000000:  # UPPER LIMIT.
                    break

                # free GPU cache every.
                if batch_idx % 10 == 0:
                    torch.cuda.empty_cache()
                    if __DEBUG__:
                        print('GPU Memory flushed !!!!!!!!!')

            # train PCA.
            self.pca(feature_maps)

        else:
            # bar = Bar('[{}]{}'.format(self.mode.upper(), self.title), max=len(self.dataloader))
            assert self.mode.lower() in ['delf']
            feature_maps = []

            def gen_rows():
                since = time.time()
                for batch_idx, (inputs,
                                fileinfos) in enumerate(self.dataloader):
                    filename = json.dumps(fileinfos)
                    # image size upper limit.
                    if not (len(inputs.size()) == 4):
                        # if __DEBUG__:
                        print('wrong input dimension! ({},{})'.format(
                            filename, inputs.size()))
                        continue
                    if not (inputs.size(2) * inputs.size(3) <= 1200 * 1200):
                        # if __DEBUG__:
                        print('passed: image size too large! ({},{})'.format(
                            filename, inputs.size()))
                        continue
                    if not (inputs.size(2) >= 10 and inputs.size(3) >= 10):
                        # if __DEBUG__:
                        print('passed: image size too small! ({},{})'.format(
                            filename, inputs.size()))
                        continue

                    data_timer.update(time.time() - since)
                    # prepare inputs
                    if __is_cuda__():
                        inputs = __cuda__(inputs)
                    # inputs = __to_var__(inputs)

                    # get delf everything (score, feature, etc.)
                    # delf_feature = self.__extract_delf_feature__(inputs.data, filename, mode='delf')
                    delf_feature = self.__extract_delf_feature__(
                        inputs.detach(), filename, mode='delf')
                    if delf_feature is not None:
                        # feature_maps.append(delf_feature)
                        yield [self.delf_features2str(delf_feature)]

                    # log.
                    batch_timer.update(time.time() - since)
                    since = time.time()
                    log_msg = ('\n[Extract][Processing:({batch}/{size})] '
                               'eta: (data:{data:.3f}s),(batch:{bt:.3f}s)').format(
                                   batch=batch_idx + 1,
                                   size=len(self.dataloader),
                                   data=data_timer.val,
                                   bt=batch_timer.val)
                    # tt=bar.elapsed_td)
                    print(log_msg)
                    # bar.next()

                    # free GPU cache every.
                    if batch_idx % 10 == 0:
                        torch.cuda.empty_cache()
                        if __DEBUG__:
                            print('GPU Memory flushed !!!!!!!!!')

            # stream the DeLF features to a TSV file instead of pickling them.
            # self.__save_delf_features_to_file__(feature_maps, output_path)
            tsv_writer(gen_rows(), output_path)
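To summarize the two branches above: in 'pca' mode the loop only accumulates local descriptors and fits a PCA at the end, while in 'delf' mode each image's DeLF features are serialized and streamed straight to a TSV. A hypothetical call for each mode (the extractor class and its constructor arguments are assumptions, not shown in the snippet):

# Hypothetical usage; DelfExtractor and its constructor are assumed names.
# extractor = DelfExtractor(mode='delf', enlarge_bbox=1.0)
# extractor.extract('data/test_split.yaml', 'output/delf_features.tsv')  # one TSV row per image
# pca_extractor = DelfExtractor(mode='pca', enlarge_bbox=1.0)
# pca_extractor.extract('data/train_split.yaml', output_path=None)       # only fits the PCA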