コード例 #1
0
    def _calc_inter_distance(self, feature_map_dir, avg_feature_dict=None):
        """
        Calculate the distance between every sample and the center of its own class.

        The result is saved to
        './results/temp/<prefix>_inter_class_distances.pkl' and organized as:
        {
            'classid': {
                'xxx.png': distance,
                ...: ...
            },
            ...: ...
        }

        :param feature_map_dir: directory holding the '<classid>_features.pkl' files,
            each one a dict mapping a file name to its feature.
        :param avg_feature_dict: optional dict mapping classid to its center feature.
            When None, the cached
            './results/temp/<prefix>_true_avg_feature_for_each_class.pkl' is loaded.
        :return: the nested distance dict described above.
        """
        t1 = time.time()
        prefix = self.prefix.split('/')[0]

        # Only fall back to the cached centers when the caller supplied none;
        # previously the argument was silently overwritten.
        if avg_feature_dict is None:
            avg_feature_dict = pickle_read(
                './results/temp/%s_true_avg_feature_for_each_class.pkl' %
                prefix)

        distance_dict = {}
        for pkl in os.listdir(feature_map_dir):
            if 'features.pkl' not in pkl:
                continue
            # file names look like '<classid>_features.pkl'
            classid = pkl.split('_')[0]
            center = avg_feature_dict[classid]
            features = pickle_read(os.path.join(feature_map_dir, pkl))
            distance_dict[classid] = {
                _filename: self.feature_util.dist(center, _feature)
                for _filename, _feature in features.items()
            }

        pickle_write('./results/temp/%s_inter_class_distances.pkl' % prefix,
                     distance_dict)
        print('Time for _calc_inter_distance: %.1f s' % (time.time() - t1))
        return distance_dict
コード例 #2
0
    def _calc_avg_feature_map(self, feature_map_dir, model_count=1):
        """
        Average the stored features of every class and cache the result.

        Each '<classid>_features.pkl' file in ``feature_map_dir`` is read, its
        feature tensors are summed and divided by the number of entries
        (or by half of that number when ``model_count`` is not 1).

        :param feature_map_dir: directory with the per-class feature pickles.
        :param model_count: 1 divides by the entry count, any other value
            divides by half the entry count.
        :return: dict mapping classid to a torch.FloatTensor average feature.
        """
        started = time.time()
        avg_feature_dict = {}

        for pkl_name in os.listdir(feature_map_dir):
            if 'features.pkl' not in pkl_name:
                continue

            stored = pickle_read(os.path.join(feature_map_dir, pkl_name))
            tensors = list(stored.values())

            # accumulate on the CPU as a numpy array
            total = np.zeros(shape=tensors[0].shape)
            for tensor in tensors:
                total += tensor.cpu().detach().numpy()

            # NOTE(review): the non-single-model divider is hard-coded to
            # len / 2 rather than derived from model_count - confirm intent.
            if model_count == 1:
                divider = len(tensors)
            else:
                divider = len(tensors) / 2
            total /= divider

            avg_feature_dict[pkl_name.split('_')[0]] = torch.FloatTensor(total)

        # the output file is named after the first '_' token of the directory name
        dir_name = feature_map_dir.split('/')[-1]
        prefix = dir_name.split('_')[0]
        pickle_write(
            './results/temp/%s_avg_feature_for_each_class.pkl' % prefix,
            avg_feature_dict)
        print('Time for _calc_avg_feature_map: %.1f s' %
              (time.time() - started))
        return avg_feature_dict
コード例 #3
0
 def _calc_variance_each_class(self, inter_distance=None):
     """
     Compute one spread value per class: the mean of the per-sample
     distances of that class. The result is saved and organized as:
     {
         'classid': variance,
         ...: ...
     }

     :param inter_distance: dict mapping classid to a {file name: distance}
         dict; when None it is loaded from
         './results/temp/<prefix>_inter_class_distances.pkl'.
     :return: dict mapping classid to the mean distance of that class.
     """
     started = time.time()
     prefix = self.prefix.split('/')[0]

     if inter_distance is None:
         inter_distance = pickle_read(
             './results/temp/%s_inter_class_distances.pkl' % prefix)

     variance_dict = {}
     for classid, distances in inter_distance.items():
         total = .0
         for distance in distances.values():
             total += distance
         variance_dict[classid] = total / len(distances)

     pickle_write('./results/temp/%s_variance_each_class.pkl' % prefix,
                  variance_dict)
     elapsed = time.time() - started
     print('Time for _calc_variance_each_class: %.1f s' % elapsed)
     return variance_dict
コード例 #4
0
    def _calc_exter_class_distance(self, avg_feature_dict):
        """
        Calculate the exter distance for each pair of center vectors, the data
        will be saved and organized as:
        {
            'id-id': distance,
            ...: ...
        }

        Every unordered pair is measured once; the key is
        '<earlier id>-<later id>' in the iteration order of ``avg_feature_dict``.

        :param avg_feature_dict: dict mapping classid (str) to its center feature.
        :return: tuple ``(exter_class_distance_dict, class_to_nearest_class)``
            where ``class_to_nearest_class`` maps each classid to the id of the
            closest other class (None only when there is no other class).
        """
        t1 = time.time()
        id_feature_ls = list(avg_feature_dict.items())
        exter_class_distance_dict = {}

        # Track the nearest neighbour of every class. Each distance updates BOTH
        # endpoints of the pair; looking only at later classes left the last
        # class without a neighbour and ignored closer earlier classes.
        nearest_id = {classid: None for classid, _ in id_feature_ls}
        nearest_d = {classid: float('inf') for classid, _ in id_feature_ls}

        for _i, (classid, feature) in enumerate(id_feature_ls):
            for _second_classid, _second_feature in id_feature_ls[_i + 1:]:
                _d = self.feature_util.dist(feature, _second_feature)
                exter_class_distance_dict[classid + '-' + _second_classid] = _d
                # strict comparison: on ties the first class encountered wins
                if nearest_d[classid] > _d:
                    nearest_d[classid] = _d
                    nearest_id[classid] = _second_classid
                if nearest_d[_second_classid] > _d:
                    nearest_d[_second_classid] = _d
                    nearest_id[_second_classid] = classid
        class_to_nearest_class = nearest_id

        pickle_write(
            './results/temp/%s_exter_class_distances.pkl' %
            self.prefix.split('/')[0], exter_class_distance_dict)
        print('Time for _calc_exter_class_distance: %.1f s' %
              (time.time() - t1))
        return exter_class_distance_dict, class_to_nearest_class
コード例 #5
0
def main():
    """Invert the id -> name mapping and store it as './mapping_reverse_dict.pkl'."""
    name_to_id = {
        _name: _id
        for _id, _name in get_mapping_dict().items()
    }
    pickle_write('./mapping_reverse_dict.pkl', name_to_id)
コード例 #6
0
	def write_feature_map(self, label, feature, file_name, feature_map_dir):
		"""
		Store ``feature`` under ``file_name`` in the pickle of class ``label``.

		The pickle lives at '<feature_map_dir>/<label>_features.pkl' and holds a
		dict mapping file names to features. It is created when missing,
		otherwise the existing dict is loaded, updated and rewritten.

		:return: the ``feature`` that was passed in, unchanged.
		"""
		if not os.path.exists(feature_map_dir):
			os.makedirs(feature_map_dir)

		target = os.path.join(feature_map_dir, '%s_features.pkl' % label)
		# start from the stored entries when the class file already exists
		entries = pickle_read(target) if os.path.exists(target) else {}
		entries[file_name] = feature
		pickle_write(target, entries)
		return feature
コード例 #7
0
def _write_feature_map(label, feature, file_name, feature_map_dir, weight=1.0):
    """
    Scale ``feature`` by ``weight`` and store it in the pickle of class ``label``.

    The pickle lives at '<feature_map_dir>/<label>_features.pkl' and holds a
    dict mapping file names to weighted features.

    :param label: class id used to name the pickle file.
    :param feature: CUDA tensor broadcastable against shape (1, 2048).
    :param file_name: key under which the weighted feature is stored.
    :param feature_map_dir: output directory, created when missing.
    :param weight: scalar multiplier applied to every feature element.
    :return: the weighted feature tensor (on the GPU).
    """
    # Build the constants directly instead of from 2048-element Python lists.
    _zero = torch.zeros((1, 2048)).cuda()
    _multiplier = torch.full((1, 2048), float(weight)).cuda()
    # 0 + 1 * feature * weight. The scalar is passed as keyword ``value``;
    # the positional form addcmul(input, value, t1, t2) is deprecated.
    _zero = torch.addcmul(_zero, feature, _multiplier, value=1)

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(feature_map_dir, exist_ok=True)
    feature_map_name = os.path.join(feature_map_dir, '%s_features.pkl' % label)
    if os.path.exists(feature_map_name):
        obj = pickle_read(feature_map_name)
    else:
        obj = {}
    obj[file_name] = _zero
    pickle_write(feature_map_name, obj)
    return _zero
コード例 #8
0
    def _calc_true_avg_feature(self, feature_pkls_dir):
        """
        Compute the plain arithmetic mean of the stored features of each class.

        Reads every '<classid>_features.pkl' in ``feature_pkls_dir``, averages
        its feature tensors and saves the result to
        './results/temp/<prefix>_true_avg_feature_for_each_class.pkl'.

        :param feature_pkls_dir: directory with the per-class feature pickles.
        :return: dict mapping classid to a torch.FloatTensor mean feature.
        """
        started = time.time()
        avg_feature_dict = {}

        for pkl_name in os.listdir(feature_pkls_dir):
            if 'features.pkl' not in pkl_name:
                continue

            stored = pickle_read(os.path.join(feature_pkls_dir, pkl_name))
            tensors = list(stored.values())

            # accumulate on the CPU as a numpy array, then divide by the count
            total = np.zeros(shape=tensors[0].shape)
            for tensor in tensors:
                total += tensor.cpu().detach().numpy()
            total /= len(tensors)

            avg_feature_dict[pkl_name.split('_')[0]] = torch.FloatTensor(total)

        out_prefix = self.prefix.split('/')[0]
        pickle_write(
            './results/temp/%s_true_avg_feature_for_each_class.pkl' %
            out_prefix, avg_feature_dict)
        print('Time for _calc_true_avg_feature: %.1f s' %
              (time.time() - started))
        return avg_feature_dict
コード例 #9
0
    def get_accuracy_for_every_class(self, pkl_path, seen='none'):
        """
        Compute the per-class accuracy from the cached prediction results.

        Reads './results/temp/<prefix>_predict_label_dict_<seen>.pkl'
        (keys '<true id>-<predicted id>' mapping to lists of file names) and
        './results/temp/<prefix>_class_count_dict_<seen>.pkl'
        (classid mapping to its sample count).

        :param pkl_path: where the resulting list is pickled.
        :param seen: suffix selecting which cached result files are read.
        :return: list of {'id', 'accuracy', 'count'} dicts, ordered by
            ascending sample count.
        """
        prefix = self.prefix.split('/')[0]
        predict_label_dict = pickle_read(
            './results/temp/%s_predict_label_dict_%s.pkl' % (prefix, seen))
        class_count_dict = pickle_read(
            './results/temp/%s_class_count_dict_%s.pkl' % (prefix, seen))

        # The unused load of './constants/mapping_dict.pkl' was dropped: it
        # contributed nothing and failed whenever that file was absent.
        accuracy_for_every_class = []
        for classid, classcount in sorted(class_count_dict.items(),
                                          key=lambda x: x[1]):
            # correct predictions are stored under '<id>-<id>'
            key = '%s-%s' % (classid, classid)
            if key in predict_label_dict:
                acc = len(predict_label_dict[key]) / classcount
            else:
                acc = 0
            accuracy_for_every_class.append({
                'id': classid,
                'accuracy': acc,
                'count': classcount
            })
        pickle_write(pkl_path, accuracy_for_every_class)
        return accuracy_for_every_class
コード例 #10
0
    def predict_pictures(self, test_pictures=None, seen='none'):
        """
        Run the first model over the test pictures and score the predictions.

        For every picture the extracted feature is written to the per-class
        feature pickles under 'results/temp/<prefix>_all_test_pkls' (the
        directory is wiped first), the label is predicted with
        ``evaluate_single_file`` and the bookkeeping dicts are updated.

        :param test_pictures: file names inside ``self.test_dir``; when None
            every entry of that directory is used.
        :param seen: suffix of the two result pickles that are written.
            Previously this name was read but never defined, so the method
            raised NameError after the whole test loop had finished.
        :return: fraction of correctly predicted pictures (0 when there are none).
        """
        pkls_dir = os.path.join('results', 'temp', self.prefix + '_all_test_pkls')
        if os.path.exists(pkls_dir):
            shutil.rmtree(pkls_dir)

        all_count, positive_count = 0, 0
        predict_dict, class_count_dict = {}, {}

        if test_pictures is None:
            test_pictures = os.listdir(self.test_dir)
        dataset = ImageDataset([os.path.join(self.test_dir, x) for x in test_pictures],
                               transform=TestTransform(self.input_w, self.input_h))
        test_loader = DataLoader(dataset, batch_size=64, num_workers=2, pin_memory=True)
        for f_ls, l_ls, p_ls in test_loader:
            f_ls = self.models[0](torch.Tensor(f_ls).cuda())
            for f, l, p in zip(f_ls, l_ls, p_ls):
                self.feature_util.write_feature_map(l, f, p, pkls_dir, weight=1.0)
                pred_l, min_d = self.evaluate_single_file(f)
                # key is '<true label>-<predicted label>'
                predict_dict.setdefault(l + '-' + pred_l, []).append(p)
                class_count_dict[l] = class_count_dict.get(l, 0) + 1

                if l == pred_l:
                    positive_count += 1
                all_count += 1
                if all_count % 1000 == 0:
                    print('All:', all_count, ', Positive:', positive_count)
        print('test finished. all:', all_count, ', positive:', positive_count)

        pickle_write('./results/temp/%s_predict_label_dict_%s.pkl' % (self.prefix, seen), predict_dict) # store the prediction of each picture
        pickle_write('./results/temp/%s_class_count_dict_%s.pkl' % (self.prefix, seen), class_count_dict) # store the count of each class
        # the epsilon keeps the division defined when no picture was tested
        return positive_count / (all_count + 1e-12)
コード例 #11
0
    def train(self, balance_testset):
        """
            Training using hard sample + sample re-weighting(proposed by keke)

            Runs ``self.train_epochs`` epochs starting at ``self.start_epoch``.
            Each epoch (optionally) rebuilds the training list, refreshes the
            hard-sample statistics every 5 epochs, trains one metric-learning
            epoch, evaluates on the seen (and, if configured, unseen) test set,
            logs to '<save_dir>/readme.txt', saves a checkpoint and checks the
            early-stop counters.

            :param balance_testset: not referenced anywhere in this method body.
            :return: tuple ``(max_acc, epoch)`` - the best seen-set accuracy and
                the last epoch index that was reached.
        """
        self._train_prepare()

        analyzer = Analyzer(sample_file_dir=self.sample_file_dir,
                            test_dir=self.train_root,
                            prefix=self.prefix,
                            WIDTH=self.w,
                            HEIGHT=self.h)
        # drop_count: consecutive epochs with acc below last_acc
        # fail_max_count: consecutive epochs that did not match max_acc
        max_acc, last_acc, drop_count, fail_max_count = .0, .0, 0, 0
        max_acc_unseen = .0
        overlap_dict = {}
        # pre-bind epoch so it exists after the loop even if the range is empty
        epoch = self.start_epoch
        for epoch in range(self.start_epoch,
                           self.start_epoch + self.train_epochs):
            s_time = time.time()

            if self.step_size > 0:
                self.optimizer = _adjust_learning_rate(self.optimizer, epoch)
            # NOTE(review): next_margin is written back to self.margin at the end
            # of the iteration; nothing visible here changes it in between.
            next_margin = self.margin

            # get a brand new training set for a new epoch
            if self.num_train_imgs > self.num_train_pids * 100:
                if epoch == self.start_epoch:
                    true_exter_class_top = None
                elif epoch % 5 == 0 or epoch == (self.start_epoch + 1):
                    # exter_class_top comes from the previous iteration
                    # (it is assigned after train_using_metriclearning below)
                    true_exter_class_top = exter_class_top
                else:
                    # keep the value chosen in an earlier epoch
                    pass
                if true_exter_class_top is not None:
                    print('length of true_exter_class_top:',
                          len(true_exter_class_top), ', and:',
                          true_exter_class_top)
                train_pictures = get_training_set_list(
                    self.train_root,
                    train_limit=70,
                    random_training_set=False,
                    special_classes=true_exter_class_top)
            else:
                train_pictures = None

            # and then go through the training set, to get data needed for hard-sample
            # (refreshed on the first epoch and every 5th epoch, reused otherwise)
            if epoch % 5 == 0 or epoch == self.start_epoch:
                distance_dict, class_to_nearest_class = analyzer.analysis_for_hard_sample(
                    self.model, test_pictures=train_pictures)

            train_using_metriclearning(
                self.model,
                self.optimizer,
                self.tri_criterion,
                epoch,
                self.train_root,
                train_pictures=train_pictures,
                batch_size=self.batch_size,
                distance_dict=distance_dict,
                class_to_nearest_class=class_to_nearest_class)
            exter_class_top, overlap_rate_dict_ls = analyzer.analysis_for_exter_class_overlap(
                model_path=None, model=self.model, WIDTH=self.w, HEIGHT=self.h)
            # e_time is taken before evaluation, so epoch_time below excludes testing
            e_time = time.time()
            # overlap rates are only recorded on every 5th epoch and on epoch 2
            if epoch % 5 == 0 or epoch == 2:
                overlap_dict[epoch] = overlap_rate_dict_ls
            # true testing on seen classes
            acc = self.tester.evaluate_with_models(seen='seen')
            print(
                'Margin: {}, Epoch: {}, Acc: {:.3}%, Top overlap rate: {:.4} (on seen pictures)[Hard Sample + Sample Re-weighting]'
                .format(self.margin, epoch, acc * 100,
                        overlap_rate_dict_ls[0][1]))

            if self.test_unseen_root is not None:
                # true testing on unseen classes
                acc_unseen = self.tester.evaluate_with_models(seen='unseen')
                max_acc_unseen = max(max_acc_unseen, acc_unseen)
                # the suffix marks epochs that equal the best unseen accuracy so far
                note = 'update:%.2f, on unseen%s' % (
                    self.update_conv_layers, ' - New Unseen Accuracy'
                    if max_acc_unseen == acc_unseen else '')
                log(log_path=os.path.join(self.save_dir, 'readme.txt'),
                    epoch=epoch,
                    accuracy=acc_unseen,
                    train_cls_count=self.num_train_pids,
                    test_cls_count=self.num_test_unseen_pids,
                    method='metric',
                    note=note)
                print(
                    'Margin: {}, Epoch: {}, Acc: {:.3}% (on unseen pictures)[Hard Sample + Sample Re-weighting]'
                    .format(self.margin, epoch, acc_unseen * 100))
            else:
                # placeholder value; acc_unseen is not read again in this method
                acc_unseen = -1

            max_acc = max(acc, max_acc)
            note = 'update:%.2f, on seen%s' % (self.update_conv_layers,
                                               ' - New Seen Accuracy'
                                               if max_acc == acc else '')
            log(log_path=os.path.join(self.save_dir, 'readme.txt'),
                epoch=epoch,
                accuracy=acc,
                train_cls_count=self.num_train_pids,
                test_cls_count=self.num_test_pids,
                method='metric',
                epoch_time=(e_time - s_time),
                note=note)

            # update the early-stop counters
            if epoch == self.start_epoch:
                last_acc = acc
            else:
                if acc < last_acc:
                    # last_acc is deliberately left untouched while accuracy drops
                    drop_count += 1
                else:
                    drop_count = 0
                    last_acc = acc
            if max_acc == acc:
                fail_max_count = 0
            else:
                fail_max_count += 1

            if 'inception3' == self.model_type:
                save_model_name = 'inception_v3_metric.tmpmodel.tar'
            else:
                save_model_name = 'resnet_metric.tmpmodel.tar'
            # with use_gpu the weights are taken from self.model.module
            state_dict = self.model.module.state_dict(
            ) if self.use_gpu else self.model.state_dict()

            # save model, and check if its the best model. save as the best model if positive
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'epoch': epoch,
                },
                is_best=acc == max_acc,
                save_dir=self.save_dir,
                filename=save_model_name,
                acc=acc,
            )

            # if the accuracy keep dropping, stop training
            if (drop_count == 12
                    or fail_max_count == 24) and self.enable_stop_machanism:
                print(
                    'Accuracy dropping for %d times or smaller the max for %d times, stop in epoch %d\n'
                    % (drop_count, fail_max_count, epoch))
                break
            # if overlap_rate_dict_ls[0][1] < .1:
            #     print('Top exter class overlap rate reach a smaller value than threshold, stop in epoch %d\n' % epoch)
            #     break

            self.margin = next_margin
        # append the final summary to the same readme.txt used by log()
        with open(os.path.join(self.save_dir, 'readme.txt'), 'ab+') as f:
            c = '\r\n[Hard Sample + Sample Re-weighting] Training finished with: %d epoch, %.2f%% accuracy.' % (
                epoch, max_acc * 100)
            f.write(c.encode())
        self._clean_tmp_model(epoch)
        pickle_write(
            './results/temp/%s_v5_overlap_rate_dict.pkl' % self.prefix,
            overlap_dict)
        return max_acc, epoch
コード例 #12
0
    def predict_pictures(self,
                         feature_map,
                         index_list,
                         seen,
                         weight_ls,
                         test_pictures=None):
        """
        Predict the class of every test picture with a weighted model ensemble.

        For each picture the feature of every model in ``self.models`` is scaled
        by the matching entry of ``weight_ls`` and the scaled features are
        summed. The sum is written to the per-class feature pickles under
        'results/temp/<prefix>_all_test_pkls' (wiped first) and classified with
        ``evaluate_single_file``.

        :param feature_map: forwarded to ``evaluate_single_file``.
        :param index_list: not referenced in this method body.
        :param seen: suffix of the two result pickles that are written.
        :param weight_ls: one scalar weight per model in ``self.models``.
        :param test_pictures: file names inside ``self.test_dir``; when None
            every entry of that directory is used.
        :return: fraction of correctly predicted pictures (0 when there are none).
        """
        pkls_dir = os.path.join('results', 'temp',
                                self.prefix + '_all_test_pkls')
        if os.path.exists(pkls_dir):
            shutil.rmtree(pkls_dir)

        all_count, positive_count = 0, 0
        predict_dict, class_count_dict = {}, {}
        if test_pictures is None:
            test_pictures = os.listdir(self.test_dir)

        for i in test_pictures:
            file_path = os.path.join(self.test_dir, i)
            file_name = file_path.split('/')[-1]
            # class id = text after the last '_' minus the 4-char extension
            cls_idx = re.split(
                '_', file_path)[-1][:-4]  # accroding to the directory name
            all_count += 1

            feature_on_gpu = None
            for weight_index, model in enumerate(self.models):
                _f = self.get_feature(file_path, model)

                # Build the constants directly instead of from 2048-element lists.
                _zero = torch.zeros((1, 2048)).cuda()
                _multiplier = torch.full(
                    (1, 2048), float(weight_ls[weight_index])).cuda()
                # 0 + 1 * feature * weight. The scalar is passed as keyword
                # ``value``; the positional form is deprecated.
                _zero = torch.addcmul(_zero, _f, _multiplier, value=1)

                if not isinstance(_zero, torch.Tensor):
                    # report the type of the value that was actually checked
                    print('Expected torch.Tensor, got', type(_zero))
                    exit(200)
                if feature_on_gpu is None:
                    feature_on_gpu = np.zeros(shape=_zero.shape)
                # the ensemble sum is accumulated on the CPU as a numpy array
                feature_on_gpu += _zero.cpu().detach().numpy()
            feature_on_gpu = torch.FloatTensor(feature_on_gpu).cuda()
            self._write_feature_map(cls_idx,
                                    feature_on_gpu,
                                    file_name,
                                    pkls_dir,
                                    weight=1.0)

            pred_label, min_distance = self.evaluate_single_file(
                feature_on_gpu, feature_map)

            # key is '<true label>-<predicted label>'
            pred_key = cls_idx + '-' + pred_label
            predict_dict.setdefault(pred_key, []).append(file_name)
            class_count_dict[cls_idx] = class_count_dict.get(cls_idx, 0) + 1

            if cls_idx == pred_label:  # compute the correct num of the class
                positive_count += 1
            if all_count % 1000 == 0:
                print('For now, all:', all_count, ', positive:',
                      positive_count)
        print('all:', all_count, ', positive:', positive_count)

        pickle_write('./results/temp/%s_predict_label_dict_%s.pkl' %
                     (self.prefix, seen),
                     predict_dict)  # store the prediction of each picture
        pickle_write('./results/temp/%s_class_count_dict_%s.pkl' %
                     (self.prefix, seen),
                     class_count_dict)  # store the count of each class
        # the epsilon keeps the division defined when no picture was tested
        return positive_count / (all_count + 1e-12)