def calc_predict_result(self, predict_dict_path, class_count_dict_path):
    """Build a class-vs-class prediction table, write it to a CSV under the
    model's visualize_folder, and return the per-class accuracy (the
    diagonal of the table)."""
    predict_dict = pickle_read(predict_dict_path)
    class_count_dict = pickle_read(class_count_dict_path)

    # First column: the class names, in id order.
    data_dict = {
        '00ACCURACY': [
            self.id_name_dict[str(idx)]
            for idx in range(1, self.num_of_classes + 1)
        ]
    }
    class_acc = {}
    for col in range(1, self.num_of_classes + 1):
        col_key = '(%02d)%s' % (col, self.id_name_dict[str(col)])
        column = []
        for row in range(1, self.num_of_classes + 1):
            pair = '%d-%d' % (row, col)
            if pair in predict_dict:
                acc = len(predict_dict[pair]) / class_count_dict[str(row)]
                if col == row:
                    # Diagonal entry: samples of `row` predicted as `row`.
                    class_acc[str(col)] = acc
            else:
                acc = 0.0
            column.append(acc)
        data_dict[col_key] = column
    _save_csv = os.path.join(
        'model', self.prefix, 'visualize_folder',
        '%s_prediction_class_pairs.csv' % self.prefix.split('/')[0])
    write_csv(_save_csv, data_dict)
    return class_acc
Ejemplo n.º 2
0
    def _calc_inter_distance(self, feature_map_dir, avg_feature_dict=None):
        """
        Calculate the distance between each sample and its class-average
        feature (the "inter distance" inside each class); the data is saved
        and organized as:
        {
            'classid': {
                'xxx.png': distance,
                ...: ...
            },
            ...: ...
        }

        Args:
            feature_map_dir: directory holding per-class
                ``<classid>_features.pkl`` files ({filename: feature}).
            avg_feature_dict: optional {classid: avg_feature}; when None it
                is loaded from the cached true-average pickle.
        """
        t1 = time.time()
        distance_dict = {}

        # BUG FIX: the avg_feature_dict argument used to be ignored and the
        # pickle was reloaded unconditionally; honor the argument when given.
        if avg_feature_dict is None:
            avg_feature_dict = pickle_read(
                './results/temp/%s_true_avg_feature_for_each_class.pkl' %
                self.prefix.split('/')[0])
        for pkl in [
                x for x in os.listdir(feature_map_dir) if 'features.pkl' in x
        ]:
            classid = pkl.split('_')[0]
            distance_dict[classid] = {}
            for _filename, _feature in pickle_read(
                    os.path.join(feature_map_dir, pkl)).items():
                distance_dict[classid][_filename] = self.feature_util.dist(
                    avg_feature_dict[classid], _feature)
        pickle_write(
            './results/temp/%s_inter_class_distances.pkl' %
            self.prefix.split('/')[0], distance_dict)
        print('Time for _calc_inter_distance: %.1f s' % (time.time() - t1))
        return distance_dict
Ejemplo n.º 3
0
 def _calc_variance_each_class(self, inter_distance=None):
     """
     Compute, for each class, the average of the stored sample-to-average
     distances; the data will be saved and organized as:
     {
         'classid': variance,
         ...: ...
     }
     """
     t1 = time.time()
     if inter_distance is None:
         inter_distance = pickle_read(
             './results/temp/%s_inter_class_distances.pkl' %
             self.prefix.split('/')[0])
     # NOTE(review): despite the name, this is the mean distance per class,
     # not a statistical variance.
     variance_dict = {
         classid: sum(distances.values()) / len(distances)
         for classid, distances in inter_distance.items()
     }
     pickle_write(
         './results/temp/%s_variance_each_class.pkl' %
         self.prefix.split('/')[0], variance_dict)
     print('Time for _calc_variance_each_class: %.1f s' %
           (time.time() - t1))
     return variance_dict
 def visualize_class_variance(self, variance_dict, class_acc=None):
     """Dump per-class variance and image count (plus accuracy when given)
     to a CSV in the model's visualize_folder."""
     class_count_dict = pickle_read(
         './results/temp/%s_class_count_dict_seen.pkl' %
         self.prefix.split('/')[0])
     # All columns iterate the classes in the same 1..num_of_classes order.
     class_ids = [str(i) for i in range(1, self.num_of_classes + 1)]
     data_dict = {
         '0CLASS': ['%s' % (self.id_name_dict[cid]) for cid in class_ids],
         '1VARIANCE': ['%f' % variance_dict[cid] for cid in class_ids],
         '2IMAGES_COUNT': ['%d' % class_count_dict[cid] for cid in class_ids],
     }
     if class_acc is not None:
         data_dict['3ACCURACY'] = [
             '%f' % class_acc[cid] for cid in class_ids
         ]
     _save_csv = os.path.join(
         'model', self.prefix, 'visualize_folder',
         '%s_class_variance_seen.csv' % self.prefix.split('/')[0])
     write_csv(_save_csv, data_dict)
Ejemplo n.º 5
0
 def predict(self, model, feature_map, picture_path):
     """Classify one picture: extract its feature and match it against the
     feature map.

     Args:
         model: the network used for feature extraction (set to eval mode).
         feature_map: the feature-map dict itself, or a path to a pickled one.
         picture_path: path of the image to classify.

     Returns:
         (predicted_label, min_distance) from evaluate_single_file.
     """
     model.eval()
     # isinstance instead of type(...) == str: idiomatic, accepts subclasses.
     if isinstance(feature_map, str):
         feature_map = pickle_read(feature_map)
     feature = self.feature_util.get_feature(picture_path, model, TestTransform(self.input_w, self.input_h))
     pred_l, min_d = self.evaluate_single_file(feature, feature_map)
     return pred_l, min_d
Ejemplo n.º 6
0
def prepare_data_for_one(
        pictures_pool,
        limit=100,
        ignore_limit=False,
        output_path=r'/home/ubuntu/Program/Dish_recognition/dataset/test'):
    """Copy up to `limit` images per known label from `pictures_pool` into a
    flat directory, renaming each to ``<index>_<ground_truth_label>.png``.

    Labels absent from mapping_dict_for_train.pkl (reversed) are skipped.

    Args:
        pictures_pool: root directory with one sub-directory per label.
        limit: max images copied per label (ignored when ignore_limit).
        ignore_limit: when True, copy every image of every known label.
        output_path: destination directory; recreated from scratch.
    """
    # Recreate the output directory from scratch.
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)

    train_dict = pickle_read('./mapping_dict_for_train.pkl')
    # Reverse the mapping: label name -> ground-truth id.
    train_dict_reverse = {v: k for k, v in train_dict.items()}

    start_index = 10000
    for label in os.listdir(pictures_pool):
        if label not in train_dict_reverse:
            count = len(os.listdir(os.path.join(pictures_pool, label)))
            print('skip:', label, ', has count:', count)
            continue
        ground_truth_label = train_dict_reverse[label]
        label_count = 0
        for image_path in os.listdir(os.path.join(pictures_pool, label)):
            # BUG FIX: check the limit BEFORE copying. The old post-increment
            # `label_count == limit` check never triggered for limit=0 and so
            # copied everything.
            if not ignore_limit and label_count >= limit:
                break
            final_name = str(start_index) + '_' + ground_truth_label + '.png'
            img = cv2.imread(os.path.join(pictures_pool, label, image_path))
            cv2.imwrite(os.path.join(output_path, final_name), img)
            start_index += 1
            label_count += 1
    print(start_index - 10000, 'pictures prepared for specific one set.')
Ejemplo n.º 7
0
    def _calc_avg_feature_map(self, feature_map_dir, model_count=1):
        """Average the per-sample features of every class and cache the
        result as ``<prefix>_avg_feature_for_each_class.pkl``.

        Returns:
            {classid: torch.FloatTensor average feature}
        """
        t1 = time.time()

        avg_feature_dict = {}
        pkl_names = [
            name for name in os.listdir(feature_map_dir)
            if 'features.pkl' in name
        ]
        for pkl_name in pkl_names:
            feature_values = list(
                pickle_read(os.path.join(feature_map_dir, pkl_name)).values())
            total = np.zeros(shape=feature_values[0].shape)
            for feat in feature_values:
                total += feat.cpu().detach().numpy()

            # NOTE(review): with multiple models each sample contributes one
            # feature per model, hence the halved divider — presumably tuned
            # for the two-model case; confirm for model_count > 2.
            if model_count == 1:
                divider = len(feature_values)
            else:
                divider = len(feature_values) / 2
            total /= divider

            classid = pkl_name.split('_')[0]
            avg_feature_dict[classid] = torch.FloatTensor(total)

        prefix = feature_map_dir.split('/')[-1].split('_')[0]
        pickle_write(
            './results/temp/%s_avg_feature_for_each_class.pkl' % prefix,
            avg_feature_dict)
        print('Time for _calc_avg_feature_map: %.1f s' % (time.time() - t1))
        return avg_feature_dict
 def visualize_acc_for_every_class(self,
                                   pkl_path,
                                   acc_for_every_class=None):
     """Write the per-class accuracy records (as produced by
     get_accuracy_for_every_class) to ``<pkl_path>.csv``."""
     if acc_for_every_class is None:
         acc_for_every_class = pickle_read(pkl_path)
     # One CSV column per field, record order preserved.
     data_dict = {
         '1CLASS': [rec['id'] for rec in acc_for_every_class],
         '2COUNT': [rec['count'] for rec in acc_for_every_class],
         '3ACCURACY': [rec['accuracy'] * 100 for rec in acc_for_every_class],
     }
     write_csv('%s.csv' % pkl_path, data_dict)
Ejemplo n.º 9
0
def get_feature_map_average(models,
                            sample_file_dir,
                            temp_prefix,
                            WIDTH,
                            HEIGHT,
                            weight_ls=None,
                            sample_num_each_cls=5,
                            ignore_limit=False):
    """Build an averaged feature map from sample images using one or more
    models.

    For every class directory under sample_file_dir, at most
    sample_num_each_cls images per model are embedded (all of them when
    ignore_limit) and accumulated into per-class pickles, which are then
    averaged by _calc_avg_feature_map.

    Args:
        models: list of models to run.
        sample_file_dir: directory of per-class sample folders.
        temp_prefix: prefix for the temporary feature-pkl directory.
        WIDTH, HEIGHT: input size for feature extraction.
        weight_ls: optional per-model weights; defaults to 1.0 per model.
            (BUG FIX: `weight_ls[i]` previously crashed on the None default.)
        sample_num_each_cls: per-class, per-model sample cap.
        ignore_limit: when True, use every available sample.

    Returns:
        The averaged feature map, or None when sample_file_dir is missing.
    """
    t1 = time.time()

    if not os.path.exists(sample_file_dir):
        print(
            'You should prepare base samples first, call prepare_base_picture_for_class may help.'
        )
        return None

    # Start from a clean temporary directory for the per-class pickles.
    feature_map_dir = os.path.join('results', 'temp',
                                   temp_prefix + '_feature_pkls')
    if os.path.exists(feature_map_dir):
        shutil.rmtree(feature_map_dir)
    os.makedirs(feature_map_dir)

    name_to_id_dict = pickle_read('./constants/mapping_reverse_dict.pkl')

    for i, model in enumerate(models):
        weight = 1.0 if weight_ls is None else weight_ls[i]
        class_count_dict = {}  # per-model sample counter, keyed by label
        for cls_name in os.listdir(sample_file_dir):
            # Fall back to the raw directory name when it has no mapping.
            ground_truth_label = name_to_id_dict.get(cls_name, cls_name)

            if ground_truth_label not in class_count_dict:
                class_count_dict[ground_truth_label] = 0

            dir_full_path = os.path.join(
                sample_file_dir, cls_name)  # open the directory in order.

            for file_name in os.listdir(dir_full_path):
                if ignore_limit or class_count_dict[
                        ground_truth_label] < sample_num_each_cls:
                    file_full_path = os.path.join(dir_full_path, file_name)
                    feature_on_gpu = get_feature(file_full_path, model, WIDTH,
                                                 HEIGHT)
                    _write_feature_map(label=ground_truth_label,
                                       feature=feature_on_gpu,
                                       file_name=file_name,
                                       feature_map_dir=feature_map_dir,
                                       weight=weight)
                    class_count_dict[ground_truth_label] += 1

    avg_feature_map = _calc_avg_feature_map(feature_map_dir, len(models))
    print('Time for get_feature_map_average: %.1f s' % (time.time() - t1))
    return avg_feature_map
Ejemplo n.º 10
0
	def write_feature_map(self, label, feature, file_name, feature_map_dir):
		"""Append `feature` under `file_name` to the per-class pickle
		``<label>_features.pkl`` in feature_map_dir (directory and pickle are
		created on first use), then return the feature unchanged."""
		if not os.path.exists(feature_map_dir):
			os.makedirs(feature_map_dir)
		feature_map_name = os.path.join(feature_map_dir, '%s_features.pkl' % label)
		# Load the existing map if present, otherwise start a fresh one.
		obj = pickle_read(feature_map_name) if os.path.exists(feature_map_name) else {}
		obj[file_name] = feature
		pickle_write(feature_map_name, obj)
		return feature
Ejemplo n.º 11
0
 def calc_top3_error(self, predict_dict_path, class_count_dict_path, seen,
                     epoch):
     """For every class whose accuracy falls below 0.8, append its top-3
     most-confused classes (by misclassification rate) to a per-prefix
     text report under results/data_analyze.

     Args:
         predict_dict_path: pickle of {'true-pred': [samples]} pairs.
         class_count_dict_path: pickle of {classid(str): sample count}.
         seen: tag written into the report header.
         epoch: epoch number written into the report header.
     """
     # calc top 3 error
     predict_dict = pickle_read(predict_dict_path)
     class_count_dict = pickle_read(class_count_dict_path)
     id_name = pickle_read(
         '/home/ubuntu/Program/Dish_recognition/program/constants/mapping_dict.pkl'
     )
     # NOTE(review): class ids are hard-coded to 1..94 here, unlike the
     # self.num_of_classes range used elsewhere — confirm intentional.
     for i in range(1, 95):
         key = '%d-%d' % (i, i)
         if key in predict_dict:
             acc = len(predict_dict[key]) / class_count_dict[str(i)]
             if acc < .8:
                 # start to calc the top 3 error
                 # Report is opened in append-binary mode so repeated epochs
                 # accumulate in the same file.
                 with open(
                         os.path.join(
                             'results', 'data_analyze',
                             '%s_top3_error.txt' %
                             self.prefix.split('/')[0]), 'ab+') as f:
                     acc_dict = {}
                     time_str = time.strftime("%Y-%m-%d %H:%M:%S",
                                              time.localtime())
                     c = '\r\n\r\n%s (%s)\r\nEposh[%d] Top 3 Error of Class: %d (Dish: %s)\r\n' % (
                         time_str, seen, epoch, i, id_name[str(i)])
                     f.write(c.encode())
                     # Rate (in percent) at which class i is predicted as
                     # every other class j.
                     for j in range(1, 95):
                         if i == j:
                             continue
                         key = '%d-%d' % (i, j)
                         if key in predict_dict:
                             acc = len(predict_dict[key]
                                       ) / class_count_dict[str(i)] * 100
                             acc_dict[str(j)] = acc
                     # Sort confusions descending and keep the worst three.
                     acc_dict = sorted(acc_dict.items(), key=lambda x: x[1])
                     acc_dict.reverse()
                     for cls_idx, acc in acc_dict[:3]:
                         c = '\tClass: %s (Dish: %s) : %.2f\r\n' % (
                             cls_idx, id_name[cls_idx], acc)
                         f.write(c.encode())
Ejemplo n.º 12
0
    def get_accuracy_for_every_class(self, pkl_path, seen='none'):
        """Compute the accuracy of every class from the cached prediction and
        count pickles, save the records to `pkl_path` and return them.

        Args:
            pkl_path: destination pickle for the resulting record list.
            seen: which cached split to load (used in the pickle filenames).

        Returns:
            List of {'id', 'accuracy', 'count'} dicts, sorted by ascending
            sample count.
        """
        predict_label_dict = pickle_read(
            './results/temp/%s_predict_label_dict_%s.pkl' %
            (self.prefix.split('/')[0], seen))
        class_count_dict = pickle_read(
            './results/temp/%s_class_count_dict_%s.pkl' %
            (self.prefix.split('/')[0], seen))

        # Sort classes by how many test samples they have.
        class_count_dict = sorted(class_count_dict.items(), key=lambda x: x[1])
        accuracy_for_every_class = []
        # Removed an unused `mapping = pickle_read('./constants/mapping_dict.pkl')`
        # local that only wasted I/O.

        for classid, classcount in class_count_dict:
            # 'id-id' entries hold the correctly classified samples.
            key = '%s-%s' % (classid, classid)
            acc = len(predict_label_dict[key]
                      ) / classcount if key in predict_label_dict else 0
            accuracy_for_every_class.append({
                'id': classid,
                'accuracy': acc,
                'count': classcount
            })
        pickle_write(pkl_path, accuracy_for_every_class)
        return accuracy_for_every_class
Ejemplo n.º 13
0
def _write_feature_map(label, feature, file_name, feature_map_dir, weight=1.0):
    """Store ``weight * feature`` under `file_name` in the per-class pickle
    ``<label>_features.pkl``, creating the directory/pickle as needed.

    Returns the weighted feature.
    """
    # Replaces the old torch.addcmul(zeros, 1, feature, multiplier) dance:
    # that used a deprecated positional-scalar call signature, hard-coded a
    # 1x2048 feature shape, and required CUDA for the helper tensors. Plain
    # scaling computes the identical values (0 + 1 * feature * weight) while
    # keeping the feature's own shape and device.
    weighted = feature * weight

    if not os.path.exists(feature_map_dir):
        os.makedirs(feature_map_dir)
    feature_map_name = os.path.join(feature_map_dir, '%s_features.pkl' % label)
    if not os.path.exists(feature_map_name):
        obj = {file_name: weighted}
    else:
        obj = pickle_read(feature_map_name)
        obj[file_name] = weighted
    pickle_write(feature_map_name, obj)
    return weighted
Ejemplo n.º 14
0
def predict(model, feature_map, picture_path):
    """
    Get the prediction for one picture.

    Arguments:
        model: the model used for feature extraction (switched to eval mode)
        feature_map: the feature map itself, or a path to a pickled one;
            feature_map is needed for prediction
        picture_path: the path of the picture to classify

    Returns:
        (predicted_label, min_distance)
    """
    model.eval()
    # isinstance instead of type(...) == str: idiomatic, accepts subclasses.
    if isinstance(feature_map, str):
        feature_map = pickle_read(feature_map)
    feature = get_feature(picture_path)
    pred_label, min_distance = evaluate_single_file(feature, feature_map)
    return pred_label, min_distance
Ejemplo n.º 15
0
 def __init__(self, model, sample_file_dir, test_dir, prefix, input_w, input_h,
              sample_num_each_cls=5,
              tablewares_mapping_path=None):
     """Predictor setup.

     Args:
         model: the model used for feature extraction.
         sample_file_dir: directory of base sample images per class.
         test_dir: directory of test images.
         prefix: run prefix used when building result paths.
         input_w, input_h: network input size.
         sample_num_each_cls: samples per class for the feature map.
         tablewares_mapping_path: optional pickle path; when given its
             contents are loaded into self.mapping.
     """
     # BUG FIX: `model` appeared twice in the signature (positionally and
     # again as `model=None`), which is a SyntaxError in Python; the
     # duplicate keyword parameter was removed.
     self.model = model
     self.sample_file_dir = sample_file_dir
     self.test_dir = test_dir
     self.prefix = prefix
     self.input_w = input_w
     self.input_h = input_h
     self.sample_num_each_cls = sample_num_each_cls
     self.mapping = None
     if tablewares_mapping_path is not None:
         self.mapping = pickle_read(tablewares_mapping_path)
     self.feature_util = FeatureUtil(input_w, input_h)
     self.feature_map = None
Ejemplo n.º 16
0
 def __init__(self,
              sample_file_dir,
              test_dir,
              num_of_classes,
              prefix,
              WIDTH=None,
              HEIGHT=None):
     """Visualizer setup: load the id->name mapping for the class set and
     ensure the model's visualize_folder exists."""
     self.sample_file_dir = sample_file_dir
     self.test_dir = test_dir
     self.num_of_classes = num_of_classes
     self.prefix = prefix
     # Two mapping files exist; the "o_" one covers the larger class set.
     if self.num_of_classes > 50:
         pkl_path = './constants/o_id_to_name.pkl'
     else:
         pkl_path = './constants/n_id_to_name.pkl'
     self.id_name_dict = pickle_read(pkl_path)
     visualize_dir = os.path.join('model', self.prefix, 'visualize_folder')
     if not os.path.exists(visualize_dir):
         os.makedirs(visualize_dir)
     # Input size is optional; attributes are only set when provided.
     if WIDTH is not None:
         self.WIDTH = WIDTH
     if HEIGHT is not None:
         self.HEIGHT = HEIGHT
Ejemplo n.º 17
0
    def _calc_true_avg_feature(self, feature_pkls_dir):
        """Average every class's sample features (unweighted) and cache the
        result as ``<prefix>_true_avg_feature_for_each_class.pkl``.

        Returns:
            {classid: torch.FloatTensor average feature}
        """
        t1 = time.time()
        avg_feature_dict = {}
        for pkl_name in (x for x in os.listdir(feature_pkls_dir)
                         if 'features.pkl' in x):
            feats = list(
                pickle_read(os.path.join(feature_pkls_dir, pkl_name)).values())
            total = np.zeros(shape=feats[0].shape)
            for feat in feats:
                total += feat.cpu().detach().numpy()
            total /= len(feats)
            classid = pkl_name.split('_')[0]
            avg_feature_dict[classid] = torch.FloatTensor(total)

        pickle_write(
            './results/temp/%s_true_avg_feature_for_each_class.pkl' %
            self.prefix.split('/')[0], avg_feature_dict)
        print('Time for _calc_true_avg_feature: %.1f s' % (time.time() - t1))
        return avg_feature_dict
Ejemplo n.º 18
0
def get_mapping_dict():
    """Load and return the pickled mapping dict (mapping_dict.pkl)."""
    mapping = pickle_read('mapping_dict.pkl')
    return mapping
Ejemplo n.º 19
0
        test_index += 1

    print('training & test set prepared.')
    """
        train set path
        test set path
        base sample path
    """
    return


if __name__ == '__main__':
    # training_for_94_vs_54()
    # exit(1000)

    name_to_id_dict = pickle_read('../constants/mapping_reverse_dict.pkl')
    train_classes = [
        '洋葱',
        '日本豆腐',
        '粗海带炒肉',
        '炒饭',
        '煎饺',
        '肠粉',
        '小青椒炒蛋',
        '土豆鸡肉',
        '腐竹炒肉',
        '苦瓜炒肉',
        '大头菜',
        '炒腐皮',
        '蟹柳',
        '苦瓜包肉',
Ejemplo n.º 20
0
def get_mapping_list():
    """Load and return the pickled mapping list (mapping_list.pkl)."""
    mapping = pickle_read('mapping_list.pkl')
    return mapping
Ejemplo n.º 21
0
def prepare_dataset_different_train_test(dir_list=None,
                                         train_limit=300,
                                         test_limit=100,
                                         train_classes=None,
                                         unseen_test_classes=None):
    """Build train / test / unseen-test / base-sample sets where the train
    images come from a distorted copy of the data (distortion_train_path)
    while test and sample images come from the original pool.

    Relies on module-level globals: origin_data_root, output_sample_path,
    output_train_path, output_test_path, output_test_unseen_path,
    distortion_train_path, plus helpers clean_old_train_test_data and
    copy_files.

    Args:
        dir_list: class directories to process; defaults to everything
            under origin_data_root.
        train_limit: with test_limit, decides how the original pictures are
            split when few are available.
        test_limit: max test pictures per class when plenty are available.
        train_classes: when given, only these class names are trained on.
        unseen_test_classes: classes copied into the unseen test set.
    """
    # this function is only used to get differently distributed data
    if dir_list is None:
        dir_list = os.listdir(origin_data_root)

    train_index, test_index, unseen_test_index = 10000, 10000, 10000
    pic_sum_limit = 25  # classes with more than 25 pictures will be chosen.
    test_num_dict, train_num_dict = {}, {}

    clean_old_train_test_data()

    # first, prepare a training set with 2-level order, test set with 1-level order
    name_to_id_dict = pickle_read('../constants/mapping_reverse_dict.pkl')
    for index, cls_name in enumerate(dir_list):
        if '.zip' in cls_name:
            continue

        dir_full_path = os.path.join(origin_data_root, cls_name)
        pic_list = os.listdir(dir_full_path)

        random.shuffle(pic_list)

        # First five shuffled pictures become the base samples for the class.
        feature_list = pic_list[:5]
        copy_files(src_dir=dir_full_path,
                   des_dir=output_sample_path,
                   pic_list=feature_list,
                   cls_idx=name_to_id_dict[cls_name])
        train_and_test_list = pic_list[5:]

        # Classes that are too small (or not whitelisted) are excluded from
        # training; they may still feed the unseen test set.
        if (len(pic_list) < pic_sum_limit and train_classes is None) or (
                train_classes is not None and cls_name not in train_classes):
            print(cls_name, 'not included for training.')

            if (unseen_test_classes is not None
                    and cls_name in unseen_test_classes):
                for pic in train_and_test_list:
                    src_path = os.path.join(dir_full_path, pic)
                    output_name = str(
                        unseen_test_index
                    ) + '_' + name_to_id_dict[cls_name] + '.png'
                    img = cv2.imread(src_path)
                    cv2.imwrite(
                        os.path.join(output_test_unseen_path, output_name),
                        img)
                    unseen_test_index += 1

            continue

        # Pick the test pictures from the ORIGINAL pool...
        if len(train_and_test_list) <= (train_limit + test_limit):
            mid = int(len(train_and_test_list) / 2)
            # train_list = train_and_test_list[:mid]
            test_list = train_and_test_list[mid:]
        else:
            test_list = train_and_test_list[:test_limit]
            # train_list = train_and_test_list[test_limit:]
        # ...but take the training pictures from the distorted copy.
        # NOTE(review): train_limit therefore does not cap the train set —
        # every distorted picture of the class is used; confirm intentional.
        train_final_output_dir = output_train_path
        dir_path = os.path.join(distortion_train_path, cls_name)
        train_list = os.listdir(dir_path)

        for pic in train_list:
            src_path = os.path.join(dir_path, pic)
            output_name = str(
                train_index) + '_' + name_to_id_dict[cls_name] + '.png'
            img = cv2.imread(src_path)
            cv2.imwrite(os.path.join(train_final_output_dir, output_name), img)
            train_index += 1

        for pic in test_list:
            src_path = os.path.join(dir_full_path, pic)
            output_name = str(
                test_index) + '_' + name_to_id_dict[cls_name] + '.png'
            img = cv2.imread(src_path)
            cv2.imwrite(os.path.join(output_test_path, output_name), img)
            test_index += 1
    print('training & test set prepared.')
    return