def create_category_label():
    '''
    Build the category entry list and per-sample category labels for the
    Category_and_Attribute benchmark and save them under Label/.
    '''
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    entry_lines = io.load_str_list(ca_root + 'Anno/list_category_cloth.txt')[2::]
    label_lines = io.load_str_list(ca_root + 'Anno/list_category_img.txt')[2::]

    # create category entry: each line is "<name words...> <type>"
    cat_entry = [{'entry': ' '.join(line.split()[0:-1]),
                  'type': int(line.split()[-1])} for line in entry_lines]
    io.save_json(cat_entry, design_root + 'Label/cat_entry.json')
    print('create category entry')

    # map the original image path (starting at "img") back to a sample id
    img2id = {s['img_path_org'][s['img_path_org'].find('img')::]: s_id
              for s_id, s in samples.items()}

    # create category label; annotation labels are 1-based, store 0-based
    cat_label = {}
    for line in label_lines:
        fields = line.split()
        cat_label[img2id[fields[0]]] = int(fields[1]) - 1
    io.save_data(cat_label, design_root + 'Label/ca_cat_label.pkl')
    print('create category label')
def create_debug_ca_dataset():
    '''
    Create a mini subset of Category_and_Attribute data.
    Assume standard CA index file and label files already exist.
    '''
    num_train = 10
    num_test = 10
    same_train_test = True

    samples = io.load_json(design_root + 'Label/ca_samples.json')
    attr_label = io.load_data(design_root + 'Label/ca_attr_label.pkl')
    bbox_label = io.load_data(design_root + 'Label/ca_bbox_label_256.pkl')
    lm_label = io.load_data(design_root + 'Label/ca_landmark_label_256.pkl')

    if same_train_test:
        # reuse the same ids for both subsets
        id_list = list(samples.keys())[0:num_train]
        split = {'train': id_list, 'test': id_list}
    else:
        id_list = list(samples.keys())[0:(num_train + num_test)]
        split = {'train': id_list[0:num_train], 'test': id_list[num_train::]}

    # restrict every label dict to the selected ids
    keep = lambda d: {s_id: d[s_id] for s_id in id_list}
    io.save_json(keep(samples), design_root + 'Label/debugca_samples.json')
    io.save_data(keep(attr_label), design_root + 'Label/debugca_attr_label.pkl')
    io.save_data(keep(bbox_label), design_root + 'Label/debugca_bbox_label.pkl')
    io.save_data(keep(lm_label), design_root + 'Label/debugca_landmark_label.pkl')
    io.save_json(split, design_root + 'Split/debugca_split.json')
def create_attr_entry():
    '''
    Create attribute entry list, which contains original 1000 attributes used in
    Category_and_Attribute benchmark
    '''
    print('loading data...')
    entry_lines = io.load_str_list(ca_root + 'Anno/list_attr_cloth.txt')[2::]
    attr_label = io.load_data(design_root + 'Label/ca_attr_label.pkl')
    split = io.load_json(design_root + 'Split/ca_split.json')
    train_ids = set(split['train'])
    # stack the label vectors of training samples only
    attr_mat = np.array([v for k, v in attr_label.items() if k in train_ids],
                        dtype=np.float32)

    print('computing positive rates')
    num_sample = len(train_ids)
    pos_rate = attr_mat.sum(axis=0) / num_sample

    attr_entry = []
    for idx, line in enumerate(entry_lines):
        fields = line.split()
        attr_entry.append({
            'entry': ' '.join(fields[0:-1]),  # attribute name (may contain spaces)
            'type': int(fields[-1]),          # attribute type id
            'pos_rate': pos_rate[idx]         # positive fraction on the train split
        })
    io.save_json(attr_entry, design_root + 'Label/attr_entry.json')
def create_split():
    '''
    Create split following the original partition
    '''
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    # map the original image path (starting at "img") back to a sample id
    img2id = {s['img_path_org'][s['img_path_org'].find('img')::]: s_id
              for s_id, s in samples.items()}

    split = {'train': [], 'val': [], 'test': []}
    for line in io.load_str_list(ca_root + 'Eval/list_eval_partition.txt')[2:]:
        img_path, status = line.split()
        split[status].append(img2id[img_path])

    io.mkdir_if_missing(design_root + 'Split')
    io.save_json(split, design_root + 'Split/ca_split.json')
    print('create split')
    for status in ['train', 'val', 'test']:
        print('%s: %d' % (status, len(split[status])))

    # merged train+val split used by most downstream scripts
    split_trainval = {'train': split['train'] + split['val'],
                      'test': split['test']}
    io.save_json(split_trainval, design_root + 'Split/ca_split_trainval.json')
def create_attr_label():
    '''Create per-item attribute labels for the Category_and_Attribute data and
    print the most frequent attributes of each attribute type.

    Caches the label dict to fn_out on first run; later runs reload the cache.
    '''
    # previous (In-shop) configuration kept for reference:
    # attr_list = io.load_str_list(root + 'Anno/list_attr_cloth.txt')[2::]
    # attr_anno = io.load_str_list(root + 'Anno/list_attr_items.txt')[2::]
    # fn_out = root + 'Label/attribute_inshop.json'
    # num_attr = 463
    # n_top = 5
    attr_list = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_cloth.txt')[2::]
    # each line is "<name words...> <type>"; keep only the name here
    attr_list = [' '.join(a.split()[0:-1]) for a in attr_list]
    attr_anno = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_img.txt')[2::]
    fn_out = root + 'Label/attribute_ca.json'
    num_attr = 1000
    n_top = 5
    # create label data (only if the cache file does not exist yet)
    if not os.path.isfile(fn_out):
        attr_data = {}
        for line in attr_anno:
            line = line.split()
            item_id = line[0]
            label = [int(c) for c in line[1::]]
            assert len(label) == num_attr
            attr_data[item_id] = label
        io.save_json(attr_data, fn_out)
    else:
        attr_data = io.load_json(fn_out)
    num_sample = len(attr_data)

    # most frequent attribute in each attribute type
    attr_list_ref = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_cloth.txt')[2::]
    # attribute name -> attribute type id (as string)
    attr_type = {' '.join(a.split()[0:-1]): a.split()[-1] for a in attr_list_ref}
    # NOTE(review): column i of attr_mat is assumed to correspond to
    # attr_list[i]; labels may use -1/1 encoding, hence the > 0 test below.
    # Python-2 only: attr_data.values() returns a list here.
    attr_mat = np.array(attr_data.values(), dtype = float)
    attr_count = np.where(attr_mat > 0, 1, 0).sum(axis = 0)
    attr_count_type = {}
    for i, attr_name in enumerate(attr_list):
        # attributes missing from the reference list fall into bucket '-1'
        t = attr_type[attr_name] if attr_name in attr_type else '-1'
        if t not in attr_count_type:
            attr_count_type[t] = []
        attr_count_type[t].append((attr_name, attr_count[i]))
    for t in {'1', '2', '3', '4', '5', '-1'}:
        if t not in attr_count_type:
            continue
        attr_count_list = attr_count_type[t]
        # sort by descending positive count and report the top n_top
        attr_count_list.sort(key = lambda x: x[1], reverse = True)
        print('attribute type: %s' % t)
        for attr_name, count in attr_count_list[0:n_top]:
            print('%s: %d (%.1f %%)' % (attr_name, count, 100. * count / num_sample))
        print('\n')
def create_test_pair():
    '''
    Create (target, source) TPS test pairs: each test sample is paired with a
    different sample of the same category. Saves the pair dict and the
    target/source id lists under Temp/.
    '''
    # seed the RNG so the pairing is reproducible.
    # bugfix: the original called np.random.rand(0), which draws an empty
    # array and does NOT seed the generator.
    np.random.seed(0)
    split = io.load_json(design_root + 'Split/ca_gan_split_trainval_upper.json')
    cat_label = io.load_data(design_root + 'Label/ca_cat_label.pkl')
    cat_entry = io.load_json(design_root + 'Label/cat_entry.json')
    # group samples by category label
    cat_to_ids = defaultdict(lambda: [])
    for s_id in split['test']:
        c = cat_label[s_id]
        cat_to_ids[c].append(s_id)
    n = 0
    pair_list = []
    for c, s_list in cat_to_ids.iteritems():
        print('[%d/%d] %s: %d samples...' % (n, len(cat_to_ids), cat_entry[c]['entry'], len(s_list)))
        n += 1
        s_list_org = [s_id for s_id in s_list]
        # Sattolo-style shuffle (j drawn from [i+1, len)): every element moves,
        # so no sample is paired with itself when the group has > 1 sample
        for i in range(len(s_list) - 1):
            j = np.random.randint(i + 1, len(s_list))
            temp = s_list[i]
            s_list[i] = s_list[j]
            s_list[j] = temp
        pair_list += zip(s_list_org, s_list)
    # target id -> source id
    pair_dict = {s_tar: s_src for s_tar, s_src in pair_list}
    io.save_json(pair_dict, design_root + 'Temp/ca_test_tps_pair.json')
    io.save_str_list(pair_dict.keys(), design_root + 'Temp/ca_test_tps_tar.txt')
    io.save_str_list(pair_dict.values(), design_root + 'Temp/ca_test_tps_src.txt')
def divide_vert_into_bodypart():
    '''
    Devide 6890 verts of a SMPL model into 24 parts, each part corresponding to a joint.
    A vert will be assigned to the joint with the largest vert-to-angle weight.
    May need to run this function under HMR environment
    '''
    smpl_dict = io.load_data('scripts/3d/neutral_smpl_with_cocoplus_reg.pkl')
    # pick, for each vert, the joint with the maximum skinning weight
    assignment = smpl_dict['weights'].argmax(axis=1)
    io.save_json(assignment.tolist(), 'scripts/3d/smpl_vert_to_bodypart.json')
def save(self, fn=None):
    '''Serialize the parsed options to a json file.

    If fn is omitted, write train_opt.json / test_opt.json under the
    experiment's checkpoint directory (created if missing).

    Raises:
        Exception: if options have not been parsed yet.
    '''
    if self.opt is None:
        raise Exception("parse options before saving!")
    if fn is None:
        expr_dir = os.path.join('checkpoints', self.opt.id)
        io.mkdir_if_missing(expr_dir)
        basename = 'train_opt.json' if self.opt.is_train else 'test_opt.json'
        fn = os.path.join(expr_dir, basename)
    io.save_json(vars(self.opt), fn)
def load_test_pair_index():
    '''Replace the test pairs in pair_split.json with the 12800 pair indexes
    released with the NIPS17 paper "Pose Guided Person Image Generation".'''
    num_pair = 12800
    # test index used in NIPS17 paper: Pose Guided Person Image Generation
    pair_index = io.load_data('datasets/DF_Pose/Anno/NIPS17-test/p_pair_test.p')
    pair_split = io.load_json('datasets/DF_Pose/Label/pair_split.json')
    # keep the previously generated pairs under a separate key
    pair_split['test_disordered_pair'] = pair_split['test']
    # adopt the NIPS17 pairs, stripping the 4-char ".jpg" suffix from each name
    nips_pairs = pair_index[0:num_pair]
    pair_split['test'] = [[a[0:-4], b[0:-4]] for a, b in nips_pairs]
    np.random.shuffle(pair_split['test'])
    io.save_json(pair_split, 'datasets/DF_Pose/Label/pair_split.json')
def create_inner_edge_map(): ''' extract the edges inside the clothing regions ''' # config kernel_size = 7 threshold = 0 split = io.load_json(design_root + 'Split/ca_gan_split_trainval.json') id_list = split['train'] + split['test'] edge_root = design_root + 'Img/edge_ca_256' seg_root = design_root + 'Img/seg_ca_256' output_dir = design_root + 'Img/edge_ca_256_inner' io.mkdir_if_missing(output_dir) kernel = np.zeros((kernel_size, kernel_size), np.uint8) k = (kernel_size - 1) / 2 for i in range(kernel_size): for j in range(kernel_size): if np.abs(i - k) + np.abs(j - k) <= k: kernel[i, j] = 1 for i, s_id in enumerate(id_list): edge = image.imread(os.path.join(edge_root, s_id + '.jpg'), 'grayscale') seg = image.imread(os.path.join(seg_root, s_id + '.bmp'), 'grayscale') mask_upper = cv2.erode((seg == 3).astype(np.uint8), kernel) mask_lower = cv2.erode((seg == 4).astype(np.uint8), kernel) mask = mask_upper | mask_lower edge_inner = edge * mask edge_inner = (edge_inner >= threshold).astype(np.uint8) * edge_inner image.imwrite(edge_inner, os.path.join(output_dir, s_id + '.jpg')) print('extracting inner edge %d / %d' % (i, len(id_list))) # create labels edge_paths = { s_id: os.path.join(output_dir, s_id + '.jpg') for s_id in id_list } split_debug = io.load_json(design_root + 'Split/debugca_gan_split.json') edge_paths_debug = { s_id: p for s_id, p in edge_paths.iteritems() if s_id in split_debug['train'] + split_debug['test'] } io.save_json(edge_paths, design_root + 'Label/ca_edge_inner_paths.json') io.save_json(edge_paths_debug, design_root + 'Label/debugca_edge_inner_paths.json')
def create_image_info():
    '''
    create a .mat file containing:
    - id_1
    - id_2
    - image_1
    - image_2
    - image_gen (generated by PoseTranfer_x)
    '''
    # NOTE(review): num_sample is not defined in this function -- it is
    # presumably a module-level constant in this script; verify before running.
    image_dir = '/data2/ynli/datasets/DF_Pose/Img/img_df/'
    model_id = 'PoseTransfer_7.5'
    image_gen_dir = '/data2/ynli/Fashion/fashionHD/checkpoints/%s/test/' % model_id
    pair_indices = io.load_json('datasets/DF_Pose/Label/pair_split.json')['test'][0:num_sample]
    pose_label = io.load_data('datasets/DF_Pose/Label/pose_label.pkl')
    id_1 = [p[0] for p in pair_indices]  # source-image ids
    id_2 = [p[1] for p in pair_indices]  # target-image ids
    image_1 = []
    image_2 = []
    image_gen = []
    scale_2over1 = []
    for i in range(num_sample):
        image_1.append(image_dir + id_1[i] + '.jpg')
        image_2.append(image_dir + id_2[i] + '.jpg')
        # generated images are named "<id_1>_<id_2>.jpg" by the test script
        image_gen.append(image_gen_dir + '%s_%s.jpg' % (id_1[i], id_2[i]))
        pose_1 = np.array(pose_label[id_1[i]])
        pose_2 = np.array(pose_label[id_2[i]])
        scale_2over1.append(pose_util.relative_scale_from_pose(pose_2, pose_1))
    image_info = {
        'id_1': id_1,
        'id_2': id_2,
        'image_1': image_1,
        'image_2': image_2,
        'image_gen': image_gen,
        'model_id': model_id,
        'scale_2over1': scale_2over1
    }
    # savemat needs object arrays for string/cell data
    data_dict = {k: np.array(v, dtype=np.object) for k, v in image_info.iteritems()}
    io.save_json(image_info, 'temp/patch_matching/label/image_info.json')  # for other functions in this script
    scipy.io.matlab.savemat('temp/patch_matching/label/image_info.mat', data_dict)  # for PatchMatch matlab tools and other matlab implementions
def create_sample_index_for_attribute_dataset():
    '''
    Simply add "img_path" field for each sample, which is the image path used in
    training and testing
    '''
    # config
    img_root = root + 'Img/img_aligned/'
    fn_out = root + 'Label/samples_attr.json'

    # attach the aligned-image path to every sample record
    samples = io.load_json(root + 'Label/samples.json')
    for s_id, sample in samples.items():
        sample['img_path'] = img_root + s_id + '.jpg'
    io.save_json(samples, fn_out)
def create_split():
    '''
    create split file. follow the partition used in VITON paper
    '''
    train_pairs = io.load_str_list(zalando_root + 'Source/viton_train_pairs.txt')
    test_pairs = io.load_str_list(zalando_root + 'Source/viton_test_pairs.txt')
    split = {}
    for subset, pairs in [('train', train_pairs), ('test', test_pairs)]:
        # sample id = first 6 characters of the first token on each pair line
        split[subset] = [p.split()[0][0:6] for p in pairs]
    # small fixed subset for quick debugging
    split['debug'] = split['train'][0:32]
    io.mkdir_if_missing(zalando_root + 'Split')
    io.save_json(split, zalando_root + 'Split/zalando_split.json')
def create_edge_path():
    '''Create edge-image path labels for the full GAN split and the debug split.'''
    edge_root = design_root + 'Img/edge_ca_256'
    samples = io.load_json(design_root + 'Label/ca_samples.json')

    # (split file, output label file) pairs
    targets = [
        ('Split/ca_gan_split_trainval.json', 'Label/ca_edge_paths.json'),
        ('Split/debugca_gan_split.json', 'Label/debugca_edge_paths.json'),
    ]
    for split_fn, label_fn in targets:
        split = io.load_json(design_root + split_fn)
        edge_path = {s_id: os.path.join(edge_root, s_id + '.jpg')
                     for s_id in split['train'] + split['test']}
        io.save_json(edge_path, design_root + label_fn)
def gather_tps_pair():
    '''Merge the train/test TPS pair files into one label file and rename the
    warped edge images from "<tar>.jpg" to "<tar>_<src>.jpg".'''
    split = io.load_json(design_root + 'Split/ca_gan_split_trainval_upper.json')
    tps_pair = io.load_json(design_root + 'Temp/ca_train_tps_pair.json')
    tps_pair.update(io.load_json(design_root + 'Temp/ca_test_tps_pair.json'))
    io.save_json(tps_pair, design_root + 'Label/ca_tps_pair.json')
    print(len(split))
    print(len(tps_pair))

    img_dir = design_root + 'Img/edge_ca_256_tps/'
    missing_list = []
    for i, (tar_id, src_id) in enumerate(tps_pair.items()):
        print('%d/%d' % (i, len(tps_pair)))
        old_path = img_dir + tar_id + '.jpg'
        new_path = img_dir + '%s_%s.jpg' % (tar_id, src_id)
        if not os.path.isfile(old_path):
            # warped image was never generated for this target
            missing_list.append(tar_id)
            continue
        shutil.move(old_path, new_path)
    print(missing_list)
def create_gan_split():
    '''
    create split for gan training
    ca_gan: containing all frontal images
    ca_gan_upper: containing all frontial, upperbody images
    '''
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    split = io.load_json(design_root + 'Split/ca_split_trainval.json')
    # use same image set as in Zhu Shizhan's ICCV17 FashionGAN paper
    img_list = io.load_str_list(
        'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/name_list.txt'
    )
    img_set = set(img_list)
    split_gan = {'train': [], 'test': []}
    split_gan_upper = {'train': [], 'test': []}
    for set_name in ['train', 'test']:
        for s_id in split[set_name]:
            s = samples[s_id]
            img_name = s['img_path_org']
            img_name = img_name[img_name.find('img/')::]
            if img_name in img_set:
                split_gan[set_name].append(s_id)
                if s['cloth_type'] == 1:  # 1 = upper-body clothing
                    split_gan_upper[set_name].append(s_id)
    print('create split "split_gan"')
    print('train: %d, test: %d, total: %d' %
          (len(split_gan['train']), len(split_gan['test']),
           len(split_gan['train']) + len(split_gan['test'])))
    print('create split "split_gan_upper"')
    # bugfix: the total previously added len(split_gan['test']) (full split)
    # instead of len(split_gan_upper['test']), over-reporting the upper total
    print('train: %d, test: %d, total: %d' %
          (len(split_gan_upper['train']), len(split_gan_upper['test']),
           len(split_gan_upper['train']) + len(split_gan_upper['test'])))
    io.save_json(split_gan, design_root + 'Split/ca_gan_split_trainval.json')
    io.save_json(split_gan_upper,
                 design_root + 'Split/ca_gan_split_trainval_upper.json')
def clean_attribute_label(): ''' Clean attribute labels created by create_attribute_label(). - Remove attribute entries with only few samples - Compute positive rate for each attribute ''' # config attr_top = 500 # load attribute entry and label print('loading attribute label') attr_entry = io.load_json(root + 'Label/attribute_entry.json') attr_label = io.load_json(root + 'Label/attribute_label.json') # count positive sample number of each attribute print('select top %d attributes' % attr_top) attr_mat = np.array(attr_label.values()) attr_pos_num = attr_mat.sum(axis=0) attr_order = np.argsort(attr_pos_num * -1)[0:attr_top] attr_entry_t = [attr_entry[idx] for idx in attr_order] s_ids = attr_label.keys() attr_mat_t = attr_mat[:, attr_order] attr_label_t = dict(zip(s_ids, attr_mat_t.tolist())) attr_pos_num_t = attr_pos_num[attr_order].tolist() # compute positive rate for each attribute num_sample = len(attr_label) for idx, att in enumerate(attr_entry_t): attr_entry_t[idx]['pos_rate'] = 1.0 * attr_pos_num_t[idx] / num_sample # output print('saving cleaned attribute label') fn_entry = root + 'Label/attribute_entry_top%d.json' % attr_top io.save_json(attr_entry_t, fn_entry) fn_label = root + 'Label/attribute_label_top%d.json' % attr_top io.save_json(attr_label_t, fn_label)
def parse(self, ord_str=None, save_to_file=True, display=True, set_gpu=True):
    '''Parse command-line (or string) options, configure GPUs, and optionally
    display/save the result.

    Args:
        ord_str: option string to parse instead of sys.argv (split on spaces)
        save_to_file: write the options to the experiment checkpoint dir
        display: print all parsed options
        set_gpu: export CUDA_VISIBLE_DEVICES and select device 0

    Returns:
        the parsed options namespace (also stored as self.opt)
    '''
    if not self.initialized:
        self.initialize()
    if ord_str is None:
        self.opt = self.parser.parse_args()
    else:
        ord_list = ord_str.split()
        self.opt = self.parser.parse_args(ord_list)
    self.auto_set()
    if len(self.opt.gpu_ids) > 0 and set_gpu:
        # restrict visible devices, then renumber gpu_ids to 0..n-1 since
        # CUDA re-indexes the visible devices from 0
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
            [str(i) for i in self.opt.gpu_ids])
        self.opt.gpu_ids = range(len(self.opt.gpu_ids))
        torch.cuda.set_device(0)
    args = vars(self.opt)
    # display options
    if display:
        print('------------ Options -------------')
        for k, v in sorted(args.items()):
            print('%s: %s' % (str(k), str(v)))
        print('-------------- End ----------------')
    # save to disk
    if save_to_file:
        expr_dir = os.path.join('checkpoints', self.opt.id)
        io.mkdir_if_missing(expr_dir)
        if self.opt.is_train:
            fn_out = os.path.join(expr_dir, 'train_opt.json')
        else:
            fn_out = os.path.join(expr_dir, 'test_opt.json')
        io.save_json(args, fn_out)
    return self.opt
def create_vis_pair():
    '''
    select a small group of targets from test set. prepare several edge source
    items for each target.

    Saves a {target_id: [source_ids]} dict plus flat target/source id lists
    under Temp/.
    '''
    # seed the RNG so the selection is reproducible.
    # bugfix: the original called np.random.rand(0), which draws an empty
    # array and does NOT seed the generator.
    np.random.seed(0)
    num_tar = 500
    num_src_per_tar = 6
    split = io.load_json(design_root + 'Split/ca_gan_split_trainval_upper.json')
    cat_label = io.load_data(design_root + 'Label/ca_cat_label.pkl')
    cat_entry = io.load_json(design_root + 'Label/cat_entry.json')
    id_list = split['test']
    # group samples by category label
    cat_to_ids = defaultdict(lambda: [])
    for s_id in split['test']:
        c = cat_label[s_id]
        cat_to_ids[c].append(s_id)
    # target list: a random subset of the test set
    np.random.shuffle(id_list)
    tar_list = id_list[0:num_tar]
    # select sources for each target: same category, excluding the target itself
    group_dict = {}
    for tar_id in tar_list:
        c = cat_label[tar_id]
        src_list = [s_id for s_id in cat_to_ids[c] if s_id != tar_id]
        np.random.shuffle(src_list)
        group_dict[tar_id] = src_list[0:num_src_per_tar]
    io.save_json(group_dict, design_root + 'Temp/ca_vis_tps_group.json')
    # flatten into parallel target/source lists
    output_src_list = []
    output_tar_list = []
    for tar_id, src_list in group_dict.iteritems():
        output_tar_list += [tar_id] * len(src_list)
        output_src_list += src_list
    io.save_str_list(output_tar_list, design_root + 'Temp/ca_vis_tps_tar.txt')
    io.save_str_list(output_src_list, design_root + 'Temp/ca_vis_tps_src.txt')
def create_pair_index():
    '''
    create pair index
    '''
    from itertools import combinations
    split = io.load_json(root + 'Label/split.json')
    pair_split = {'train': [], 'test': []}
    for subset in ['train', 'test']:
        # group sample ids by cloth item (first 5 chars of the id)
        by_cloth = defaultdict(list)
        for s_id in split[subset]:
            by_cloth[s_id[0:5]].append(s_id)
        # all unordered pairs within each cloth group
        for group in by_cloth.values():
            pair_split[subset] += combinations(group, 2)
    np.random.shuffle(pair_split['train'])
    np.random.shuffle(pair_split['test'])
    pair_split['debug'] = pair_split['train'][0:32]
    io.save_json(pair_split, root + 'Label/pair_split.json')
    print('train pair: %d' % len(pair_split['train']))
    print('test pair: %d' % len(pair_split['test']))
def create_color_entry_and_label():
    '''
    Create color attribute entries and color labels

    An item contributes a color only when its description contains exactly one
    color word; each sample's label is a binary vector over the color entries.
    '''
    print('loading data')
    # load description
    desc_list = io.load_json(inshop_root + 'Anno/list_description_inshop.json')
    # item id -> list of lowercase color words from the description
    item2color = {d['item']: d['color'].lower().replace('-', ' ').split() for d in desc_list}
    # keep only single-word colors as entries
    # NOTE(review): set iteration order is not deterministic across runs, so
    # the order of color_entry (and hence label columns) may vary between runs
    colors = set([c[0] for c in item2color.values() if len(c) == 1])
    color_entry = [{'entry': c, 'type': 0, 'pos_rate': -1} for c in colors]
    # load sample index
    samples = io.load_json(design_root + 'Label/inshop_samples.json')
    split = io.load_json(design_root + 'Split/inshop_split.json')
    train_ids = set(split['train'])

    print('computing positive rates')
    color_label = {}
    for s_id, s in samples.iteritems():
        color = item2color[s['item_id']]
        # one-hot-ish: 1 for each entry word present in the item's color words
        label = [1 if c['entry'] in color else 0 for c in color_entry]
        color_label[s_id] = label
    # positive rates are computed on training samples only
    color_mat = np.array([v for k, v in color_label.iteritems() if k in train_ids], dtype=np.float32)
    num_sample = len(train_ids)
    pos_rate = (color_mat.sum(axis=0) / num_sample).tolist()
    for idx, att in enumerate(color_entry):
        color_entry[idx]['pos_rate'] = pos_rate[idx]

    print('saving data')
    io.save_json(color_entry, design_root + 'Label/color_entry.json')
    io.save_data(color_label, design_root + 'Label/inshop_attr_label.pkl')
def create_image_index():
    '''
    create image index, split and pose label from original index.p file
    '''
    index = io.load_data(root + 'Anno/index.pkl')
    split = {'train': [], 'test': []}
    pose_label = {}
    for img, joints, is_train in zip(index['imgs'], index['joints'], index['train']):
        # sample id = first 7 chars of the second path component
        s_id = img.split('/')[1][0:7]
        subset = 'train' if is_train else 'test'
        split[subset].append(s_id)
        # scale normalized joint coordinates to the 256x256 image;
        # (-1, -1) marks an invalid joint and is left untouched
        for joint in joints:
            if joint[0] == -1 and joint[1] == -1:
                continue
            joint[0] *= 256
            joint[1] *= 256
        pose_label[s_id] = joints.tolist()
    io.save_json(split, root + 'Label/split.json')
    io.save_data(pose_label, root + 'Label/pose_label.pkl')
def create_color_attribute_label():
    '''
    attributes related to color
    attribute types: 1-texture, 2-fabrix, 3-shape, 4-part, 5-style
    '''
    attr_entry = io.load_json(design_root + 'Label/attr_entry.json')
    attr_label = io.load_data(design_root + 'Label/ca_attr_label.pkl')
    # keep texture (1), fabric (2) and style (5) attributes
    index = [i for i, entry in enumerate(attr_entry) if entry['type'] in {1, 2, 5}]
    new_attr_label = {}
    for n, (s_id, label) in enumerate(attr_label.iteritems(), 1):
        new_attr_label[s_id] = [label[i] for i in index]
        print('%d/%d' % (n, len(attr_label)))
    new_attr_entry = [attr_entry[i] for i in index]
    print('%d color-related attributes' % len(index))
    io.save_data(new_attr_label, design_root + 'Label/ca_color_attr_label.pkl')
    io.save_json(new_attr_entry, design_root + 'Label/color_attr_entry.json')
def merge_seg_map():
    '''
    input seg map: 0-background, 1-hair, 2-head, 3-upperbody, 4-lowerbody, 5-leg, 6-arm
    '''
    # config
    seg_root = '/data2/ynli/Fashion/ICCV17-fashionGAN/complete_demo/output/img_ca_256/seg_7'
    tar_root = 'datasets/DeepFashion/Fashion_design/Img/seg_ca_256'
    io.mkdir_if_missing(tar_root)
    samples = io.load_json('datasets/DeepFashion/Fashion_design/Label/ca_samples.json')
    seg_map_paths = {}
    for i, (s_id, s) in enumerate(samples.items()):
        seg_org = image.imread(os.path.join(seg_root, s_id + '.bmp'), mode='grayscale')
        # select the clothing region by cloth_type: 1 = upper-body (label 3),
        # 2 = lower-body (label 4), otherwise both regions
        if s['cloth_type'] == 1:
            region = seg_org == 3
        elif s['cloth_type'] == 2:
            region = seg_org == 4
        else:
            region = np.logical_or(seg_org == 3, seg_org == 4)
        seg_mrg = region.astype(np.uint8)
        fn_out = os.path.join(tar_root, s_id + '.bmp')
        image.imwrite(seg_mrg, fn_out)
        seg_map_paths[s_id] = fn_out
        print('\rmerge segmentation map: %d / %d' % (i, len(samples)))
    print('\n')
    io.save_json(seg_map_paths, 'datasets/DeepFashion/Fashion_design/Label/ca_seg_paths.json')
def create_dug_ca_gan_dataset():
    '''Create a mini debug subset of the CA-GAN data (sample index, labels,
    segmentation paths and split).'''
    num_train = 10
    num_test = 10
    same_train_test = True

    samples = io.load_json(design_root + 'Label/ca_samples.json')
    attr_label = io.load_data(design_root + 'Label/ca_attr_label.pkl')
    bbox_label = io.load_data(design_root + 'Label/ca_bbox_label_256.pkl')
    lm_label = io.load_data(design_root + 'Label/ca_landmark_label_256.pkl')
    seg_path_list = io.load_json(design_root + 'Label/ca_seg_paths.json')
    ca_split = io.load_json(design_root + 'Split/ca_gan_split_trainval_upper.json')

    train_ids = ca_split['train'][0:num_train]
    if same_train_test:
        # reuse the same ids for both subsets
        split = {'train': train_ids, 'test': train_ids}
        id_list = split['train']
    else:
        split = {'train': train_ids, 'test': ca_split['test'][0:num_test]}
        id_list = split['train'] + split['test']

    # restrict every dict to the selected ids
    keep = lambda d: {s_id: d[s_id] for s_id in id_list}
    io.save_json(keep(samples), design_root + 'Label/debugca_gan_samples.json')
    io.save_data(keep(attr_label), design_root + 'Label/debugca_gan_attr_label.pkl')
    io.save_data(keep(bbox_label), design_root + 'Label/debugca_gan_bbox_label.pkl')
    io.save_data(keep(lm_label), design_root + 'Label/debugca_gan_landmark_label.pkl')
    io.save_json(keep(seg_path_list), design_root + 'Label/debugca_seg_paths.json')
    io.save_json(split, design_root + 'Split/debugca_gan_split.json')
def align_dataset_index():
    '''
    Align the DeepFashion Inshop sample indices between VUnet version and DeformableGAN version
    '''
    # load sample index 1 (VUnet version)
    split = io.load_json('datasets/DF_Pose/Label/split.json')
    img_dir_1 = 'datasets/DF_Pose/Img/img_df/'
    samples_1 = {sid:(img_dir_1 + '%s.jpg'%sid) for sid in split['train']+split['test']}
    # load sample index 2 (deformableGAN version)
    train_list_2 = pd.read_csv('datasets/DF_Pose/DeformableGAN_Version/annotations/fasion-annotation-train.csv', sep=':')['name']
    train_dir_2 = 'datasets/DF_Pose/DeformableGAN_Version/images/train/'
    test_list_2 = pd.read_csv('datasets/DF_Pose/DeformableGAN_Version/annotations/fasion-annotation-test.csv', sep=':')['name']
    test_dir_2 = 'datasets/DF_Pose/DeformableGAN_Version/images/test/'
    samples_2 = [(fn, train_dir_2+fn) for fn in train_list_2] + [(fn, test_dir_2+fn) for fn in test_list_2]
    samples_2 = dict(samples_2)
    # debug
    # samples_1 = {k:v for k,v in samples_1.items()[0:100]}
    # samples_2 = samples_1
    # samples_2 = {k:v for k,v in samples_2.items()[0:100]}
    # load images into memory
    print('loading images 1 ...')
    images_1 = {}
    for sid, fn in tqdm.tqdm(samples_1.items()):
        images_1[sid] = imageio.imread(fn)
    images_2 = {}
    print('loading images 2 ...')
    for sid, fn in tqdm.tqdm(samples_2.items()):
        images_2[sid] = imageio.imread(fn)
    # group image by identity. alignment will be applied between identities instead of single images for efficiency.
    # id format appears to be "<pid>_<view...>"; the first char after "_" is the view code
    persion_ids_1 = defaultdict(lambda :{})
    for sid in samples_1.keys():
        pid = sid.split('_')[0]
        view = sid.split('_')[1][0]
        persion_ids_1[pid][view] = sid
    persion_ids_2 = defaultdict(lambda: {})
    for sid in samples_2.keys():
        pid = sid.split('_')[0]
        view = sid.split('_')[1][0]
        persion_ids_2[pid][view] = sid
    # align index: for each identity in set 1, compare one representative view
    # against the same view of every identity in set 2 and keep the best matches
    print('aligning image index ...')
    num_cand = 3  # number of candidate identities kept per query
    func_similarity = similarity_cos
    map_1to2 = {}
    map_1to2_cand = {}
    for pid_1, s_dict_1 in tqdm.tqdm(persion_ids_1.items()):
    # for pid_1, s_dict_1 in persion_ids_1.items():
        # cand_list holds (sid_2, score) sorted by descending score;
        # seeded with a sentinel so the first real score always inserts
        cand_list = [(None, -1)]
        s_list_1 = s_dict_1.items()
        s_list_1.sort()
        # representative: the lexicographically first view of this identity
        view_1, sid_1 = s_list_1[0]
        for pid_2, s_dict_2 in tqdm.tqdm(persion_ids_2.items()):
        # for pid_2, s_dict_2 in persion_ids_2.items():
            if view_1 not in s_dict_2:
                continue
            sid_2 = s_dict_2[view_1]
            score = func_similarity(images_1[sid_1], images_2[sid_2])
            # find the insertion position that keeps cand_list sorted
            i_insert = -1
            for i in range(len(cand_list)-1, -1, -1):
                if score < cand_list[i][1]:
                    break
                i_insert = i
            if i_insert >= 0:
                cand_list.insert(i_insert, (sid_2, score))
                if len(cand_list) > num_cand:
                    cand_list = cand_list[0:num_cand]
        map_1to2_cand[pid_1] = cand_list
        # best-matching identity: map all shared views, None for missing views
        pid_2 = cand_list[0][0].split('_')[0]
        s_dict_2 = persion_ids_2[pid_2]
        for view_1, sid_1 in s_dict_1.iteritems():
            if view_1 in s_dict_2:
                map_1to2[sid_1] = s_dict_2[view_1]
            else:
                map_1to2[sid_1] = None
    # output result
    output_dir = 'temp/search_image/DeepFashion_Inshop_VUnetversion_to_DeformableGANversion/'
    io.mkdir_if_missing(output_dir)
    io.save_json(map_1to2, output_dir + 'map.json')
    io.save_json(map_1to2_cand, output_dir + 'map_cand.json')
    # output samples: matched images side by side for visual inspection
    io.mkdir_if_missing(output_dir + 'vis')
    print('output visualization ...')
    for sid_1, sid_2 in tqdm.tqdm(map_1to2.iteritems()):
        if sid_2 is not None:
            fn = output_dir + 'vis/%s-%s.jpg'%(sid_1, sid_2)
            img = np.hstack((images_1[sid_1], images_2[sid_2]))
            imageio.imwrite(fn, img)
def align_dataset_index_bat():
    '''
    Align the DeepFashion Inshop sample indices between VUnet version and DeformableGAN version

    Batched variant of align_dataset_index: candidate images are stacked and
    scored block-by-block with a batch similarity function.
    '''
    # load sample index 1 (VUnet version)
    split = io.load_json('datasets/DF_Pose/Label/split.json')
    img_dir_1 = 'datasets/DF_Pose/Img/img_df/'
    samples_1 = {sid:(img_dir_1 + '%s.jpg'%sid) for sid in split['train']+split['test']}
    # load sample index 2 (deformableGAN version)
    train_list_2 = pd.read_csv('datasets/DF_Pose/DeformableGAN_Version/annotations/fasion-annotation-train.csv', sep=':')['name']
    train_dir_2 = 'datasets/DF_Pose/DeformableGAN_Version/images/train/'
    test_list_2 = pd.read_csv('datasets/DF_Pose/DeformableGAN_Version/annotations/fasion-annotation-test.csv', sep=':')['name']
    test_dir_2 = 'datasets/DF_Pose/DeformableGAN_Version/images/test/'
    samples_2 = [(fn, train_dir_2+fn) for fn in train_list_2] + [(fn, test_dir_2+fn) for fn in test_list_2]
    samples_2 = dict(samples_2)
    # debug
    # samples_1 = {k:v for k,v in samples_1.items()[0:100]}
    # samples_2 = samples_1
    # samples_2 = {k:v for k,v in samples_2.items()[0:200]}
    # load images into memory
    print('loading images 1 ...')
    images_1 = {}
    for sid, fn in tqdm.tqdm(samples_1.items()):
        images_1[sid] = imageio.imread(fn)
    images_2 = {}
    print('loading images 2 ...')
    for sid, fn in tqdm.tqdm(samples_2.items()):
        images_2[sid] = imageio.imread(fn)
    # group image by identity. alignment will be applied between identities instead of single images for efficiency.
    # id format appears to be "<pid>_<view...>"; the first char after "_" is the view code
    persion_ids_1 = defaultdict(lambda :{})
    for sid in samples_1.keys():
        pid = sid.split('_')[0]
        view = sid.split('_')[1][0]
        persion_ids_1[pid][view] = sid
    persion_ids_2 = defaultdict(lambda: {})
    for sid in samples_2.keys():
        pid = sid.split('_')[0]
        view = sid.split('_')[1][0]
        persion_ids_2[pid][view] = sid
    # align index
    print('aligning image index ...')
    num_cand = 3      # number of candidate identities kept per query
    block_size = 1000 # candidates scored per batch to bound memory use
    func_similarity = similarity_l1_bat
    map_1to2 = {}
    map_1to2_cand = {}
    for pid_1, s_dict_1 in tqdm.tqdm(persion_ids_1.items()):
    # for pid_1, s_dict_1 in persion_ids_1.items():
        s_list_1 = s_dict_1.items()
        s_list_1.sort()
        # representative: the lexicographically first view of this identity
        view_1, sid_1 = s_list_1[0]
        # all identities in set 2 that have the same view
        sid_list_2 = [p[view_1] for p in persion_ids_2.values() if view_1 in p]
        img_1 = images_1[sid_1]
        imgs_2 = np.array([images_2[sid_2] for sid_2 in sid_list_2])
        scores = np.array([])
        # score candidates block by block
        for i in range(0, imgs_2.shape[0], block_size):
            imgs_2_block = imgs_2[i:(i+block_size),:]
            scores_block = func_similarity(img_1, imgs_2_block).flatten()
            scores = np.concatenate((scores, scores_block))
        # keep the num_cand highest-scoring candidates
        sorted_indices = np.argsort(scores)[::-1]
        cand_list = [(sid_list_2[i], scores[i]) for i in sorted_indices[0:num_cand]]
        map_1to2_cand[pid_1] = cand_list
        # best-matching identity: map all shared views, None for missing views
        pid_2 = cand_list[0][0].split('_')[0]
        s_dict_2 = persion_ids_2[pid_2]
        for view_1, sid_1 in s_dict_1.iteritems():
            if view_1 in s_dict_2:
                map_1to2[sid_1] = s_dict_2[view_1]
            else:
                map_1to2[sid_1] = None
    # output result
    output_dir = 'temp/search_image/DeepFashion_Inshop_VUnetversion_to_DeformableGANversion_bat_l1/'
    io.mkdir_if_missing(output_dir)
    io.save_json(map_1to2, output_dir + 'map.json')
    io.save_json(map_1to2_cand, output_dir + 'map_cand.json')
    # output samples: matched images side by side for visual inspection
    io.mkdir_if_missing(output_dir + 'vis')
    print('output visualization ...')
    for sid_1, sid_2 in tqdm.tqdm(map_1to2.iteritems()):
        if sid_2 is not None:
            fn = output_dir + 'vis/%s-%s.jpg'%(sid_1, sid_2)
            img = np.hstack((images_1[sid_1], images_2[sid_2]))
            imageio.imwrite(fn, img)
def create_sample_index_and_label():
    '''
    Create sample index and label files for Category_and_Attribute data.

    Reads the raw annotation files under <ca_root>/Anno/ and writes:
        Label/ca_samples.json        : {img_id: sample info dict}
        Label/ca_landmark_label.pkl  : {img_id: [(x, y, v), ...]}
        Label/ca_bbox_label.pkl      : {img_id: [x1, y1, x2, y2]}
        Label/ca_attr_label.pkl      : {img_id: 0/1 list of 1000 attributes}
    '''
    # config
    dir_label = design_root + 'Label/'
    # create sample index and landmark label; [2::] skips the two header lines
    # of the annotation file
    landmark_list = io.load_str_list(ca_root + 'Anno/list_landmarks.txt')[2::]
    img_root_org = ca_root + 'Img/'
    samples = {}
    landmark_label = {}
    for idx, s in enumerate(landmark_list):
        img_id = 'ca_' + str(idx)
        s = s.split()
        img_path_org = os.path.join(img_root_org, s[0])
        # 1: upper-body, 2: lower-body, 3: full-body
        cloth_type = int(s[1])
        pose_type = -1  # pose type is not provided by this annotation file
        lm_str = s[2::]
        # each landmark takes 3 fields (v, x, y); the three cloth types carry
        # 6 / 4 / 8 landmarks respectively, hence 18 / 12 / 24 fields
        if cloth_type == 1:
            assert len(lm_str) == 18
        elif cloth_type == 2:
            assert len(lm_str) == 12
        elif cloth_type == 3:
            assert len(lm_str) == 24
        # lm is a list: [(x_i, y_i, v_i)] - note the reorder from (v, x, y)
        lm = [(float(lm_str[i + 1]), float(lm_str[i + 2]), int(lm_str[i]))
              for i in range(0, len(lm_str), 3)]
        samples[img_id] = {
            'img_id': img_id,
            'cloth_type': cloth_type,
            'pose_type': pose_type,
            'img_path_org': img_path_org
        }
        landmark_label[img_id] = lm
    io.mkdir_if_missing(dir_label)
    io.save_json(samples, os.path.join(dir_label, 'ca_samples.json'))
    io.save_data(landmark_label, os.path.join(dir_label, 'ca_landmark_label.pkl'))
    print('create sample index (%d samples)' % len(samples))
    print('create landmark label')
    # map the image path as it appears in the annotation files (relative,
    # starting at 'img') back to our img_id
    img2id = {
        s['img_path_org'][s['img_path_org'].find('img')::]: s_id
        for s_id, s in samples.items()
    }
    # create bbox label
    bbox_list = io.load_str_list(ca_root + 'Anno/list_bbox.txt')[2::]
    bbox_label = {}
    for s in bbox_list:
        s = s.split()
        assert len(s) == 5
        s_id = img2id[s[0]]
        bbox = [float(x) for x in s[1::]]
        bbox_label[s_id] = bbox
    io.save_data(bbox_label, os.path.join(dir_label, 'ca_bbox_label.pkl'))
    print('create bbox label')
    # create attribute label (one 0/1 vector of length 1000 per image)
    attr_list = io.load_str_list(ca_root + 'Anno/list_attr_img.txt')[2::]
    attr_label = {}
    for idx, s in enumerate(attr_list):
        s = s.split()
        s_id = img2id[s[0]]
        # raw values are '1' / '-1'; map to 1 / 0
        att = [1 if c == '1' else 0 for c in s[1::]]
        assert len(att) == 1000
        attr_label[s_id] = att
        print('\rcreating attribute label %d / %d' % (idx, len(attr_list)), end='')
    io.save_data(attr_label, os.path.join(dir_label, 'ca_attr_label.pkl'))
    print('\ncreate attribute label')
def svm_test_all_attr():
    '''
    Train and evaluate one linear SVM per attribute on top of extracted
    features, parallelized over worker processes, then report mAP / balanced
    precision / top-k recall.

    Per-worker predictions are cached in checkpoints/<opt.id>/test/cache/;
    the summary is written to checkpoints/<opt.id>/test/svm_test.json.
    '''
    # config ########################################
    train_on_val_set = True   # train SVMs on the (smaller) validation subset
    num_worker = 20           # number of SVM training processes
    num_attr = 1000           # number of attributes to evaluate
    reduced_dim = 512         # PCA output dimension
    whiten = True             # whiten features in PCA
    ################################################
    opt = TestAttributeOptions().parse()
    # extract feature
    feat_data = extract_feature(opt)
    feat_train = feat_data['feat_train']
    feat_test = feat_data['feat_test']
    print('extract feature done!')
    # load attribute label
    print('loading attribute label...')
    attr_label = io.load_data('datasets/DeepFashion/Fashion_design/' + opt.fn_label)
    attr_entry = io.load_json('datasets/DeepFashion/Fashion_design/' + opt.fn_entry)
    label_train = np.array(
        [attr_label[s_id] for s_id in feat_data['id_list_train']])
    label_test = np.array(
        [attr_label[s_id] for s_id in feat_data['id_list_test']])
    label_train = label_train[:, 0:num_attr]
    label_test = label_test[:, 0:num_attr]
    # get validation feature and label (val ids are a subset of the train set)
    id_list_val = io.load_json(
        'datasets/DeepFashion/Fashion_design/Split/ca_split.json')['val']
    id2idx = {s_id: idx for idx, s_id in enumerate(feat_data['id_list_train'])}
    idx_list_val = [id2idx[s_id] for s_id in id_list_val]
    feat_val = feat_train[idx_list_val, :]
    label_val = label_train[idx_list_val, :]
    if train_on_val_set:
        feat_train = feat_val
        label_train = label_val
    print('PCA reduction and whitening...')
    t = time.time()
    pca = PCA(n_components=reduced_dim, whiten=whiten)
    pca.fit(feat_train)
    feat_train = pca.transform(feat_train)
    feat_test = pca.transform(feat_test)
    print('PCA done! (%f sec)' % (time.time() - t))
    print('start to train SVMs!')
    cache_dir = os.path.join('checkpoints', opt.id, 'test', 'cache')
    io.mkdir_if_missing(os.path.join('checkpoints', opt.id, 'test'))
    io.mkdir_if_missing(cache_dir)
    # ceil division so every attribute is covered even when num_attr is not a
    # multiple of num_worker (round() could silently drop the tail attributes)
    block_size = (num_attr + num_worker - 1) // num_worker
    p_list = []
    for worker_idx in range(num_worker):
        # clamp both ends; trailing workers may get an empty range when
        # num_attr < num_worker
        idx_attr_rng = [
            min(num_attr, block_size * worker_idx),
            min(num_attr, block_size * (worker_idx + 1))
        ]
        p = Process(target=_svm_test_attr_unit,
                    args=(worker_idx, idx_attr_rng, feat_train, feat_test,
                          label_train, label_test, attr_entry, cache_dir))
        p.start()
        p_list.append(p)
        print('worker %d for attribute %d-%d' %
              (worker_idx, idx_attr_rng[0], idx_attr_rng[1]))
    for p in p_list:
        p.join()
    # load cached per-worker predictions, concatenate along attribute axis
    pred_test = []
    for worker_idx in range(num_worker):
        pred_test.append(
            io.load_data(os.path.join(cache_dir, '%d.pkl' % worker_idx)))
    pred_test = np.concatenate(pred_test, axis=1)
    crit_ap = MeanAP()
    crit_ap.add(pred_test, label_test)
    mAP, ap_list = crit_ap.compute_mean_ap()
    mBP, bp_list = crit_ap.compute_balanced_precision()
    rec3_avg, rec3_attr, rec3_overall = crit_ap.compute_recall(k=3)
    rec5_avg, rec5_attr, rec5_overall = crit_ap.compute_recall(k=5)
    # display result
    result = OrderedDict([
        ('mAP', mAP),
        ('mBP', mBP),
        ('rec3_avg', rec3_avg),
        ('rec5_avg', rec5_avg),
        ('rec3_overall', rec3_overall),
        ('rec5_overall', rec5_overall),
    ])
    AttributeVisualizer(opt).print_error(result)
    # save per-attribute recall paired with the attribute names
    rec3_attr = [(attr_entry[i]['entry'], rec3_attr[i]) for i in range(num_attr)]
    rec5_attr = [(attr_entry[i]['entry'], rec5_attr[i]) for i in range(num_attr)]
    result_output = {
        'rec3_avg': rec3_avg,
        'rec5_avg': rec5_avg,
        'rec3_attr': rec3_attr,
        'rec5_attr': rec5_attr
    }
    io.save_json(result_output,
                 os.path.join('checkpoints', opt.id, 'test', 'svm_test.json'))
def create_aligned_index():
    '''
    Create (tar_id, edge_src_ids, color_src_ids) tuplets for the GAN pipeline,
    then visualize a few entries.

    NOTE(review): the index-creation sections sit under "if False" as in the
    original (presumably already run once; the visualize section re-loads the
    vis index from disk). The exact extent of the disabled region was
    reconstructed from context - confirm before re-enabling.
    '''
    if False:
        split = io.load_json(design_root + 'Split/ca_gan_split_trainval_upper.json')
        ############### for train/test ###############
        edge_pair = io.load_json(design_root + 'Label/ca_tps_pair.json')
        # attach a color source (random selection) to each target; fixed seed
        # keeps the pairing reproducible
        np.random.seed(0)
        color_pair = {}
        for set_name in ['train', 'test']:
            id_list = split[set_name]
            id_list_shuffle = [s_id for s_id in id_list]
            np.random.shuffle(id_list_shuffle)
            for tar_id, src_id in zip(id_list, id_list_shuffle):
                color_pair[tar_id] = src_id
        # create index file
        aligned_index = {}
        for s_id in split['train'] + split['test']:
            aligned_index[s_id] = {
                'id': s_id,
                'edge_ids': [edge_pair[s_id]],
                'color_ids': [color_pair[s_id]]
            }
        io.save_json(
            aligned_index,
            design_root + 'Label/ca_gan_trainval_upper_aligned_index.json')
        ############### for vis ###############
        edge_vis_group = io.load_json(design_root + 'Label/ca_vis_tps_group.json')
        vis_id_list = edge_vis_group.keys()
        # all visualization targets must come from the test split
        assert set(vis_id_list).issubset(set(split['test']))
        # attach a candidate set of color sources (random selection, excluding
        # the target itself) to each visualization target
        num_color = 6
        id_list = split['test']
        color_vis_group = {}
        for tar_id in vis_id_list:
            id_list_shuffle = [s_id for s_id in id_list if s_id != tar_id]
            np.random.shuffle(id_list_shuffle)
            color_vis_group[tar_id] = id_list_shuffle[0:num_color]
        # create index file
        vis_aligned_index = {}
        for s_id in vis_id_list:
            vis_aligned_index[s_id] = {
                'id': s_id,
                'edge_ids': edge_vis_group[s_id],
                'color_ids': color_vis_group[s_id]
            }
        io.save_json(vis_aligned_index,
                     design_root + 'Label/ca_gan_vis_upper_aligned_index.json')
    ############### visualize ###############
    vis_aligned_index = io.load_json(
        design_root + 'Label/ca_gan_vis_upper_aligned_index.json')
    num_visual = 10
    img_dir = design_root + 'Img/img_ca_256/'
    output_dir = 'temp/aligned_index/'
    io.mkdir_if_missing(output_dir)
    # list(...) is required: dict views are not sliceable (the original used
    # the Python-2-only items()[0:n] idiom)
    for tar_id, index in list(vis_aligned_index.items())[0:num_visual]:
        # row 1: target followed by its edge sources; row 2: target followed
        # by its color sources
        img_edge = [
            image.imread(img_dir + s_id + '.jpg')
            for s_id in [tar_id] + index['edge_ids']
        ]
        img_color = [
            image.imread(img_dir + s_id + '.jpg')
            for s_id in [tar_id] + index['color_ids']
        ]
        img_edge = np.concatenate(img_edge, axis=1)
        img_color = np.concatenate(img_color, axis=1)
        img_out = np.concatenate((img_edge, img_color), axis=0)
        image.imwrite(img_out, output_dir + tar_id + '.jpg')