Code example #1
0
File: preproc_ca.py  Project: donghaoye/fashionHD
def create_category_label():
    '''
    Create the category entry list and per-sample category labels for the
    Category_and_Attribute (CA) benchmark.

    Outputs:
        Label/cat_entry.json: list of {'entry': name, 'type': type}
        Label/ca_cat_label.pkl: {sample_id: 0-based category index}
    '''
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    # first two lines of each annotation file are header lines
    cat_entry_list = io.load_str_list(ca_root +
                                      'Anno/list_category_cloth.txt')[2:]
    cat_list = io.load_str_list(ca_root + 'Anno/list_category_img.txt')[2:]

    # create category entry: each line is "<name tokens...> <type>"
    cat_entry = []
    for cat_str in cat_entry_list:
        tokens = cat_str.split()
        cat_entry.append({'entry': ' '.join(tokens[0:-1]),
                          'type': int(tokens[-1])})

    io.save_json(cat_entry, design_root + 'Label/cat_entry.json')
    print('create category entry')

    # map image path (relative to the 'img' folder) -> sample id
    # (items() replaces Py2-only iteritems() so this runs under Python 3)
    img2id = {
        s['img_path_org'][s['img_path_org'].find('img'):]: s_id
        for s_id, s in samples.items()
    }
    cat_label = {}

    for s in cat_list:
        s = s.split()
        # category index in the annotation is 1-based; store 0-based
        cat_label[img2id[s[0]]] = int(s[1]) - 1

    io.save_data(cat_label, design_root + 'Label/ca_cat_label.pkl')
    print('create category label')
def create_attribute_label():
    '''
    Create attribute labels for DF_Pose samples by looking up the item-level
    attribute annotation of the In-shop benchmark.

    Output:
        datasets/DF_Pose/Label/attr_label.pkl:
            {'label': {sample_id: 0/1 attribute vector},
             'entry': attribute-name list (entries with no positive removed)}
    '''
    img_split = io.load_json('datasets/DF_Pose/Label/image_split_dfm_new.json')
    id_list = img_split['train'] + img_split['test']
    # first two lines of each annotation file are headers
    attr_entry = io.load_str_list(
        'datasets/DeepFashion/In-shop/Anno/list_attr_cloth.txt')[2:]
    attr_anno = io.load_str_list(
        'datasets/DeepFashion/In-shop/Anno/list_attr_items.txt')[2:]
    # annotation uses -1/1 flags; map -1 -> 0 to get a 0/1 vector
    attr_anno = [l.replace('-1', '0').split() for l in attr_anno]
    # np.int was removed in NumPy >= 1.24; use the builtin int dtype
    attr_anno = {l[0]: np.array(l[1:], dtype=int) for l in attr_anno}

    label = {}
    for sid in id_list:
        # sample ids embed the 8-digit item id right after the substring 'id'
        s = sid.index('id') + 2
        sid_ori = 'id_' + sid[s:s + 8]
        label[sid] = attr_anno[sid_ori]

    # remove attribute entries with no positive sample
    # (wrap .values() in list() so np.array sees a sequence under Python 3)
    label_mat = np.array(list(label.values()))
    valid_idx = label_mat.sum(axis=0) > 0
    print('%d valid attribute entries' % (valid_idx.sum()))
    label = {k: v[valid_idx] for k, v in label.items()}
    attr_entry = [e for i, e in enumerate(attr_entry) if valid_idx[i]]
    attr_label = {'label': label, 'entry': attr_entry}

    io.save_data(attr_label, 'datasets/DF_Pose/Label/attr_label.pkl')
Code example #3
0
def create_attr_label():
    '''
    Build (or load, if cached) the per-item attribute label file for the
    Category_and_Attribute benchmark, then print the n_top most frequent
    attributes within each attribute type.
    '''
    attr_list = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_cloth.txt')[2:]
    attr_list = [' '.join(a.split()[0:-1]) for a in attr_list]
    attr_anno = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_img.txt')[2:]
    fn_out = root + 'Label/attribute_ca.json'
    num_attr = 1000
    n_top = 5

    # create label data (cache to fn_out; reuse it on later runs)
    if not os.path.isfile(fn_out):
        attr_data = {}
        for line in attr_anno:
            line = line.split()
            item_id = line[0]
            label = [int(c) for c in line[1:]]
            assert len(label) == num_attr
            attr_data[item_id] = label

        io.save_json(attr_data, fn_out)
    else:
        attr_data = io.load_json(fn_out)

    num_sample = len(attr_data)

    # attribute name -> attribute type (type is the last token of each line)
    attr_list_ref = io.load_str_list('/data2/ynli/datasets/DeepFashion/Category_and_Attribute/Anno/list_attr_cloth.txt')[2:]
    attr_type = {' '.join(a.split()[0:-1]): a.split()[-1] for a in attr_list_ref}

    # count positive samples per attribute
    # (list() around .values() keeps this working under Python 3)
    attr_mat = np.array(list(attr_data.values()), dtype=float)
    attr_count = np.where(attr_mat > 0, 1, 0).sum(axis=0)

    # group (name, count) pairs by attribute type; '-1' for unknown names
    attr_count_type = {}
    for i, attr_name in enumerate(attr_list):
        t = attr_type.get(attr_name, '-1')
        attr_count_type.setdefault(t, []).append((attr_name, attr_count[i]))

    # iterate a list, not a set literal, so the report order is deterministic
    for t in ['1', '2', '3', '4', '5', '-1']:
        if t not in attr_count_type:
            continue
        attr_count_list = attr_count_type[t]
        attr_count_list.sort(key=lambda x: x[1], reverse=True)

        print('attribute type: %s' % t)

        for attr_name, count in attr_count_list[0:n_top]:
            print('%s: %d (%.1f %%)' % (attr_name, count, 100. * count / num_sample))
        print('\n')
Code example #4
0
File: preproc_ca.py  Project: donghaoye/fashionHD
def create_attr_entry():
    '''
    Create attribute entry list, which contains original 1000 attributes used
    in the Category_and_Attribute benchmark.

    Each entry is {'entry': name, 'type': type id, 'pos_rate': fraction of
    training samples with the attribute set}.
    '''
    print('loading data...')
    # first two lines of the annotation file are headers
    attr_entry_list = io.load_str_list(ca_root +
                                       'Anno/list_attr_cloth.txt')[2:]
    attr_label = io.load_data(design_root + 'Label/ca_attr_label.pkl')
    split = io.load_json(design_root + 'Split/ca_split.json')
    train_ids = set(split['train'])
    # items() replaces Py2-only iteritems() so this runs under Python 3
    attr_mat = np.array(
        [v for k, v in attr_label.items() if k in train_ids],
        dtype=np.float32)

    print('computing positive rates')
    num_sample = len(train_ids)
    pos_rate = attr_mat.sum(axis=0) / num_sample

    attr_entry = []
    for idx, s in enumerate(attr_entry_list):
        s = s.split()
        attr_entry.append({
            'entry': ' '.join(s[0:-1]),  # attribute name (may contain spaces)
            'type': int(s[-1]),          # attribute type id (last token)
            'pos_rate': pos_rate[idx]
        })

    io.save_json(attr_entry, design_root + 'Label/attr_entry.json')
Code example #5
0
File: preproc_ca.py  Project: donghaoye/fashionHD
def create_split():
    '''
    Create train/val/test split following the original CA partition file.

    Outputs:
        Split/ca_split.json: {'train'|'val'|'test': [sample ids]}
        Split/ca_split_trainval.json: 2-way split with train+val merged
    '''
    split_list = io.load_str_list(ca_root + 'Eval/list_eval_partition.txt')[2:]
    split = {'train': [], 'val': [], 'test': []}
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    # image path (relative to 'img') -> sample id
    # (items() replaces Py2-only iteritems() so this runs under Python 3)
    img2id = {
        s['img_path_org'][s['img_path_org'].find('img'):]: s_id
        for s_id, s in samples.items()
    }

    for s in split_list:
        img_path, status = s.split()
        split[status].append(img2id[img_path])

    io.mkdir_if_missing(design_root + 'Split')
    io.save_json(split, design_root + 'Split/ca_split.json')

    print('create split')
    for status in ['train', 'val', 'test']:
        print('%s: %d' % (status, len(split[status])))

    # also save a 2-way split where train and val are merged
    split_trainval = {
        'train': split['train'] + split['val'],
        'test': split['test']
    }
    io.save_json(split_trainval, design_root + 'Split/ca_split_trainval.json')
Code example #6
0
def create_synthesis_to_CA_index():
    '''
    Create an index map A: img_syn[i] = img_ca[A[i]], together with the
    original (width, height) of each CA image, and save them as a .mat file
    for the Fashion_synthesis benchmark.
    '''
    import scipy.io

    syn_name_list = io.load_str_list(
        'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/name_list.txt'
    )

    samples = io.load_json(
        'datasets/DeepFashion/Fashion_design/Label/ca_samples.json')
    # image name (starting at 'img/') -> numeric CA index.
    # sample ids look like 'ca_<n>', so the numeric part starts at offset 3.
    # (items() replaces Py2-only iteritems() so this runs under Python 3)
    ca_name2idx = {
        s['img_path_org'][s['img_path_org'].find('img/'):]: int(s_id[3:])
        for s_id, s in samples.items()
    }
    # image name -> original (width, height), read from the image file itself
    ca_name2sz = {}
    for i, s in enumerate(samples.values()):
        img = image.imread(s['img_path_org'])
        h, w = img.shape[0:2]
        ca_name2sz[s['img_path_org'][s['img_path_org'].find('img/'):]] = (w, h)
        print('load ca image size: %d/%d' % (i, len(samples)))

    syn_idx_list = [ca_name2idx[name] for name in syn_name_list]
    syn_org_size_list = [ca_name2sz[name] for name in syn_name_list]

    data_out = {
        'syn2ca_index': syn_idx_list,
        'syn2ca_width': [w for w, _ in syn_org_size_list],
        'syn2ca_height': [h for _, h in syn_org_size_list]
    }
    fn_out = 'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/index_to_Category_and_Attribute.mat'
    scipy.io.savemat(fn_out, data_out)
Code example #7
0
def create_split():
    '''
    Create a split file following the train/test partition used in the
    VITON paper; a small "debug" subset (first 32 training ids) is added.
    '''
    train_pairs = io.load_str_list(zalando_root + 'Source/viton_train_pairs.txt')
    test_pairs = io.load_str_list(zalando_root + 'Source/viton_test_pairs.txt')

    # each pair line starts with an image name whose first 6 chars are the id
    split = {
        subset: [p.split()[0][0:6] for p in pairs]
        for subset, pairs in (('train', train_pairs), ('test', test_pairs))
    }
    split['debug'] = split['train'][0:32]

    io.mkdir_if_missing(zalando_root + 'Split')
    io.save_json(split, zalando_root + 'Split/zalando_split.json')
Code example #8
0
File: preproc_ca.py  Project: donghaoye/fashionHD
def create_gan_split():
    '''
    Create split for gan training:
    ca_gan: containing all frontal images
    ca_gan_upper: containing all frontal, upper-body images
    '''
    samples = io.load_json(design_root + 'Label/ca_samples.json')
    split = io.load_json(design_root + 'Split/ca_split_trainval.json')

    # use same image set as in Zhu Shizhan's ICCV17 FashionGAN paper
    img_list = io.load_str_list(
        'datasets/DeepFashion/Fashion_synthesis/data_release/benchmark/name_list.txt'
    )
    img_set = set(img_list)

    split_gan = {'train': [], 'test': []}
    split_gan_upper = {'train': [], 'test': []}

    for set_name in ['train', 'test']:
        for s_id in split[set_name]:
            s = samples[s_id]
            img_name = s['img_path_org']
            img_name = img_name[img_name.find('img/'):]
            if img_name in img_set:
                split_gan[set_name].append(s_id)
                # cloth_type 1 is upper-body
                if s['cloth_type'] == 1:
                    split_gan_upper[set_name].append(s_id)

    print('create split "split_gan"')
    print('train: %d, test: %d, total: %d' %
          (len(split_gan['train']), len(split_gan['test']),
           len(split_gan['train']) + len(split_gan['test'])))
    print('create split "split_gan_upper"')
    # bug fix: the total previously added len(split_gan['test']) instead of
    # len(split_gan_upper['test']), reporting a wrong upper-body total
    print('train: %d, test: %d, total: %d' %
          (len(split_gan_upper['train']), len(split_gan_upper['test']),
           len(split_gan_upper['train']) + len(split_gan_upper['test'])))

    io.save_json(split_gan, design_root + 'Split/ca_gan_split_trainval.json')
    io.save_json(split_gan_upper,
                 design_root + 'Split/ca_gan_split_trainval_upper.json')
Code example #9
0
File: preproc_ca.py  Project: donghaoye/fashionHD
def create_sample_index_and_label():
    '''
    Create sample index and label for Category_and_Attribute data
    - sample index
    - landmark label
    - bbox label
    - attribute label
    '''

    # config
    dir_label = design_root + 'Label/'

    # create sample index and landmark label
    # (first two lines of each annotation file are headers)
    landmark_list = io.load_str_list(ca_root + 'Anno/list_landmarks.txt')[2:]
    img_root_org = ca_root + 'Img/'

    samples = {}
    landmark_label = {}

    for idx, s in enumerate(landmark_list):
        img_id = 'ca_' + str(idx)

        s = s.split()
        img_path_org = os.path.join(img_root_org, s[0])

        # 1: upper-body, 2: lower-body, 3: full-body
        cloth_type = int(s[1])
        # CA annotations carry no pose information
        pose_type = -1

        lm_str = s[2:]
        # the number of landmark triplets depends on the cloth type
        if cloth_type == 1:
            assert len(lm_str) == 18
        elif cloth_type == 2:
            assert len(lm_str) == 12
        elif cloth_type == 3:
            assert len(lm_str) == 24

        # lm is a list: [(x_i, y_i, v_i)], where v is the visibility flag
        lm = [(float(lm_str[i + 1]), float(lm_str[i + 2]), int(lm_str[i]))
              for i in range(0, len(lm_str), 3)]

        samples[img_id] = {
            'img_id': img_id,
            'cloth_type': cloth_type,
            'pose_type': pose_type,
            'img_path_org': img_path_org
        }

        landmark_label[img_id] = lm

    io.mkdir_if_missing(dir_label)
    io.save_json(samples, os.path.join(dir_label, 'ca_samples.json'))
    io.save_data(landmark_label,
                 os.path.join(dir_label, 'ca_landmark_label.pkl'))

    print('create sample index (%d samples)' % len(samples))
    print('create landmark label')

    # image path (relative to 'img') -> sample id
    # (items() replaces Py2-only iteritems() so this runs under Python 3)
    img2id = {
        s['img_path_org'][s['img_path_org'].find('img'):]: s_id
        for s_id, s in samples.items()
    }

    # create bbox label
    bbox_list = io.load_str_list(ca_root + 'Anno/list_bbox.txt')[2:]
    bbox_label = {}

    for s in bbox_list:
        s = s.split()
        assert len(s) == 5
        s_id = img2id[s[0]]
        bbox = [float(x) for x in s[1:]]
        bbox_label[s_id] = bbox

    io.save_data(bbox_label, os.path.join(dir_label, 'ca_bbox_label.pkl'))
    print('create bbox label')

    # create attribute label: 0/1 vector of length 1000 per sample
    attr_list = io.load_str_list(ca_root + 'Anno/list_attr_img.txt')[2:]
    attr_label = {}

    for idx, s in enumerate(attr_list):
        s = s.split()
        s_id = img2id[s[0]]
        att = [1 if c == '1' else 0 for c in s[1:]]
        assert len(att) == 1000

        attr_label[s_id] = att
        print('\rcreating attribute label %d / %d' % (idx, len(attr_list)),
              end='')

    io.save_data(attr_label, os.path.join(dir_label, 'ca_attr_label.pkl'))
    print('\ncreate attribute label')
Code example #10
0
def create_split():
    '''
    Split dataset into train/test sets, where items are NOT overlapped.
    In the original split, train/test sets have equal size. We create our own
    split with a larger train set (train_rate of the items per category).
    '''

    # config
    use_original = False
    train_rate = 0.8

    # load sample
    samples = io.load_json(design_root + 'Label/inshop_samples.json')

    if use_original:
        # follow the official partition file (first two lines are headers)
        split_list = io.load_str_list(inshop_root +
                                      'Eval/list_eval_partition.txt')[2:]
        item2split = {}

        for line in split_list:
            line = line.split()
            item_id = line[1]
            status = line[2]
            # any non-'train' status (query/gallery/...) counts as test
            item2split[item_id] = 'train' if status == 'train' else 'test'

    else:
        # group item ids by category
        category2item = {}
        for s in samples.values():
            category2item.setdefault(s['category'], []).append(s['item_id'])

        item2split = {}
        np.random.seed(0)
        # sorted() works on dict keys under both Python 2 and 3
        # (the old keys().sort() raises AttributeError on Python 3)
        cat_list = sorted(category2item)

        for cat in cat_list:
            # dedupe, then sort so the seeded shuffle is reproducible
            item_list = list(set(category2item[cat]))
            item_list.sort()
            np.random.shuffle(item_list)

            split_point = int(len(item_list) * train_rate)
            for item_id in item_list[0:split_point]:
                item2split[item_id] = 'train'
            for item_id in item_list[split_point:]:
                item2split[item_id] = 'test'

        # check that train and test items do not overlap
        # (items() replaces Py2-only iteritems())
        train_set = set(
            [item_id for item_id, s in item2split.items() if s == 'train'])
        test_set = set(
            [item_id for item_id, s in item2split.items() if s == 'test'])
        assert not set.intersection(train_set, test_set)

    # create split
    split = {'train': [], 'test': []}

    for s_id, s in samples.items():
        split[item2split[s['item_id']]].append(s_id)

    # list() around .values() keeps .count() working under Python 3
    split_values = list(item2split.values())
    print('train set: %d items, %d images' %
          (split_values.count('train'), len(split['train'])))
    print('test set:  %d items, %d images' %
          (split_values.count('test'), len(split['test'])))

    fn_out = design_root + 'Split/inshop_split.json'
    io.save_json(split, fn_out)
Code example #11
0
def create_sample_index_and_label():
    '''
    Create sample index and label for In-shop datasets
    - sample index
    - landmark label
    - bbox label
    '''

    # config
    dir_label = design_root + 'Label/'

    # create sample index and landmark label
    # (first two lines of the annotation file are headers)
    landmark_list = io.load_str_list(inshop_root +
                                     'Anno/list_landmarks_inshop.txt')[2:]
    img_root_org = inshop_root + 'Img/'

    samples = {}
    landmark_label = {}

    for idx, s in enumerate(landmark_list):
        img_id = 'inshop_' + str(idx)

        s = s.split()
        img_path_org = os.path.join(img_root_org, s[0])

        # item id and category are encoded in the directory structure
        item_id = img_path_org.split('/')[-2]
        category = img_path_org.split('/')[-3]

        # 1: upper-body, 2: lower-body, 3: full-body
        cloth_type = int(s[1])

        # 1: normal, 2: medium, 3: large, 4: medium zoom-in,
        # 5: large zoom-in, 6: flat (no person)
        pose_type = int(s[2])

        lm_str = s[3:]
        # the number of landmark triplets depends on the cloth type
        if cloth_type == 1:
            assert len(lm_str) == 18
        elif cloth_type == 2:
            assert len(lm_str) == 12
        elif cloth_type == 3:
            assert len(lm_str) == 24

        # lm is a list: [(x_i, y_i, v_i)], where v is the visibility flag
        lm = [(float(lm_str[i + 1]), float(lm_str[i + 2]), int(lm_str[i]))
              for i in range(0, len(lm_str), 3)]

        samples[img_id] = {
            'img_id': img_id,
            'item_id': item_id,
            'category': category,
            'cloth_type': cloth_type,
            'pose_type': pose_type,
            'img_path_org': img_path_org
        }

        landmark_label[img_id] = lm

    io.mkdir_if_missing(dir_label)
    io.save_json(samples, os.path.join(dir_label, 'inshop_samples.json'))
    io.save_data(landmark_label,
                 os.path.join(dir_label, 'inshop_landmark_label.pkl'))

    print('create sample index (%d samples)' % len(samples))
    print('create landmark label')

    # image path (relative to 'img') -> sample id
    # (items() replaces Py2-only iteritems() so this runs under Python 3)
    img2id = {
        s['img_path_org'][s['img_path_org'].find('img'):]: s_id
        for s_id, s in samples.items()
    }

    # create bbox label: bbox coordinates are the last 4 of 7 fields
    bbox_list = io.load_str_list(inshop_root +
                                 'Anno/list_bbox_inshop.txt')[2:]
    bbox_label = {}

    for s in bbox_list:
        s = s.split()
        assert len(s) == 7
        s_id = img2id[s[0]]
        bbox = [float(x) for x in s[3:]]
        bbox_label[s_id] = bbox

    io.save_data(bbox_label, os.path.join(dir_label, 'inshop_bbox_label.pkl'))
    print('create bbox label')
Code example #12
0
def create_index():
    '''
    Build a sample index for the In-shop benchmark from the bbox annotation
    list, optionally copying images into a flattened directory layout.
    '''
    high_res = True
    copy_img = False

    # first two lines of the annotation file are headers
    bbox_list = io.load_str_list(root + '/Anno/list_bbox_inshop.txt')[2::]
    samples = {}

    if high_res:
        org_img_root = root + 'Img/img_highres/'
        new_img_root = root + 'Img/img1_highres/'
        fn_out = root + 'Label/samples_highres.json'
    else:
        org_img_root = root + 'Img/img/'
        new_img_root = root + 'Img/img1/'
        fn_out = root + 'Label/samples.json'

    io.mkdir_if_missing(new_img_root)
    num_fail = 0

    for idx, line in enumerate(bbox_list):
        fields = line.split()
        assert len(fields) == 7

        # drop the leading 'img' component of the annotated path
        path_parts = fields[0].split('/')[1:]
        org_path = '/'.join(path_parts)

        gender = path_parts[0]
        category = path_parts[1]
        item_id = path_parts[2]
        img_path = '/'.join(path_parts[2:4])
        # pose is the third '_'-separated token of the file name,
        # with the 4-char extension stripped
        pose = path_parts[3].split('_')[2][0:-4]
        assert pose in {'front', 'back', 'side', 'full', 'flat', 'additional'}

        fn_src = org_img_root + '/' + org_path
        fn_tar = new_img_root + '/' + img_path

        # skip (and count) annotated images missing on disk
        if not os.path.isfile(fn_src):
            num_fail += 1
            print(fn_src)
            continue

        img_id = str(idx)
        samples[img_id] = {
            'img_id': img_id,
            'item_id': item_id,
            'img_path': img_path,
            'gender': gender,
            'category': category,
            'pose': pose,
            'org_path': org_path,
        }

        if copy_img:
            io.mkdir_if_missing(new_img_root + item_id)
            shutil.copyfile(fn_src, fn_tar)

    io.save_json(samples, fn_out)

    print('\n')
    print('save sample index to %s' % fn_out)
    print('%d samples not found!' % num_fail)