Code Example #1
File: loader.py Project: chicm/inclusive
def get_train_val_loaders(args, batch_size=32, dev_mode=False, train_shuffle=True, val_num=4000):
    classes, stoi = get_classes(args.cls_type, args.start_index, args.end_index)
    train_meta, val_meta = get_train_val_meta(args.cls_type, args.start_index, args.end_index)

    #sampler = BalancedSammpler(train_meta, classes, stoi, balanced=args.balanced, min_label_num=500, max_label_num=700)
    #df1 = train_meta.set_index('ImageID')
    #sampled_train_meta = df1.loc[sampler.img_ids]

    # keep only images with at most 10 labels
    train_meta = train_meta[train_meta['obj_num'] <= 10]
    val_meta = val_meta[val_meta['obj_num'] <= 10]

    # resample training data
    train_img_ids = get_weighted_sample(train_meta, 1024*100)
    df_sampled = train_meta.set_index('ImageID').loc[train_img_ids]

    if val_num is not None:
        val_meta = val_meta.iloc[:val_num]

    #if dev_mode:
    #    train_meta = train_meta.iloc[:10]
    #    val_meta = val_meta.iloc[:10]
    img_dir = settings.TRAIN_IMG_DIR
    #train_set = ImageDataset(True, sampled_train_meta.index.values.tolist(), img_dir, classes, stoi, sampled_train_meta['LabelName'].values.tolist())
    train_set = ImageDataset(True, train_img_ids, img_dir, classes, stoi, df_sampled['LabelName'].values.tolist())
    
    val_set = ImageDataset(False, val_meta['ImageID'].values.tolist(), img_dir, classes, stoi, val_meta['LabelName'].values.tolist())

    train_loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=train_shuffle, num_workers=4, collate_fn=train_set.collate_fn, drop_last=True)
    train_loader.num = train_set.num

    val_loader = data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=4, collate_fn=val_set.collate_fn, drop_last=False)
    val_loader.num = val_set.num

    return train_loader, val_loader
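
The get_weighted_sample helper used above is defined in weighted_sampler.py (it is exercised in Code Example #5 below) but its body is not shown here. The following is only a sketch of what it plausibly does, reusing the project's get_weights_by_counts helper and the 'rare_counts' column from that test; the function name and the exact weighting scheme are assumptions.

import numpy as np

def get_weighted_sample_sketch(meta, num_samples, max_weight=500):
    # Sketch only: derive per-image weights via get_weights_by_counts on the
    # 'rare_counts' column (as the test in Code Example #5 does), normalize them,
    # and draw ImageIDs with replacement so heavily weighted images repeat.
    weights = np.asarray(get_weights_by_counts(meta['rare_counts'].values, max_weight=max_weight),
                         dtype=np.float64)
    probs = weights / weights.sum()
    return list(np.random.choice(meta['ImageID'].values, size=num_samples, p=probs))
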
Code Example #2
def get_val_loader(args, val_index, batch_size=32, dev_mode=False, val_num=3000):
    classes, stoi = get_classes(args.cls_type, args.start_index, args.end_index)
    _, val_meta = get_train_val_meta(args.cls_type, args.start_index, args.end_index)

    # filter: keep rows whose label count is <= args.max_labels
    # (.copy() avoids a SettingWithCopyWarning when tmp_label_count is added below)
    val_meta = val_meta[val_meta['obj_num'] <= args.max_labels].copy()

    # 7172 is the full set of trainable classes; when only a subset is selected,
    # drop rows that contain none of the selected classes
    if len(classes) < 7172:
        classes_set = set(classes)
        val_meta['tmp_label_count'] = val_meta['LabelName'].map(lambda x: len(set(x.split()) & classes_set))
        val_meta = val_meta[val_meta['tmp_label_count'] > 0]

    # shuffle deterministically, then keep at most val_num rows
    val_meta = shuffle(val_meta, random_state=1234).iloc[:val_num]

    if dev_mode:
        val_meta = val_meta.iloc[:10]
    img_dir = settings.TRAIN_IMG_DIR
    
    val_set = ImageDataset(False, val_meta['ImageID'].values.tolist(), img_dir, classes, stoi, val_index, val_meta['LabelName'].values.tolist())

    val_loader = data.DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=4, drop_last=False)
    val_loader.num = val_set.num

    return val_loader
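
A usage sketch for get_val_loader; the argparse namespace, its field values, and val_index=None are assumptions (only the fields the function actually reads are set), with the class-range values mirroring those in Code Example #5.

import argparse

args = argparse.Namespace(cls_type='trainable', start_index=0, end_index=7172, max_labels=10)
val_loader = get_val_loader(args, val_index=None, batch_size=32, val_num=3000)  # val_index value is a placeholder
print(val_loader.num)  # at most 3000 images after shuffling and truncation
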
Code Example #3
def get_train_loader(args, batch_size=32, dev_mode=False, train_shuffle=True):
    classes, stoi = get_classes(args.cls_type, args.start_index, args.end_index)
    train_meta, _ = get_train_val_meta(args.cls_type, args.start_index, args.end_index)

    # filter: keep rows whose label count is <= args.max_labels
    # (.copy() avoids a SettingWithCopyWarning when tmp_label_count is added below)
    train_meta = train_meta[train_meta['obj_num'] <= args.max_labels].copy()
    
    print(train_meta.shape)
    # 7172 is the full set of trainable classes; when only a subset is selected,
    # drop rows that contain none of the selected classes
    if len(classes) < 7172:
        classes_set = set(classes)
        train_meta['tmp_label_count'] = train_meta['LabelName'].map(lambda x: len(set(x.split()) & classes_set))
        train_meta = train_meta[train_meta['tmp_label_count'] > 0]

    # resample training data
    train_img_ids = get_weighted_sample(train_meta, 1024*100)
    df_sampled = train_meta.set_index('ImageID').loc[train_img_ids]

    if dev_mode:
        # use only a handful of the sampled images (train_meta itself is no longer used below)
        train_img_ids = train_img_ids[:10]
        df_sampled = df_sampled.iloc[:10]
        train_shuffle = False
    img_dir = settings.TRAIN_IMG_DIR
    
    train_set = ImageDataset(True, train_img_ids, img_dir, classes, stoi, None, df_sampled['LabelName'].values.tolist())
    
    # note: unlike get_train_val_loaders above, no custom collate_fn is passed here
    train_loader = data.DataLoader(train_set, batch_size=batch_size, shuffle=train_shuffle, num_workers=4, drop_last=True)
    train_loader.num = train_set.num

    return train_loader
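
To make the class-subset filter in the two functions above concrete: LabelName holds a space-separated string of label codes, and a row survives only if at least one of its labels falls inside the selected class subset. A tiny illustration with made-up label codes:

classes_set = {'/m/01g317', '/m/09j2d'}            # hypothetical selected subset of classes
row_labels = '/m/01g317 /m/0jbk'                   # hypothetical LabelName value for one row
print(len(set(row_labels.split()) & classes_set))  # -> 1, so this row is kept
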
Code Example #4
File: loader.py Project: chicm/ship
def get_train_val_loaders(batch_size=8,
                          dev_mode=False,
                          drop_empty=False,
                          img_sz=384):
    train_shuffle = True
    train_meta, val_meta = get_train_val_meta(drop_empty=drop_empty)

    img_mask_aug_train = get_img_mask_transforms(img_sz)  # alternative: ImgAug(aug.get_affine_seq('edge'))

    if dev_mode:
        train_shuffle = False
        img_mask_aug_train = None
        train_meta = train_meta.iloc[:10]
        val_meta = val_meta.iloc[:10]
    print(train_meta.shape, val_meta.shape)

    train_set = ImageDataset(
        True,
        train_meta,
        img_dir=settings.TRAIN_IMG_DIR,
        augment_with_target=img_mask_aug_train,
        image_augment=transforms.ColorJitter(0.2, 0.2, 0.2, 0.2),  # alternative: ImgAug(aug.brightness_seq)
        image_transform=get_img_transforms(img_sz),
        mask_transform=get_mask_transforms(img_sz))

    train_loader = data.DataLoader(train_set,
                                   batch_size=batch_size,
                                   shuffle=train_shuffle,
                                   num_workers=16,
                                   collate_fn=train_set.collate_fn,
                                   drop_last=True)
    train_loader.num = len(train_set)
    if dev_mode:
        train_loader.y_true = read_masks(train_meta['ImageId'].values,
                                         settings.TRAIN_MASK_DIR)

    val_set = ImageDataset(
        True,
        val_meta,
        img_dir=settings.TRAIN_IMG_DIR,
        augment_with_target=None,
        image_augment=None,  #ImgAug(aug.pad_to_fit_net(64, 'reflect')),
        image_transform=get_img_transforms(img_sz),
        mask_transform=get_mask_transforms(img_sz))
    val_loader = data.DataLoader(val_set,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=16,
                                 collate_fn=val_set.collate_fn)
    val_loader.num = len(val_set)
    val_loader.y_true = read_masks(val_meta['ImageId'].values,
                                   settings.TRAIN_MASK_DIR)
    val_loader.meta = val_meta

    return train_loader, val_loader
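
A usage sketch for the ship-project get_train_val_loaders above: with dev_mode=True each split is cut to 10 rows, the paired image/mask augmentation is turned off, and ground-truth masks are also attached to the train loader, which makes quick end-to-end checks cheap. Only names that already appear in the function are used.

train_loader, val_loader = get_train_val_loaders(batch_size=8, dev_mode=True, drop_empty=False, img_sz=384)
print(train_loader.num, val_loader.num)  # 10 and 10 in dev mode
print(len(val_loader.y_true))            # ground-truth masks loaded via read_masks()
print(val_loader.meta.head())            # validation metadata kept on the loader
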
Code Example #5
File: weighted_sampler.py Project: chicm/inclusive
def test_sampling():
    classes, stoi = get_classes('trainable', 0, 7172)
    train_meta, val_meta = get_train_val_meta('trainable', 0, 7172)
    print(train_meta.shape)
    print(train_meta.head())
    #train_meta = train_meta.iloc[:100]

    #w = get_weights(train_meta, 'rare_counts')
    w = get_weights_by_counts(train_meta['rare_counts'].values, max_weight=500)
    print('weights:', [int(x) for x in w.tolist()])
    print(w.shape)
    #w = [1]*100

    #sample = weighted_sample(train_meta['ImageID'].values, w, 50000)
    sample = get_weighted_sample(train_meta, 50000)
    counts = Counter()
    print(sample[:10])
    counts.update(sample)
    print(counts.most_common(100))
    common_ids = [x[0] for x in counts.most_common(50000)]

    df_selected = train_meta.set_index('ImageID')
    df_selected = df_selected.loc[sample]
    print(df_selected.head(20))
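
A small follow-up check that could be run after test_sampling(): compare label frequencies before and after the weighted resampling to confirm that rare labels are boosted. It only relies on the LabelName column being a space-separated label string, as in the loaders above; the helper name is made up.

from collections import Counter

def compare_label_frequencies(raw_meta, sampled_meta, n_rarest=20):
    # count how often each label code appears across the LabelName strings
    def label_counts(meta):
        c = Counter()
        for labels in meta['LabelName'].values:
            c.update(labels.split())
        return c

    before = label_counts(raw_meta)
    after = label_counts(sampled_meta)
    for label, _ in before.most_common()[-n_rarest:]:  # the rarest labels in the raw data
        print(label, before[label], after[label])

# e.g. compare_label_frequencies(train_meta, df_selected) at the end of test_sampling()
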
Code Example #6
File: balanced_sampler.py Project: chicm/inclusive
# excerpt: tail of BalancedSammpler's sampling loop (the enclosing method definition is not shown)
                    self.img_ids.append(row[0])
                else:
                    pass

                if len(self.full_classes) >= self.n_classes:
                    break
            if len(self.img_ids) == last_image_nums:
                break
            rounds += 1
            if rounds > max_rounds:
                break


if __name__ == '__main__':
    classes, stoi = get_classes('trainable', 0, 500)
    meta, _ = get_train_val_meta('trainable', 0, 500)
    #print(meta.head())
    #print(meta.shape)

    sampler = BalancedSammpler(meta, classes, stoi, balanced=True)

    #print(len(sampler.img_ids))
    print(sampler.img_ids[:10])
    print(sampler.class_counts)

    df1 = meta.set_index('ImageID')
    print(df1.head())
    selected = df1.loc[sampler.img_ids]

    print('selected:')
    print(selected.shape)
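
A sketch of how the balanced sample could feed a DataLoader, mirroring the commented-out BalancedSammpler path in Code Example #1. settings.TRAIN_IMG_DIR and the ImageDataset/DataLoader arguments are copied from that example, so this is illustrative rather than part of balanced_sampler.py.

from torch.utils import data

img_dir = settings.TRAIN_IMG_DIR
train_set = ImageDataset(True, selected.index.values.tolist(), img_dir, classes, stoi,
                         selected['LabelName'].values.tolist())
train_loader = data.DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4,
                               collate_fn=train_set.collate_fn, drop_last=True)
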