# Example #1


# NOTE: the cross-validation splitter call that opened this loop was truncated
# in the source; `cv_splits` stands in for the iterable of
# (train_indices, valid_indices) pairs it produced.
for fold, (trn_ndx, vld_ndx) in enumerate(cv_splits):
    if fold == 0:  # keep only the first CV fold
        break
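
# As a stand-in, so the rest of this example has concrete fold indices, the
# split can be sketched with sklearn's KFold over the rows of train.csv. This
# is an assumption: the original splitter is not visible in this excerpt and
# may well have been a stratified/multilabel variant with different settings.
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold

_n_rows = len(pd.read_csv('../input/train.csv'))
_kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (trn_ndx, vld_ndx) in enumerate(_kf.split(np.arange(_n_rows))):
    if fold == 0:
        break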

# Load the preprocessed training images (bloscpack dump, 128x128 with 8px
# padding per the filename) and the three target columns of train.csv
# (grapheme_root, vowel_diacritic, consonant_diacritic).
imgs = bp.unpack_ndarray_from_file(
    '../features/train_images_size128_pad8_max_noclean.bloscpack')
lbls = pd.read_csv('../input/train.csv').iloc[:, 1:4].values

trn_imgs = imgs[trn_ndx]
trn_lbls = lbls[trn_ndx]
vld_imgs = imgs[vld_ndx]
vld_lbls = lbls[vld_ndx]

training_set = Bengaliai_DS(trn_imgs,
                            trn_lbls,
                            transform=augs,
                            split_label=True,
                            RGB=True)
validation_set = Bengaliai_DS(vld_imgs, vld_lbls, split_label=True, RGB=True)

batch_size = 64

training_loader = DataLoader(training_set,
                             batch_size=batch_size,
                             num_workers=4,
                             shuffle=True)
validation_loader = DataLoader(validation_set,
                               batch_size=batch_size,
                               num_workers=4,
                               shuffle=False)
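
# For orientation, a minimal sketch of the contract a dataset like
# `Bengaliai_DS` has to fulfil: wrap the image array and label matrix, apply
# an optional imgaug transform, optionally return the three label columns
# separately (split_label), and optionally repeat the grayscale channel to
# three channels (RGB). The class below is a hypothetical stand-in; the real
# Bengaliai_DS lives elsewhere in this repo and its internals may differ.
import numpy as np
import torch
from torch.utils.data import Dataset


class GraphemeDatasetSketch(Dataset):
    def __init__(self, imgs, lbls, transform=None, split_label=False, RGB=False):
        self.imgs = imgs
        self.lbls = lbls
        self.transform = transform
        self.split_label = split_label
        self.RGB = RGB

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        img = self.imgs[idx]
        if self.transform is not None:
            img = self.transform.augment_image(img)  # imgaug pipeline on a single (H, W) image
        img = np.expand_dims(img, 0).astype(np.float32) / 255.0  # -> (1, H, W)
        if self.RGB:
            img = np.repeat(img, 3, axis=0)  # -> (3, H, W)
        lbl = self.lbls[idx].astype(np.int64)
        if self.split_label:
            return torch.from_numpy(img), tuple(int(c) for c in lbl)
        return torch.from_numpy(img), torch.from_numpy(lbl)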


# Augmentation pipeline. The `augs = iaa.SomeOf(` opener was truncated in the
# source and is reconstructed from the surviving arguments. Note that `augs`
# is referenced by the dataset construction above; in the original notebook
# this definition came first.
augs = iaa.SomeOf(
    (1, 3),
    [
        iaa.Affine(scale={"x": (0.8, 1.), "y": (0.8, 1.)}, rotate=(-15, 15), shear=(-15, 15)),
        iaa.PiecewiseAffine(scale=(0.02, 0.04)),
        iaa.DirectedEdgeDetect(alpha=(.01, .99), direction=(0.0, 1.0)),
    ],
    random_order=True,
)
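
# Illustrative only: a quick sanity check of the pipeline above on a dummy
# grayscale image. imgaug augmenters accept uint8 (H, W) arrays directly and
# return an array of the same shape.
import numpy as np

_dummy = np.random.randint(0, 256, size=(128, 128), dtype=np.uint8)
_augmented = augs.augment_image(_dummy)
print(_augmented.shape)  # (128, 128)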


# In[5]:


batch_size = 64  # 64 matters: the fit_one_cycle arguments are probably tuned for this batch size

# Alternative dataset setup (single-channel input) that reuses the fold slices
# above and feeds the fastai DataBunch below.
training_set = Bengaliai_DS(trn_imgs, trn_lbls, transform=augs, RGB=False)
validation_set = Bengaliai_DS(vld_imgs, vld_lbls, RGB=False)

training_loader = DataLoader(training_set, batch_size=batch_size, num_workers=6, shuffle=True) # , sampler=sampler , shuffle=True
validation_loader = DataLoader(validation_set, batch_size=batch_size, num_workers=6, shuffle=False)

data_bunch = DataBunch(train_dl=training_loader, valid_dl=validation_loader)
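
# The DataBunch is what fastai's training loop consumes. As a hedged sketch of
# the step that follows (the real model and schedule live in the "model"
# section below and are not reproduced here; `model`, `loss_fn`, `metric_fn`,
# the epoch count and the learning rate are all placeholders):
#
#     from fastai.basic_train import Learner
#
#     learn = Learner(data_bunch, model, loss_func=loss_fn, metrics=[metric_fn])
#     learn.fit_one_cycle(30, max_lr=1e-2)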


# ---
# ### model

# In[6]:


device = 'cuda:0'
# Second data-loading variant: labels restricted to grapheme_root only.
# `trn_pdf` (the training-fold dataframe) comes from a split step not shown here.
trn_pdf.reset_index(inplace=True, drop=True)
imgs = bp.unpack_ndarray_from_file('../features/train_images.bloscpack')
lbls = pd.read_csv('../input/train.csv').iloc[:, 1:4].values

trn_imgs = imgs[trn_ndx]
trn_lbls = lbls[trn_ndx, 0:1]
vld_imgs = imgs[vld_ndx]
vld_lbls = lbls[vld_ndx, 0:1]
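
# Note on the `0:1` slice above: it keeps the label array two-dimensional,
# shape (N, 1) rather than (N,), presumably so downstream code that expects a
# 2-D label matrix keeps working. A tiny numpy illustration:
import numpy as np

_demo = np.arange(12).reshape(4, 3)
print(_demo[[0, 2], 0:1].shape)  # (2, 1) -- column slice keeps the 2-D shape
print(_demo[[0, 2], 0].shape)    # (2,)   -- integer index collapses to 1-D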


# In[5]:


#sampler = Balanced_Sampler(trn_pdf, count_column='image_id', primary_group='grapheme_root', secondary_group=['vowel_diacritic', 'consonant_diacritic'], size=trn_imgs.shape[0])

training_set = Bengaliai_DS(trn_imgs, trn_lbls, transform=None)
validation_set = Bengaliai_DS(vld_imgs, vld_lbls)

training_loader = DataLoader(training_set, batch_size=64, num_workers=6, shuffle=True) # , sampler=sampler , shuffle=True
validation_loader = DataLoader(validation_set, batch_size=64, num_workers=6, shuffle=False)

data_bunch = DataBunch(train_dl=training_loader, valid_dl=validation_loader)


# ---
# ### model

# In[6]:


device = 'cuda:0'