def kfold(ds, lab, k=5, regression=False, model=None):
    """Run k-fold cross-validated active-learning training (torch5Fold).

    The sample indices of ``lab`` are split into ``k`` shuffled folds with
    scikit-learn's ``KFold``. For fold ``i`` a directory ``active<i>`` is
    created in the current working directory (an existing one is kept) and
    handed to the trainer, whose ``train()`` method is then called.

    Args:
        ds: Features; must support indexing with the index arrays yielded
            by ``KFold.split`` (e.g. a NumPy array).
        lab: Labels, indexed the same way as ``ds``.
        k: Number of folds.
        regression: If true, use ``active.TorchRegressionFold`` (with
            ``measure='distance'`` and ``distance='linear'``); otherwise use
            ``active.TorchFold``.
        model: Network handed to the trainer. When falsy (the default
            ``None``), a new ``NN.FcRegRDKit()`` / ``NN.FcRDKit()`` is built
            for every fold. A caller-supplied model is passed unchanged to
            every fold.

    Returns:
        None. Progress messages are printed to stdout.
    """
    from sklearn.model_selection import KFold

    kf = KFold(n_splits=k, shuffle=True)
    for fold, (tr_idx, te_idx) in enumerate(kf.split(lab)):
        path = 'active%d' % fold
        try:
            os.mkdir(path)
        except FileExistsError:
            # Reuse the directory left behind by a previous run.
            pass

        xtr, ytr, xte, yte = ds[tr_idx], lab[tr_idx], ds[te_idx], lab[te_idx]

        if model:
            # NOTE(review): a caller-supplied model is shared by all folds;
            # if the trainer updates it in place, later folds start from
            # weights fitted on earlier folds - confirm this is intended.
            fold_model = model
        elif regression:
            # Build the default network per fold. Rebinding `model` here
            # would make every later fold reuse the fold-0 network.
            fold_model = NN.FcRegRDKit()
        else:
            fold_model = NN.FcRDKit()

        print('start training')
        if regression:
            nn = active.TorchRegressionFold(
                xtr, ytr, xte, yte, fold_model, 'active', path,
                ['percent_of_unlabel', 1],
                measure='distance', distance='linear')
        else:
            nn = active.TorchFold(
                xtr, ytr, xte, yte, fold_model, 'active', path,
                ['percent_of_unlabel', 1])
        nn.train()
        print('finish training')
def split_train(ds, lab, test_ratio=0.3, regression=False, model=None,
                out_dir=None):
    """Train once on a single train/test split of the data.

    Indices are produced by ``split.TTSplit(len(lab), 'portion',
    test=test_ratio).split()``; the resulting subsets are handed to the
    active-learning trainer, whose ``train()`` method is then called.

    Args:
        ds: Features; must support indexing with the index collections
            returned by the splitter.
        lab: Labels, indexed the same way as ``ds``.
        test_ratio: Value forwarded to ``TTSplit`` as ``test``.
        regression: If true, use ``active.TorchRegressionFold`` (with
            ``measure='distance'`` and ``distance='linear'``); otherwise use
            ``active.TorchFold``.
        model: Network handed to the trainer. When falsy (the default
            ``None``), ``NN.FcRegRDKit()`` / ``NN.FcRDKit()`` is built.
        out_dir: Value passed to the trainer as its path argument. When
            omitted, a module-level ``path`` is used if one is defined
            (the previous behaviour); otherwise the directory ``'active'``
            is used. Except in the module-level case, the directory is
            created if missing.

    Returns:
        None. Progress messages are printed to stdout.
    """
    import os

    if out_dir is None and 'path' in globals():
        # Legacy behaviour: this function used to read a bare `path` name
        # that it never bound itself, so it only worked when the module
        # defined one. Pass that value through untouched.
        out_dir = globals()['path']
    else:
        if out_dir is None:
            out_dir = 'active'
        os.makedirs(out_dir, exist_ok=True)

    n_samples = len(lab)
    splitter = split.TTSplit(n_samples, 'portion', test=test_ratio)
    tr_idx, te_idx = splitter.split()
    xtr, ytr, xte, yte = ds[tr_idx], lab[tr_idx], ds[te_idx], lab[te_idx]

    if not model:
        model = NN.FcRegRDKit() if regression else NN.FcRDKit()

    print('start training')
    if regression:
        nn = active.TorchRegressionFold(
            xtr, ytr, xte, yte, model, 'active', out_dir,
            ['percent_of_unlabel', 1],
            measure='distance', distance='linear')
    else:
        nn = active.TorchFold(
            xtr, ytr, xte, yte, model, 'active', out_dir,
            ['percent_of_unlabel', 1])
    nn.train()
    print('finish training')