def test_rmsprop(self):
    """RMSprop on TinyBobNet must reach >95% accuracy on the test split."""
    np.random.seed(1337)  # deterministic init so the accuracy bar is stable
    net = TinyBobNet()
    opt = optim.RMSprop(net.parameters(), lr=0.0002)
    train(net, X_train, Y_train, opt, steps=1000, device=self.device)
    accuracy = evaluate(net, X_test, Y_test, device=self.device)
    assert accuracy > 0.95
def test_conv(self):
    """Adam on TinyConvNet must reach >95% accuracy on the test split."""
    np.random.seed(1337)  # deterministic init so the accuracy bar is stable
    net = TinyConvNet()
    opt = optim.Adam(net.parameters(), lr=0.001)
    train(net, X_train, Y_train, opt, steps=200, device=self.device)
    accuracy = evaluate(net, X_test, Y_test, device=self.device)
    assert accuracy > 0.95
def test_sgd(self):
    """SGD on TinyBobNet must reach >95% accuracy on the test split.

    Fix: pass device=self.device to both train() and evaluate(), matching
    the sibling optimizer tests (test_rmsprop, test_conv). Without it this
    test always ran on the default device regardless of self.device.
    """
    np.random.seed(1337)  # deterministic init so the accuracy bar is stable
    model = TinyBobNet()
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, X_train, Y_train, optimizer, steps=1000, device=self.device)
    assert evaluate(model, X_test, Y_test, device=self.device) > 0.95
# Training-loss: mean(out*y) plus an L1 penalty on both weight matrices.
# NOTE: `model` is late-bound in the lambda — it is assigned below, before
# lossfn is first called inside train().
lmbd = 0.00025
lossfn = lambda out, y: out.mul(y).mean() + lmbd*(model.weight1.abs() + model.weight2.abs()).sum()

X_train, Y_train, X_test, Y_test = fetch_mnist()
steps = len(X_train)//BS
np.random.seed(1337)
if QUICK:
    # smoke-test mode: a single step and a single eval batch
    steps = 1
    X_test, Y_test = X_test[:BS], Y_test[:BS]

model = BigConvNet()

if len(sys.argv) > 1:
    # Optional warm start from a checkpoint path given on the command line.
    try:
        model.load(sys.argv[1])
        print('Loaded weights "'+sys.argv[1]+'", evaluating...')
        evaluate(model, X_test, Y_test, BS=BS)
    # Fix: was a bare `except:` which also swallowed KeyboardInterrupt and
    # SystemExit; best-effort load still falls back to fresh weights.
    except Exception:
        print('could not load weights "'+sys.argv[1]+'".')

if GPU:
    # Move parameters to GPU in place (plain loop instead of a list
    # comprehension used only for its side effects).
    params = get_parameters(model)
    for p in params:
        p.cuda_()

# lrs/epochss are defined elsewhere in this file: one Adam schedule per stage.
for lr, epochs in zip(lrs, epochss):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(1, epochs+1):
        # first epoch without augmentation
        X_aug = X_train if epoch == 1 else augment_img(X_train)
        train(model, X_aug, Y_train, optimizer, steps=steps, lossfn=lossfn, gpu=GPU, BS=BS)
        accuracy = evaluate(model, X_test, Y_test, BS=BS)
        # checkpoint name encodes accuracy in parts-per-million
        # (redundant str() around the %-formatted string removed)
        model.save('examples/checkpoint'+"%.0f" % (accuracy*1.0e6))
    # Tail of make_dataset() — its `def` is above this chunk; it hands back
    # the train/test split built earlier in the function.
    return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test

from tinygrad.optim import Adam

if __name__ == "__main__":
    # Train a small Transformer on the synthetic dataset from make_dataset()
    # (presumably 2-digit addition encoded as digit sequences — verify
    # against make_dataset above). NOTE(review): the local name `optim`
    # shadows the tinygrad.optim module if it was imported elsewhere.
    model = Transformer(10, 6, 2, 128, 4)
    X_train, Y_train, X_test, Y_test = make_dataset()
    lr = 0.003
    for i in range(10):
        # Fresh Adam each round with a decayed learning rate.
        optim = Adam(get_parameters(model), lr=lr)
        train(model, X_train, Y_train, optim, 50, BS=64)
        acc, Y_test_preds = evaluate(model, X_test, Y_test, num_classes=10, return_predict=True)
        lr /= 1.2
        print(f'reducing lr to {lr:.4f}')
        if acc > 0.998:
            # Near-perfect accuracy: dump the remaining wrong predictions.
            wrong = 0
            for k in range(len(Y_test_preds)):
                if (Y_test_preds[k] != Y_test[k]).any():
                    wrong += 1
                    # a/b: input digit pairs; c: correct last 3 target digits;
                    # x: predicted last 3 digits — TODO confirm column layout
                    # against make_dataset (structure reconstructed from a
                    # whitespace-collapsed source line).
                    a, b, c, x = X_test[k, :2], X_test[k, 2:4], Y_test[k, -3:], Y_test_preds[k, -3:]
                    print(f'{a[0]}{a[1]} + {b[0]}{b[1]} = {x[0]}{x[1]}{x[2]} (correct: {c[0]}{c[1]}{c[2]})')
            print(f'Wrong predictions: {wrong}, acc = {acc:.4f}')
            # Tail of make_dataset() — its `def` and the enclosing loops are
            # above this chunk; indentation reconstructed from a
            # whitespace-collapsed source line (presumably two nested loops
            # over i and j with s = i + j — TODO confirm).
            # One row = digits of i, digits of j, then the 3 digits of s.
            ds.append([i // 10, i % 10, j // 10, j % 10, s // 100, (s // 10) % 10, s % 10])
    random.shuffle(ds)
    ds = np.array(ds)
    # Inputs: first 6 columns; targets: columns shifted by one (the copy
    # keeps ds_Y independent of the ds_X view).
    ds_X = ds[:, 0:6]
    ds_Y = np.copy(ds[:, 1:])
    # 8000/rest train/test split.
    ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
    ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]
    return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test

from tinygrad.optim import Adam

if __name__ == "__main__":
    # Train a small Transformer on the synthetic dataset for 5 rounds of
    # 500 steps each; device chosen by the GPU environment variable.
    # NOTE(review): the local name `optim` shadows the tinygrad.optim module
    # if it was imported elsewhere.
    model = Transformer(10, 6, 2, 128, 4)
    X_train, Y_train, X_test, Y_test = make_dataset()
    optim = Adam(get_parameters(model), lr=0.001)
    for i in range(5):
        train(model, X_train, Y_train, optim, 500, BS=32, device=Device.GPU if os.getenv("GPU") else Device.CPU)
        evaluate(model, X_test, Y_test, num_classes=10)