from thinc.api import Model, chain, Relu, Softmax, Adam
from tqdm import tqdm
from wasabi import msg
import ml_datasets


def main(n_hidden: int = 256, dropout: float = 0.2, n_iter: int = 10, batch_size: int = 128):
    # Define the model
    model: Model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # Load the data
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    # Set any missing shapes for the model.
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    train_data = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
    dev_data = model.ops.multibatch(batch_size, dev_X, dev_Y)
    # Create the optimizer.
    optimizer = Adam(0.001)
    for i in range(n_iter):
        for X, Y in tqdm(train_data, leave=False):
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in dev_data:
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
        score = correct / total
        msg.row((i, f"{score:.3f}"), widths=(3, 5))
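A minimal way to run the function above; the upstream Thinc example wires this up through a CLI helper, but a plain call with the defaults also works:

if __name__ == "__main__":
    main()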
from collections import Counter

import pytest
from thinc.api import Relu, chain, with_debug


def test_with_debug():
    pytest.importorskip("ml_datasets")
    import ml_datasets

    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    counts = Counter()

    def on_init(*_):
        counts["init"] += 1

    def on_forward(*_):
        counts["forward"] += 1

    def on_backprop(*_):
        counts["backprop"] += 1

    relu = Relu()
    relu2 = with_debug(
        Relu(), on_init=on_init, on_forward=on_forward, on_backprop=on_backprop
    )
    chained = chain(relu, relu2, relu2)
    chained.initialize(X=train_X[:5], Y=train_Y[:5])
    _, backprop = chained(X=train_X[:5], is_train=False)
    # Not real loss gradients, but we don't care for testing.
    backprop(train_Y[:5])
    # Four forward passes, because initialization also runs the forward
    # pass for validation.
    assert counts == {"init": 2, "forward": 4, "backprop": 2}
import ml_datasets
from thinc.api import Adam, Relu, Softmax, chain, prefer_gpu


def test_model_gpu():
    prefer_gpu()
    n_hidden = 32
    dropout = 0.2
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    model = chain(
        Relu(nO=n_hidden, dropout=dropout),
        Relu(nO=n_hidden, dropout=dropout),
        Softmax(),
    )
    # Make sure the data is on the right device.
    train_X = model.ops.asarray(train_X)
    train_Y = model.ops.asarray(train_Y)
    dev_X = model.ops.asarray(dev_X)
    dev_Y = model.ops.asarray(dev_Y)
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)
    batch_size = 128
    for i in range(2):
        batches = model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True)
        for X, Y in batches:
            Yh, backprop = model.begin_update(X)
            backprop(Yh - Y)
            model.finish_update(optimizer)
        # Evaluate and print progress
        correct = 0
        total = 0
        for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
            Yh = model.predict(X)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
import ml_datasets


def mnist(limit=5000):
    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    return (train_X[:limit], train_Y[:limit]), (dev_X[:limit], dev_Y[:limit])
import numpy as np
from tqdm import tqdm
import ml_datasets

from _8_mnist_sinkprop import DigitCNN

# ml_datasets.mnist() returns ((train_X, train_Y), (dev_X, dev_Y)) with
# one-hot labels.
(trX, trY), (teX, teY) = ml_datasets.mnist()

Q = np.random.permutation(np.eye(784))

##########################################
# Permute the columns of the train and test matrices
##########################################
trX = np.dot(trX, Q)
teX = np.dot(teX, Q)
# trX = trX.reshape((-1, 1, 28, 28))
# teX = teX.reshape((-1, 1, 28, 28))

cnn = DigitCNN()


def train(iters=1, eta=.1, perm_eta=.1):
    for i in range(iters):
        for start, end in tqdm(zip(range(0, len(trX), 128), range(128, len(trX), 128))):
            cost = cnn.train(trX[start:end], trY[start:end], eta=eta, perm_eta=perm_eta)
        test_error = 1 - np.mean(np.argmax(teY, axis=1) == cnn.predict(teX))
        train_error = 1 - np.mean(np.argmax(trY[:10000], axis=1) == cnn.predict(trX[:10000]))
        print("test error: %s, train error: %s" % (test_error, train_error))
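Right-multiplying by a permuted identity matrix is what shuffles the pixel columns above; a tiny self-contained check (illustrative only, not part of the experiment) makes the effect concrete:

import numpy as np

# Right-multiplying by a permutation matrix reorders columns: each row
# keeps the same values, just at permuted positions.
demo = np.arange(6).reshape(2, 3)        # [[0, 1, 2], [3, 4, 5]]
P = np.random.permutation(np.eye(3))     # 3x3 permutation matrix
permuted = np.dot(demo, P)
assert sorted(permuted[0]) == [0, 1, 2]  # same entries, new order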
import numpy as np
from tqdm import tqdm
import ml_datasets

from _2_mnist_cnn import DigitCNN

# ml_datasets.mnist() returns ((train_X, train_Y), (dev_X, dev_Y)) with
# one-hot labels.
(trX, trY), (teX, teY) = ml_datasets.mnist()

trX = trX.reshape((-1, 1, 28, 28))
teX = teX.reshape((-1, 1, 28, 28))

cnn = DigitCNN()


def train(iters=1, eta=.1):
    for i in range(iters):
        for start, end in tqdm(zip(range(0, len(trX), 128), range(128, len(trX), 128))):
            cost = cnn.train(trX[start:end], trY[start:end], eta=eta)
        test_error = 1 - np.mean(np.argmax(teY, axis=1) == cnn.predict(teX))
        train_error = 1 - np.mean(np.argmax(trY[:10000], axis=1) == cnn.predict(trX[:10000]))
        print("test error: %s, train error: %s" % (test_error, train_error))
# (X, Y, W, Yhat, penalty, row_penalty, col_penalty and learning_rate are
# symbolic Theano variables defined earlier in the file.)
recon_error = T.mean((Yhat - Y) ** 2)
cost = recon_error + penalty
params = [W]
updates = SGD(cost, params, learning_rate)
train_func = theano.function(
    inputs=[X, Y, row_penalty, col_penalty, learning_rate],
    outputs=[recon_error, penalty, cost],
    updates=updates,
)

########################################
# Code to run experiment
########################################
(Xtrain, Ytrain), (Xtest, Ytest) = ml_datasets.mnist()


def train(iters=1, row_penalty=0.0, col_penalty=0.0, eta=.1):
    for i in range(iters):
        for start, end in tqdm(zip(range(0, len(Xtrain), 128), range(128, len(Xtrain), 128))):
            recon_error, penalty, cost = train_func(
                Xtrain[start:end], Xtrain[start:end], row_penalty, col_penalty, eta
            )
        print("Recon: %s, Penalty: %s, Cost: %s" % (recon_error, penalty, cost))
import cowsay
import ml_datasets
import thinc.api as api

# Configuration
config = api.Config()
config.from_disk('./linear.cfg')
loaded_config = api.registry.make_from_config(config)
batch_size = loaded_config['training']['batch_size']
n_iter = loaded_config['training']['n_iter']

# Dataset
(train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
cowsay.cow(f"Training size={len(train_X)}, dev size={len(dev_X)}")

# Model
model = api.Softmax()
model.initialize(X=train_X, Y=train_Y)
cowsay.cow(
    f"Initialized model with input dimension "
    f"nI={model.get_dim('nI')} and output dimension nO={model.get_dim('nO')}"
)

api.fix_random_seed(0)
optimizer = loaded_config['optimizer']

print("Training")
for _ in range(n_iter):
    for X, Y in model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True):
        # Standard Thinc training step, following the same pattern as the
        # MNIST example above (the original snippet was cut off here).
        Yh, backprop = model.begin_update(X)
        backprop(Yh - Y)
        model.finish_update(optimizer)
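The linear.cfg file itself isn't included in the snippet; given the keys the script reads (training.batch_size, training.n_iter, optimizer), a plausible minimal version would look like this. The values and the optimizer choice are illustrative assumptions, not the original file:

[training]
n_iter = 10
batch_size = 128

[optimizer]
@optimizers = "Adam.v1"
learn_rate = 0.001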
import pytest


def mnist(limit=5000):
    pytest.importorskip("ml_datasets")
    import ml_datasets

    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    return (train_X[:limit], train_Y[:limit]), (dev_X[:limit], dev_Y[:limit])
penalty = row_penalty * row_norm + col_penalty * col_norm
recon_error = T.mean((Yhat - Y) ** 2)
cost = recon_error + penalty
params = [W]
updates = SGD(cost, params, learning_rate)
train_func = theano.function(
    inputs=[X, Y, row_penalty, col_penalty, learning_rate],
    outputs=[recon_error, penalty, cost],
    updates=updates,
)

########################################
# Code to run experiment
########################################
(Xtrain, Ytrain), (Xtest, Ytest) = ml_datasets.mnist()


def train(iters=1, row_penalty=0.0, col_penalty=0.0, eta=.1):
    for i in range(iters):
        for start, end in tqdm(
            zip(range(0, len(Xtrain), 128), range(128, len(Xtrain), 128))
        ):
            recon_error, penalty, cost = train_func(
                Xtrain[start:end], Xtrain[start:end], row_penalty, col_penalty, eta
            )
        print("Recon: %s, Penalty: %s, Cost: %s" % (recon_error, penalty, cost))
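The SGD helper called in both Theano snippets above isn't shown. A minimal sketch of what it presumably computes follows; the name and signature are taken from the call sites, while the body is an assumption (plain gradient descent with a shared scalar learning rate):

import theano.tensor as T

def SGD(cost, params, learning_rate):
    # Vanilla gradient descent: one update pair per parameter,
    # p <- p - learning_rate * d(cost)/d(p).
    grads = T.grad(cost, wrt=params)
    return [(p, p - learning_rate * g) for p, g in zip(params, grads)]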