def _train_mdn(ps, xs, mdn, maxepochs, lreg, minibatch, step, logger):
    """
    Train SVI MDN on parameter/data samples.
    """

    ps = np.asarray(ps, np.float32)
    xs = np.asarray(xs, np.float32)

    n_samples = ps.shape[0]
    assert xs.shape[0] == n_samples, 'wrong sizes'

    regularizer = lf.SviRegularizer(mdn.mps, mdn.sps, lreg) / n_samples

    logger.write('training model...\n')

    trainer = trainers.SGD(
        model=mdn,
        trn_data=[xs, ps],
        trn_loss=mdn.trn_loss + regularizer,
        trn_target=mdn.y,
        step=step
    )
    trainer.train(
        minibatch=minibatch,
        maxepochs=maxepochs,
        monitor_every=1,
        logger=logger
    )

    logger.write('training model done\n')

    return mdn

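# Illustrative usage sketch for _train_mdn above (not part of the original code). It
# assumes an SVI MDN already constructed with the ml library, exposing mps, sps,
# trn_loss and y as used above, plus a step-size rule such as the ss.Adam(a=1.e-4)
# used elsewhere in this codebase. The hyperparameter values are made-up examples,
# and `logger` can be any object with a write() method, e.g. sys.stdout.
def _demo_train_mdn(mdn, prior_samples, sim_data, step, logger):
    """Fit the given SVI MDN on (parameter, data) pairs with example settings."""
    return _train_mdn(prior_samples, sim_data, mdn,
                      maxepochs=1000, lreg=0.01, minibatch=100,
                      step=step, logger=logger)
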
def two_sample_test_classifier(x0, x1, rng=np.random):
    """
    Classifier-based two-sample test. Given two datasets, trains a binary classifier
    to discriminate between them, and reports how well it does.
    :param x0: first dataset
    :param x1: second dataset
    :param rng: random generator to use
    :return: discrimination accuracy
    """

    import ml.models.neural_nets as nn
    import ml.trainers as trainers
    import ml.loss_functions as lf

    # create dataset
    x0 = np.asarray(x0)
    x1 = np.asarray(x1)
    n_x0, n_dims = x0.shape
    n_x1 = x1.shape[0]
    n_data = n_x0 + n_x1
    assert n_dims == x1.shape[1], 'inconsistent sizes'
    xs = np.vstack([x0, x1])
    ys = np.hstack([np.zeros(n_x0), np.ones(n_x1)])

    # shuffle data, so that the validation set contains examples from both datasets
    idx = rng.permutation(n_data)
    xs, ys = xs[idx], ys[idx]

    # split in training / validation sets
    n_val = int(n_data * 0.1)
    xs_val, ys_val = xs[:n_val], ys[:n_val]
    xs_trn, ys_trn = xs[n_val:], ys[n_val:]

    # create classifier
    classifier = nn.FeedforwardNet(n_dims)
    classifier.addLayer(n_dims * 10, 'relu', rng=rng)
    classifier.addLayer(n_dims * 10, 'relu', rng=rng)
    classifier.addLayer(1, 'logistic', rng=rng)

    # train classifier
    trn_target, trn_loss = lf.CrossEntropy(classifier.output)
    val_target, val_loss = lf.CrossEntropy(classifier.output)
    trainer = trainers.SGD(
        model=classifier,
        trn_data=[xs_trn, ys_trn],
        trn_loss=trn_loss,
        trn_target=trn_target,
        val_data=[xs_val, ys_val],
        val_loss=val_loss,
        val_target=val_target
    )
    trainer.train(
        minibatch=100,
        patience=20,
        monitor_every=1,
        logger=None
    )

    # measure accuracy
    pred = classifier.eval(xs)[:, 0] > 0.5
    acc = np.mean(pred == ys)

    return acc

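# Illustrative usage sketch for two_sample_test_classifier above (not part of the
# original code): compare two synthetic Gaussian samples. An accuracy near 0.5 means
# the classifier cannot tell the samples apart; values near 1.0 mean it easily can.
def _demo_two_sample_test():
    rng = np.random.RandomState(42)
    x0 = rng.randn(1000, 5)          # samples from N(0, I)
    x1 = rng.randn(1000, 5) + 0.5    # samples from N(0.5, I)
    acc = two_sample_test_classifier(x0, x1, rng=rng)
    print('discrimination accuracy: {0:.3f}'.format(acc))
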
def train(model, a):
    """
    Trains the model on the loaded dataset by minimizing its training loss plus weight
    decay, using Adam with step size a. Relies on module-level settings: data,
    weight_decay_rate, minibatch, patience and monitor_every.
    """

    assert is_data_loaded(), 'Dataset hasn\'t been loaded'

    regularizer = lf.WeightDecay(model.parms, weight_decay_rate)

    trainer = trainers.SGD(
        model=model,
        trn_data=[data.trn.x],
        trn_loss=model.trn_loss + regularizer,
        val_data=[data.val.x],
        val_loss=model.trn_loss,
        step=ss.Adam(a=a)
    )
    trainer.train(
        minibatch=minibatch,
        patience=patience,
        monitor_every=monitor_every
    )

def train(net, ps, ys, val_frac=0.05, rng=np.random):
    """
    Trains a network to predict whether a simulation will fail.
    :param net: network to train
    :param ps: training inputs (parameters from prior)
    :param ys: training labels (whether simulation failed)
    :param val_frac: fraction of data to use for validation
    :param rng: random number generator
    :return: trained net
    """

    ps = np.asarray(ps, np.float32)
    ys = np.asarray(ys, np.float32)

    n_data = ps.shape[0]
    assert ys.shape[0] == n_data, 'wrong sizes'

    # shuffle data, so that training and validation sets come from the same distribution
    idx = rng.permutation(n_data)
    ps = ps[idx]
    ys = ys[idx]

    # split data into training and validation sets
    n_trn = int(n_data - val_frac * n_data)
    xs_trn, xs_val = ps[:n_trn], ps[n_trn:]
    ys_trn, ys_val = ys[:n_trn], ys[n_trn:]

    trn_target, trn_loss = lf.CrossEntropy(net.output)

    trainer = trainers.SGD(
        model=net,
        trn_data=[xs_trn, ys_trn],
        trn_loss=trn_loss,
        trn_target=trn_target,
        val_data=[xs_val, ys_val],
        val_loss=trn_loss,
        val_target=trn_target
    )
    trainer.train(
        minibatch=100,
        patience=30,
        monitor_every=1
    )

    return net

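# Illustrative usage sketch for the failure-predictor train() above (not part of the
# original code). It builds a small classifier with the same FeedforwardNet API used
# in two_sample_test_classifier and fits it on synthetic parameters and failure
# labels; the architecture and data here are made-up assumptions.
def _demo_train_failure_net(rng=np.random):
    import ml.models.neural_nets as nn

    # fake prior samples and failure labels: pretend a simulation "fails"
    # whenever the first parameter exceeds 1
    ps_demo = rng.randn(2000, 3)
    ys_demo = (ps_demo[:, 0] > 1.0).astype(np.float32)

    net_demo = nn.FeedforwardNet(3)
    net_demo.addLayer(30, 'relu', rng=rng)
    net_demo.addLayer(1, 'logistic', rng=rng)

    return train(net_demo, ps_demo, ys_demo, rng=rng)
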
def fit_neural_net_demo():
    """
    Fits a non-bayesian neural net to the training data by minimizing cross entropy.
    """

    xs, ys = create_dataset()
    net = create_net()

    # train the net
    trn_target, trn_loss = lf.CrossEntropy(net.output)
    regularizer = lf.WeightDecay(net.parms, wdecay)
    trainer = trainers.SGD(
        model=net,
        trn_data=[xs, ys],
        trn_loss=trn_loss + regularizer / xs.shape[0],
        trn_target=trn_target
    )
    trainer.train(tol=1.0e-9, monitor_every=10, show_progress=True)

    # make predictions
    tst_data, X, Y = create_grid(-12, 12, 50)
    pred = net.eval(tst_data)

    # plot the prediction surface
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    Z = pred.reshape(list(X.shape))
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0)
    ax.plot(xs[ys == 0, 0], xs[ys == 0, 1], 'b.', ms=12)
    ax.plot(xs[ys == 1, 0], xs[ys == 1, 1], 'r.', ms=12)
    ax.view_init(elev=90, azim=-90)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.axis('equal')
    ax.axis([-12, 12, -12, 12])
    fig.suptitle('Prediction surface of trained net')

    plt.show()

def bayesian_neural_net_svi_demo():
    """
    Trains a Bayesian neural net on the training set using Stochastic Variational Inference.
    """

    xs, ys = create_dataset()
    net = create_net(svi=True)
    tst_data, X, Y = create_grid(-12, 12, 50)

    # train the net
    trn_target, trn_loss = lf.CrossEntropy(net.output)
    regularizer = lf.SviRegularizer(net.mps, net.sps, wdecay)
    trainer = trainers.SGD(
        model=net,
        trn_data=[xs, ys],
        trn_loss=trn_loss + regularizer / xs.shape[0],
        trn_target=trn_target
    )
    trainer.train(maxepochs=80000, monitor_every=10, show_progress=True)

    # make predictions with zero noise
    base_pred = net.eval(tst_data, rand=False)

    # make predictions by averaging samples
    n_samples = 1000
    avg_pred = 0.0
    for _ in xrange(n_samples):
        avg_pred += net.eval(tst_data, rand=True)
    avg_pred /= n_samples

    # plot the base prediction surface
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    Z = base_pred.reshape(list(X.shape))
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0)
    ax.plot(xs[ys == 0, 0], xs[ys == 0, 1], 'b.', ms=12)
    ax.plot(xs[ys == 1, 0], xs[ys == 1, 1], 'r.', ms=12)
    ax.view_init(elev=90, azim=-90)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.axis('equal')
    ax.axis([-12, 12, -12, 12])
    fig.suptitle('Prediction surface using average weights')

    # plot the average prediction surface
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    Z = avg_pred.reshape(list(X.shape))
    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0)
    ax.plot(xs[ys == 0, 0], xs[ys == 0, 1], 'b.', ms=12)
    ax.plot(xs[ys == 1, 0], xs[ys == 1, 1], 'r.', ms=12)
    ax.view_init(elev=90, azim=-90)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.axis('equal')
    ax.axis([-12, 12, -12, 12])
    fig.suptitle('Bayesian prediction surface')

    # plot the sample prediction surfaces
    fig = plt.figure()
    fig.suptitle('Sample prediction surfaces')

    for i in xrange(6):
        sample_pred = net.eval(tst_data, rand=True)
        ax = fig.add_subplot(2, 3, i + 1, projection='3d')
        Z = sample_pred.reshape(list(X.shape))
        ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm, linewidth=0)
        ax.plot(xs[ys == 0, 0], xs[ys == 0, 1], 'b.', ms=12)
        ax.plot(xs[ys == 1, 0], xs[ys == 1, 1], 'r.', ms=12)
        ax.view_init(elev=90, azim=-90)
        plt.xlabel('x1')
        plt.ylabel('x2')
        plt.axis('equal')
        ax.axis([-12, 12, -12, 12])

    plt.show()

def learn_conditional_density(model, xs, ys, ws=None, regularizer=None, val_frac=0.05,
                              step=ss.Adam(a=1.e-4), minibatch=100, patience=20,
                              monitor_every=1, logger=sys.stdout, rng=np.random):
    """
    Train model to learn the conditional density p(y|x).
    """

    xs = np.asarray(xs, np.float32)
    ys = np.asarray(ys, np.float32)

    n_data = xs.shape[0]
    assert ys.shape[0] == n_data, 'wrong sizes'

    # shuffle data, so that training and validation sets come from the same distribution
    idx = rng.permutation(n_data)
    xs = xs[idx]
    ys = ys[idx]

    # split data into training and validation sets
    n_trn = int(n_data - val_frac * n_data)
    xs_trn, xs_val = xs[:n_trn], xs[n_trn:]
    ys_trn, ys_val = ys[:n_trn], ys[n_trn:]

    if ws is None:

        # train model without weights
        trainer = trainers.SGD(
            model=model,
            trn_data=[xs_trn, ys_trn],
            trn_loss=model.trn_loss if regularizer is None else model.trn_loss + regularizer,
            trn_target=model.y,
            val_data=[xs_val, ys_val],
            val_loss=model.trn_loss,
            val_target=model.y,
            step=step
        )
        trainer.train(
            minibatch=minibatch,
            patience=patience,
            monitor_every=monitor_every,
            logger=logger
        )

    else:

        # prepare weights
        ws = np.asarray(ws, np.float32)
        assert ws.size == n_data, 'wrong sizes'
        ws = ws[idx]
        ws_trn, ws_val = ws[:n_trn], ws[n_trn:]

        # train model with weights
        trainer = trainers.WeightedSGD(
            model=model,
            trn_data=[xs_trn, ys_trn],
            trn_losses=-model.L,
            trn_weights=ws_trn,
            trn_reg=regularizer,
            trn_target=model.y,
            val_data=[xs_val, ys_val],
            val_losses=-model.L,
            val_weights=ws_val,
            val_target=model.y,
            step=step
        )
        trainer.train(
            minibatch=minibatch,
            patience=patience,
            monitor_every=monitor_every,
            logger=logger
        )

    return model

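# Illustrative usage sketch for learn_conditional_density above (not part of the
# original code). `model` is assumed to be a conditional density estimator from the
# ml library (e.g. an MDN) exposing trn_loss, y and L as used above; the data and
# importance weights below are synthetic.
def _demo_learn_conditional_density(model, rng=np.random):
    xs = rng.randn(5000, 2)                                            # conditioning inputs
    ys = np.sum(xs, axis=1, keepdims=True) + 0.1 * rng.randn(5000, 1)  # noisy targets
    ws = np.ones(5000)                                                 # uniform importance weights
    return learn_conditional_density(model, xs, ys, ws=ws, rng=rng)
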