def train_model(lrmodel, opt, cost, X, Y, devX, devY, devscores):
    """
    Train model, using pearsonr on dev for early stopping
    """
    done = False
    best = -1.0
    r = np.arange(1, 6)

    train_set = ArrayIterator(X=X, y=Y, make_onehot=False)
    valid_set = ArrayIterator(X=devX, y=devY, make_onehot=False)

    eval_epoch = 10

    while not done:
        callbacks = Callbacks(lrmodel, eval_set=valid_set)
        lrmodel.fit(train_set, optimizer=opt, num_epochs=eval_epoch,
                    cost=cost, callbacks=callbacks)

        # Every 10 epochs, check Pearson on development set
        yhat = np.dot(lrmodel.get_outputs(valid_set), r)
        score = pearsonr(yhat, devscores)[0]
        if score > best:
            neon_logger.display('Dev Pearson: {}'.format(score))
            best = score
            bestlrmodel = copy.copy(lrmodel)
        else:
            done = True

        eval_epoch += 10

    yhat = np.dot(bestlrmodel.get_outputs(valid_set), r)
    score = pearsonr(yhat, devscores)[0]
    neon_logger.display('Dev Pearson: {}'.format(score))
    return bestlrmodel
def test_iterator():
    print('Testing iterator based data loader')
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()

    (X_train, y_train), (X_test, y_test), nclass = load_cifar10_imgs(path=args.data_dir)
    train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
    test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))
    return run(args, train, test)
def test_dataset(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = ArrayIterator(X_train, y_train, nclass=nclass)
    train_set.be = NervanaObject.be

    for i in range(2):
        for X_batch, y_batch in train_set:
            neon_logger.display("Xshape: {}, yshape: {}".format(X_batch.shape, y_batch.shape))
        train_set.index = 0
def test_dataset(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = ArrayIterator(X_train, y_train, nclass=nclass)
    train_set.be = NervanaObject.be

    for i in range(2):
        for X_batch, y_batch in train_set:
            print(X_batch.shape, y_batch.shape)
        train_set.index = 0
def test_iterator():
    print('Testing data iterator')
    NervanaObject.be.gen_rng(args.rng_seed)
    image_dir = os.path.join(args.data_dir, 'cifar-extracted')
    train_manifest, val_manifest = ingest_cifar10(out_dir=image_dir)

    (X_train, y_train), (X_test, y_test), nclass = load_cifar10_imgs(train_manifest, val_manifest)
    train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
    test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))
    return run(args, train, test)
def load_data_to_array_iterator(self):
    """
    Load data into ArrayIterators

    Returns:
        tuple(:obj:`neon.data.ArrayIterators`): ArrayIterator train_set, ArrayIterator test_set
    """
    X_train, y_train, X_test, y_test = self.load_data_from_file()
    train_set = ArrayIterator(X=X_train, y=y_train, nclass=2)
    if X_test.size == 0 and y_test.size == 0:
        test_set = None
    else:
        test_set = ArrayIterator(X=X_test, y=y_test, nclass=2)
    return train_set, test_set
def wsd_classify(x_test, y_test=None):
    """
    Classify the target word, outputting all word senses ranked from most to
    least probable sense.

    Args:
        x_test (numpy.ndarray): input x data for inference
        y_test: input y data for inference

    Returns:
        str: predicted values by the model
    """
    # test set
    x_test = np.array(x_test)
    if y_test is not None:
        y_test = np.array(y_test)
    test_set = ArrayIterator(X=x_test, y=y_test, make_onehot=False)

    mlp_clf = MostCommonWordSense(args.rounding, args.callback_args, args.epochs)
    # load existing model
    mlp_clf.load(args.model_prm)

    results = mlp_clf.get_outputs(test_set)
    return results
def predict_batch(batch):
    inp = ArrayIterator(X=batch, y=None, nclass=2, lshape=params.frame_shape, make_onehot=False)
    return model.get_outputs(inp)
def load_data(self):
    # load up the mnist data set
    # split into train and test sets
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=self.args.data_dir)

    # setup a training set iterator
    self.train_set = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
    # setup a validation data set iterator
    self.valid_set = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))
def get_offset_predicted_naive(predictions):
    # naive linear estimate of the offset (kept for reference; the MLP output below is returned)
    naive_offset = (replay_memory.clean_values_toone(predictions)[0] * 30) + 30

    # build a single-sample batch (assumed to mirror get_offset_predicted)
    batch = np.zeros((bot_params.batch_size, 1))
    batch[0, 0] = replay_memory.clean_values_toone(predictions)[0]

    input_iter = ArrayIterator(X=batch, y=None, make_onehot=False)
    outputs = mlp.get_outputs(input_iter)
    outputs_ = 30 * outputs[0, 0]
    print("outputs", outputs_)
    return int(round(outputs_))
def test_model_get_outputs(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)
    train_set = ArrayIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)
    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
def get_offset_predicted(predictions):
    batch = np.zeros((bot_params.batch_size, 1))
    batch[0, 0] = replay_memory.clean_values_toone(predictions)[0]

    input_iter = ArrayIterator(X=batch, y=None, make_onehot=False)
    outputs = mlp.get_outputs(input_iter)
    outputs_ = 30 * outputs[0, 0]
    print("outputs", outputs_)
    return int(round(outputs_))
def most_common_word_train(x_train, y_train, x_valid, y_valid):
    """
    Train an MLP model, save it and evaluate it

    Args:
        x_train: x data for training
        y_train: y data for training
        x_valid: x data for validation
        y_valid: y data for validation

    Returns:
        str: results, predicted values by the model
    """
    # train set
    x_train = np.array(x_train)
    y_train1 = np.array(y_train)
    train_set = ArrayIterator(X=x_train, y=y_train1, make_onehot=False)

    # validation set
    x_valid = np.array(x_valid)
    y_valid1 = np.array(y_valid)
    valid_set = ArrayIterator(X=x_valid, y=y_valid1, make_onehot=False)

    mlp_model = MostCommonWordSense(args.rounding, args.callback_args, args.epochs)
    # build model
    mlp_model.build()
    # train
    mlp_model.fit(valid_set, train_set)

    # save model
    mlp_model.save(args.model_prm)

    # evaluation
    error_rate = mlp_model.eval(valid_set)
    logger.info('Mis-classification error on validation set= %0.1f', error_rate * 100)

    results = mlp_model.get_outputs(valid_set)
    return results
def visualize(model, data, max_fm, filename):
    data_shape = data.shape
    data = data.reshape((data.shape[0], -1))
    dataset = ArrayIterator(data, lshape=data_shape[1:])

    deconv_file = h5py.File("no_file", driver='core', backing_store=False)
    deconv = DeconvCallback(dataset, dataset, max_fm=max_fm, dataset_pct=100)
    deconv.on_train_end(deconv_file, model)

    deconv_data = h5_deconv_data(deconv_file)
    deconv_summary_page(filename, deconv_data, max_fm)
def predict(input_img):
    # model.set_batch_size(1)
    x_new = np.zeros((params.batch_size, input_img.size), dtype=np.float16)
    x_new[0] = mem.prepare_image(input_img)
    inp = ArrayIterator(X=x_new, y=None, nclass=2, lshape=params.frame_shape, make_onehot=False)
    qvalues = model.get_outputs(inp)
    return qvalues[0][0]
def DeepCascadeLearning(modelLayers, X_train, Y_train, callbacks,
                        init_uni=Uniform(low=-0.1, high=0.1),
                        testIterator=None, epochs=2,
                        cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
                        opt_gdm=GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)):
    importantLayersIndexes = list()
    i = 0
    outputLayer = Affine(nout=10, init=init_uni, activation=Softmax())
    modelToPredict = None

    # record the indexes of the Convolution/Affine layers that delimit each cascade stage
    for currentLayer in modelLayers:
        if np.shape(currentLayer):
            currentLayer = currentLayer[0]
        if (currentLayer.classnm == 'Convolution') or (currentLayer.classnm == 'Affine'):
            importantLayersIndexes.append(i)
        i += 1

    for i in importantLayersIndexes:
        modelToTrain = list()
        # NOTE: the slice below assumes each recorded layer index also matches its
        # position within importantLayersIndexes
        for currentLayer in modelLayers[i:importantLayersIndexes[i + 1]]:
            modelToTrain.append(currentLayer)
        modelToTrain.append(outputLayer)
        modelToTrain = Model(modelToTrain)

        if modelToPredict is None:
            trainIterator = ArrayIterator(X_train, Y_train, nclass=10, lshape=(3, 32, 32))
            x = trainIterator.__iter__()
            callbacks = Callbacks(modelToTrain)
            modelToTrain.fit(trainIterator, optimizer=opt_gdm, num_epochs=epochs,
                             cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
                             callbacks=callbacks)
        else:
            tmpIterator = ArrayIterator(X_train, lshape=(3, 32, 32))
            tmpTrain = modelToPredict.get_outputs(tmpIterator)
            tmpIterator = ArrayIterator(tmpTrain[0:20], Y_train[0:20], nclass=10, lshape=(32, 30, 30))
            modelToTrain.fit(tmpIterator, optimizer=opt_gdm, num_epochs=epochs, cost=cost)

        if modelToPredict is None:
            modelToPredict = list()
        else:
            modelToPredict = modelToPredict.layers.layers
        for currentLayer in modelToTrain.layers.layers[0:-2]:
            modelToPredict.append(currentLayer)
        modelToPredict = Model(modelToPredict)

    return modelToPredict
def it_was_correct(last_in, last_out):
    # print("naive", replay_memory.clean_values_toone(last_in)[0], last_out)
    offset_memory.add_episode(last_in, last_out)
    print("osize", offset_memory.outputs.size)
    if offset_memory.outputs is not None and offset_memory.outputs.size % bot_params.batch_size == 0:
        X, y = offset_memory.get_dataset()
        train = ArrayIterator(X=X, y=y, make_onehot=False)
        mlp.fit(train, optimizer=optimizer, num_epochs=1, cost=cost, callbacks=Callbacks(mlp))
        mlp.save_params(bot_params.aim_weights_path)
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')

    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
              Bias(init=Constant(0.0))]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost, callbacks=callbacks)
    return clf
from neon.data import ArrayIterator, load_mnist
from neon.initializers import Gaussian
from neon.layers import GeneralizedCost, Affine, Sequential, MergeMultistream
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Rectlin, Logistic, CrossEntropyBinary
from neon.callbacks.callbacks import Callbacks
from neon.util.argparser import NeonArgparser

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    Conv(fshape=(4, 4, 16), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Conv(fshape=(4, 4, 32), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Conv(fshape=(4, 4, 32), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Affine(nout=500, init=init_uni, activation=Rectlin()),
    Affine(nout=11, init=init_uni, activation=Softmax())
]

model = Model(layers)
model.load_params('model.pkl')

data = readfile('PreImage', 'label.csv')
X_test = data.test_data
test_set = ArrayIterator(X_test, None, nclass=11, lshape=(1, 200, 200))
true = data.test_label

out = model.get_outputs(test_set)
row = len(X_test)
pred = np.zeros((row, 1))
i = 0
while i < row:
    pred[i] = out[i].argmax()
    i = i + 1
pred = pred + 1
loss = abs(true - pred)
print(loss)
count = 0
for i in range(len(loss)):
    if loss[i] != 0:
        count = count + 1
from neon import NervanaObject
from neon.util.argparser import NeonArgparser
from neon.initializers import Gaussian
from neon.layers import Conv, Pooling, GeneralizedCost, Affine
from neon.optimizers import GradientDescentMomentum, MultiOptimizer, Schedule
from neon.transforms import Rectlin, Softmax, CrossEntropyMulti
from neon.models import Model
from neon.data import ArrayIterator
import numpy as np

parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3 * 224 * 224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01), activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]

model = Model(layers=layers)
from neon.data import ArrayIterator, load_cifar10
from neon.initializers import Uniform
from neon.layers import GeneralizedCost, Affine
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Misclassification, CrossEntropyBinary, Logistic, Rectlin
from neon.callbacks.callbacks import Callbacks
from neon.util.argparser import NeonArgparser

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

(X_train, y_train), (X_test, y_test), nclass = load_cifar10(path=args.data_dir)
train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))

init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)

# set up the model layers
layers = [Affine(nout=200, init=init_uni, activation=Rectlin()),
          Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True))]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

mlp = Model(layers=layers)
drop_layers = {ky: -1 for ky in ["drop6", "drop7"]}
for l in drop_layers:
    drop_layer = md.getlayer(l)
    drop_layers[l] = drop_layer['config']['keep']
    drop_layer['config']['keep'] = 1.0

md = dict(md)

# generate a fake input
IM_SIZE = (be.bsz, 3, 224, 224)
np.random.seed(1)
im = np.random.randint(-150, 150, IM_SIZE)
fake_labels = np.zeros((IM_SIZE[0], 10))

# need this iterator to initialize the model
train = ArrayIterator(im.reshape(IM_SIZE[0], -1).copy(), fake_labels, nclass=10, lshape=IM_SIZE[1:])

# deserialize the neon model
model = Model(md, train)

# make sure the deserialization is setting the compat mode correctly
assert be.compat_mode == 'caffe'

# generate a fake input
im_neon = be.array(im.reshape(IM_SIZE[0], -1).astype(np.float32).T.copy())
out_neon = model.fprop(im_neon)

# get the neon layer output order using the layer names
l_map = []
loss_lay = model.layers.layers[-1].name
polar_predict, filenames = load_data(os.path.join(NACPolarDir, 'Unlabelled Regions'), is_augment=False)
files = []
locations = []
for filename in filenames:
    files.append(filename.split('/')[-1])
    locations.append(filename.split(files[-1])[0])
log('Data loaded from ' + os.path.join(NACPolarDir, 'Unlabelled Regions'))

classes = ["Crater", "Not Crater"]
inference_set = ArrayIterator(polar_predict, None, nclass=2, lshape=(1, 32, 32))
out = model.get_outputs(inference_set)
labels = np.argmax(out, axis=1)

count = []
threshold = []
threshold = np.subtract(
    1, np.divide(
        np.multiply(-400, np.power((np.arange(100).astype(float)), 2)),
        10 * np.min(np.multiply(-400, np.power((np.arange(100).astype(float)), 2)))))
import numpy as np

from neon.backends import gen_backend
from neon.data import ArrayIterator
from neon.initializers import Gaussian

# parser = NeonArgparser(__doc__)
# args = parser.parse_args(gen_be=False)
be = gen_backend(batch_size=100, backend='gpu')

traindir = 'train'
imwidth = 256

data = np.load("data.npy")
coordinate = np.load("coordinate.npy")
# X_train, y_train = data[0:1], coordinate[0:256]
X_train, y_train = np.asarray(data[0:3000]), np.asarray(coordinate[0:3000])

train_set = ArrayIterator(X_train[0:2000], y_train[0:2000], make_onehot=False, lshape=(3, 256, 256))
eval_set = ArrayIterator(X_train[1000:2000], y_train[1000:2000], make_onehot=False, lshape=(3, 256, 256))
test_set = ArrayIterator(X_train[2000:2500], y_train[2000:2500], make_onehot=False, lshape=(3, 256, 256))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# setup model layers
def evaluate(model, vocab, data_path, seed=1234, evaltest=False, vocab_size_layer=20002):
    """
    Run experiment
    """
    neon_logger.display('Preparing SICK evaluation data...')

    # Check if SICK data exists in specified directory, otherwise download
    sick_data = SICK(path=data_path)
    train, dev, test, scores = sick_data.load_eval_data()

    np.random.seed(seed)
    shuf_idxs = np.random.permutation(range(len(train[0])))
    train_A_shuf = train[0][shuf_idxs]
    train_B_shuf = train[1][shuf_idxs]
    scores_shuf = scores[0][shuf_idxs]

    train_A_tok = tokenize_input(train_A_shuf, vocab=vocab)
    train_B_tok = tokenize_input(train_B_shuf, vocab=vocab)
    dev_A_tok = tokenize_input(dev[0], vocab=vocab)
    dev_B_tok = tokenize_input(dev[1], vocab=vocab)

    # Get iterator from tokenized data
    train_set_A = SentenceEncode(train_A_tok, [], len(train_A_tok), vocab_size_layer,
                                 max_len=30, index_from=2)
    train_set_B = SentenceEncode(train_B_tok, [], len(train_B_tok), vocab_size_layer,
                                 max_len=30, index_from=2)

    # Compute embeddings using iterator
    trainA = model.get_outputs(train_set_A)
    trainB = model.get_outputs(train_set_B)

    dev_set_A = SentenceEncode(dev_A_tok, [], len(dev_A_tok), vocab_size_layer,
                               max_len=30, index_from=2)
    dev_set_B = SentenceEncode(dev_B_tok, [], len(dev_B_tok), vocab_size_layer,
                               max_len=30, index_from=2)

    devA = model.get_outputs(dev_set_A)
    devB = model.get_outputs(dev_set_B)

    trainF = np.c_[np.abs(trainA - trainB), trainA * trainB]
    devF = np.c_[np.abs(devA - devB), devA * devB]

    trainY = encode_labels(scores_shuf, ndata=len(trainF))
    devY = encode_labels(scores[1], ndata=len(devF))

    lrmodel, opt, cost = prepare_model(ninputs=trainF.shape[1])

    neon_logger.display('Training the regression model...')
    bestlrmodel = train_model(lrmodel, opt, cost, trainF, trainY, devF, devY,
                              scores[1][:len(devF)])

    if evaltest:
        test_A_tok = tokenize_input(test[0], vocab=vocab)
        test_B_tok = tokenize_input(test[1], vocab=vocab)
        test_set_A = SentenceEncode(test_A_tok, [], len(test_A_tok), vocab_size_layer,
                                    max_len=30, index_from=2)
        test_set_B = SentenceEncode(test_B_tok, [], len(test_B_tok), vocab_size_layer,
                                    max_len=30, index_from=2)
        testA = model.get_outputs(test_set_A)
        testB = model.get_outputs(test_set_B)

        testF = np.c_[np.abs(testA - testB), testA * testB]

        neon_logger.display('Evaluating using vectors and linear regression model')
        r = np.arange(1, 6)
        yhat = np.dot(bestlrmodel.get_outputs(ArrayIterator(testF)), r)

        pr = pearsonr(yhat, scores[2][:len(yhat)])[0]
        sr = spearmanr(yhat, scores[2][:len(yhat)])[0]
        se = np.mean((yhat - scores[2][:len(yhat)]) ** 2)
        neon_logger.display('Test Pearson: ' + str(pr))
        neon_logger.display('Test Spearman: ' + str(sr))
        neon_logger.display('Test MSE: ' + str(se))

        return yhat
def main():
    # setup the model and run for num_epochs saving the last state only
    # this is at the top so that the be is generated
    model = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    NN = batch_size * 5  # avoid partial mini batches
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since cpu backend runs slower
        train = ArrayIterator(X_train[:NN], y_train[:NN], nclass=nclass, lshape=(1, 28, 28))
        valid = ArrayIterator(X_test[:NN], y_test[:NN], nclass=nclass, lshape=(1, 28, 28))
    else:
        train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step

    callbacks = Callbacks(model)
    callbacks.add_callback(SerializeModelCallback(checkpoint_model_path,
                                                  checkpoint_schedule,
                                                  history=2))

    # run the fit all the way through, saving a checkpoint each epoch
    model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

    # setup model with same random seed, run epoch by epoch,
    # serializing and deserializing at each step
    model = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step
    for epoch in range(num_epochs):
        # _0 points to state at end of epoch 0
        callbacks = Callbacks(model)
        callbacks.add_callback(SerializeModelCallback(checkpoint_model_path,
                                                      checkpoint_schedule,
                                                      history=num_epochs))
        model.fit(train, optimizer=opt_gdm, num_epochs=epoch + 1, cost=cost, callbacks=callbacks)

        # load saved file
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        model.load_params(fn)  # load the saved weights

    # compare test_oneshot_<num_epochs>.pkl to test_manyshot_<num_epochs>.pkl
    if not compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                                 'test_manyshot_%d.pkl' % (num_epochs - 1)):
        print('No Match')
        sys.exit(1)
    else:
        print('Match')
config = Config()

# setup backend
be = gen_backend(backend=config.backend,
                 batch_size=config.batch_size,
                 rng_seed=config.rng_seed,
                 datatype=np.float32,
                 stochastic_round=False)

# generate data
X = np.random.rand(config.batch_size, config.image_width ** 2 * 3)
y = np.random.randint(config.ydim, size=config.batch_size)

# setup a training set iterator
data = ArrayIterator(X, y, nclass=config.ydim, lshape=(3, config.image_width, config.image_width))

# setup weight initialization function
init_norm = Gaussian(loc=0.0, scale=0.01)
init1 = Gaussian(scale=0.01)
init2 = Gaussian(scale=0.03)
init_uni = GlorotUniform()

# setup model layers
# no local response normalization
relu = Rectlin()
layers = []
layers.append(Conv((11, 11, 96), strides=4,
init_emb = Uniform(-0.1 / embedding_dim, 0.1 / embedding_dim)

h5f = h5py.File(fname_h5, 'r')
reviews, h5train, h5valid = h5f['reviews'], h5f['train'], h5f['valid']
ntrain, nvalid, nclass = reviews.attrs['ntrain'], reviews.attrs['nvalid'], reviews.attrs['nclass']

# make train dataset
Xy = h5train[:ntrain]
X = [xy[1:] for xy in Xy]
y = [xy[0] for xy in Xy]
X_train, y_train = get_paddedXY(X, y, vocab_size=vocab_size, sentence_length=sentence_length)
train_set = ArrayIterator(X_train, y_train, nclass=nclass)

# make valid dataset
Xy = h5valid[:nvalid]
X = [xy[1:] for xy in Xy]
y = [xy[0] for xy in Xy]
X_valid, y_valid = get_paddedXY(X, y, vocab_size=vocab_size, sentence_length=sentence_length)
valid_set = ArrayIterator(X_valid, y_valid, nclass=nclass)

# initialization
init_glorot = GlorotUniform()

# define layers
import numpy as np
import pytest

from neon import NervanaObject
from neon.backends import gen_backend
from neon.benchmark import Benchmark
from neon.data import ArrayIterator
from neon.initializers import Gaussian
from neon.layers import Conv, Affine, Pooling
from neon.models import Model

gen_backend(batch_size=16)

test_layers = [Conv((2, 2, 4), init=Gaussian(scale=0.01), padding=3, strides=4),
               Pooling(3, strides=2),
               Affine(nout=10, init=Gaussian(scale=0.01))]

NervanaObject.be.enable_winograd = 4
x = np.random.uniform(-1, 1, (16, 3 * 2 * 2))
y = np.random.randint(0, 9, (16, 10))
test_dataset = ArrayIterator(x, y, nclass=10, lshape=(3, 2, 2))


def test_empty_dataset():
    model = Model(test_layers)
    b = Benchmark(model=model)
    with pytest.raises(ValueError):
        b.time([], niterations=5, inference=True)


def test_fw_bw_no_cost_or_optimizer():
    model = Model(test_layers)
    model.initialize(test_dataset)
    b = Benchmark(model=model)
    with pytest.raises(RuntimeError):
        b.time(test_dataset, niterations=1)
from neon.util.argparser import NeonArgparser

parser = NeonArgparser(__doc__)
args = parser.parse_args()

from neon.data import MNIST
from neon.data import ArrayIterator

mnist = MNIST()
(X_train, y_train), (X_test, y_test), nclass = mnist.load_data()
# print("X_test: %s" % X_test[1])

# setup training set iterator
train_set = ArrayIterator(X_train, y_train, nclass=nclass)
# setup test set iterator
test_set = ArrayIterator(X_test, y_test, nclass=nclass)

# Initialize weights to small random numbers with Gaussian
from neon.initializers import Gaussian
init_norm = Gaussian(loc=0.0, scale=0.01)

# Affine is a fully connected layer with 100 hidden units
from neon.layers import Affine

# We will use ReLU for hidden units and Softmax for output units. Softmax ensures
# that all outputs sum to 1 and lie in the range [0, 1].
from neon.transforms import Rectlin, Softmax

layers = []
from neon.data import ArrayIterator, load_mnist
from neon.initializers import Uniform
from neon.layers import Conv, Pooling, GeneralizedCost, Deconv
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Rectlin, SumSquared
from neon.callbacks.callbacks import Callbacks
from neon.util.argparser import NeonArgparser

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# Load dataset
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)

# Set input and target to X_train
train = ArrayIterator(X_train, lshape=(1, 28, 28))

# Initialize the weights and the learning rule
init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9)

# Strided conv autoencoder
bn = False
layers = [Conv((4, 4, 8), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling(2),
          Conv((4, 4, 32), init=init_uni, activation=Rectlin(), batch_norm=bn),
          Pooling(2),
          Deconv(fshape=(4, 4, 8), init=init_uni, activation=Rectlin(),
def test_model_serialize(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([Conv((5, 5, 16), init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Pooling(2),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    path2 = Sequential([Affine(nout=100, init=init_norm, bias=Constant(0), activation=Rectlin()),
                        Dropout(keep=0.5),
                        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())])
    layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
              Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3

    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    mlp.save_params(tmp_save, keep_states=True)

    # Load model
    mlp = Model(tmp_save)
    mlp.initialize(train_set)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)

        for p, p_e in zip(pd['params'], pd_exp['params']):
            assert type(p) == type(p_e)
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            elif isinstance(p, np.ndarray):
                assert np.allclose(p, p_e)
            else:
                assert p == p_e

    os.remove(tmp_save)