def main():
    # setup the model and run for num_epochs, saving the last state only
    # this is at the top so that the backend (be) is generated first
    mlp = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since cpu backend runs slower
        train = DataIterator(X_train[:1000], y_train[:1000], nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test[:1000], y_test[:1000], nclass=nclass, lshape=(1, 28, 28))
    else:
        train = DataIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path, history=2)

    # run the fit all the way through, saving a checkpoint at each epoch
    mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

    # setup model with same random seed, run epoch by epoch,
    # serializing and deserializing at each step
    mlp = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path,
                                     history=num_epochs)
    for epoch in range(num_epochs):
        # _0 points to state at end of epoch 0
        mlp.fit(train, optimizer=opt_gdm, num_epochs=epoch + 1, cost=cost, callbacks=callbacks)

        # load saved file
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        mlp.load_weights(fn)  # load the saved weights

    # compare test_oneshot_<num_epochs>.pkl to test_manyshot_<num_epochs>.pkl
    try:
        compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                              'test_manyshot_%d.pkl' % (num_epochs - 1))
    except Exception:
        print('test failed....')
        sys.exit(1)
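# `compare_model_pickles` is not defined in this snippet; a minimal sketch of
# what such a helper might look like, assuming each checkpoint unpickles to a
# nested structure of numpy arrays (the actual helper may differ):
import pickle

import numpy as np

def compare_model_pickles(fn1, fn2, rtol=1e-5, atol=1e-6):
    with open(fn1, 'rb') as f1, open(fn2, 'rb') as f2:
        m1, m2 = pickle.load(f1), pickle.load(f2)

    def check(a, b):
        # recurse through containers and compare array leaves elementwise
        if isinstance(a, dict):
            for k in a:
                check(a[k], b[k])
        elif isinstance(a, (list, tuple)):
            for x, y in zip(a, b):
                check(x, y)
        elif isinstance(a, np.ndarray):
            assert np.allclose(a, b, rtol=rtol, atol=atol)

    check(m1, m2)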
def run(train, test):
    init = Gaussian(scale=0.01)
    layers = [Conv((3, 3, 128), init=init, activation=Rectlin(),
                   strides=dict(str_h=1, str_w=2)),
              Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()),
              Pooling(2, strides=2),
              Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()),
              DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True, depth=3),
              RecurrentLast(),
              Affine(32, init=init, batch_norm=True, activation=Rectlin()),
              Affine(nout=common['nclasses'], init=init, activation=Softmax())]

    model = Model(layers=layers)
    opt = Adadelta()
    metric = Misclassification()
    callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
    return model
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[Affine(nout=100, init=Gaussian(loc=0.0, scale=0.01),
                               activation=Rectlin()),
                        Affine(nout=10, init=Gaussian(loc=0.0, scale=0.01),
                               activation=Logistic(shortcut=True))])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp, eval_set=dataset.valid_iter, **args.callback_args)

    # run fit, with the cost function set up as CrossEntropy
    mlp.fit(dataset.train_iter, optimizer=optimizer, num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()), callbacks=callbacks)
    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))
def train_model(lrmodel, opt, cost, X, Y, devX, devY, devscores):
    """
    Train model, using pearsonr on dev for early stopping
    """
    done = False
    best = -1.0
    r = np.arange(1, 6)

    train_set = ArrayIterator(X=X, y=Y, make_onehot=False)
    valid_set = ArrayIterator(X=devX, y=devY, make_onehot=False)
    eval_epoch = 10

    while not done:
        callbacks = Callbacks(lrmodel, eval_set=valid_set)
        lrmodel.fit(train_set, optimizer=opt, num_epochs=eval_epoch,
                    cost=cost, callbacks=callbacks)

        # Every 10 epochs, check Pearson on development set
        yhat = np.dot(lrmodel.get_outputs(valid_set), r)
        score = pearsonr(yhat, devscores)[0]
        if score > best:
            neon_logger.display('Dev Pearson: {}'.format(score))
            best = score
            bestlrmodel = copy.copy(lrmodel)
        else:
            done = True

        eval_epoch += 10

    yhat = np.dot(bestlrmodel.get_outputs(valid_set), r)
    score = pearsonr(yhat, devscores)[0]
    neon_logger.display('Dev Pearson: {}'.format(score))
    return bestlrmodel
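# Note on the loop above: Model.fit treats num_epochs as a running total and
# resumes from the model's current epoch index, so raising eval_epoch by 10
# each iteration trains 10 additional epochs between Pearson checks.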
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    # set up batch iterators for training, validation and test images
    train = ImgMaster(repo_dir='dataTmp_optFlow_BW', set_name='train',
                      inner_size=240, subset_pct=100)
    val = ImgMaster(repo_dir='dataTmp_optFlow_BW', set_name='train',
                    inner_size=240, subset_pct=100, do_transforms=False)
    test = ImgMaster(repo_dir='dataTestTmp_optFlow_BW', set_name='train',
                     inner_size=240, subset_pct=100, do_transforms=False)

    train.init_batch_provider()
    val.init_batch_provider()
    test.init_batch_provider()

    print("Constructing network...")
    # create the AlexNet architecture
    model = constuct_network()

    # optimizer for the model
    opt = Adadelta()

    # configure callbacks
    valmetric = TopKMisclassification(k=5)
    callbacks = Callbacks(model, train, eval_set=test, metric=valmetric,
                          **args.callback_args)

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    # flag = input("Press Enter if you want to begin training process.")
    print("Training network...")
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)
    mets = model.eval(test, metric=valmetric)

    print('Validation set metrics:')
    print('LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' %
          (mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))
    return
def main():
    parser = get_parser()
    args = parser.parse_args()
    print('Args:', args)

    loggingLevel = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=loggingLevel, format='')

    ext = extension_from_parameters(args)

    loader = p1b3.DataLoader(feature_subsample=args.feature_subsample,
                             scaling=args.scaling,
                             drug_features=args.drug_features,
                             scramble=args.scramble,
                             min_logconc=args.min_logconc,
                             max_logconc=args.max_logconc,
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)

    # initializer = Gaussian(loc=0.0, scale=0.01)
    initializer = GlorotUniform()
    activation = get_function(args.activation)()

    layers = []
    reshape = None

    if args.convolution and args.convolution[0]:
        reshape = (1, loader.input_dim, 1)
        layer_list = list(range(0, len(args.convolution), 3))
        for l, i in enumerate(layer_list):
            nb_filter = args.convolution[i]
            filter_len = args.convolution[i + 1]
            stride = args.convolution[i + 2]
            # print(nb_filter, filter_len, stride)
            # fshape: (height, width, num_filters)
            layers.append(Conv((1, filter_len, nb_filter),
                               strides={'str_h': 1, 'str_w': stride},
                               init=initializer, activation=activation))
            if args.pool:
                layers.append(Pooling((1, args.pool)))

    for layer in args.dense:
        if layer:
            layers.append(Affine(nout=layer, init=initializer, activation=activation))
        if args.drop:
            layers.append(Dropout(keep=(1 - args.drop)))
    layers.append(Affine(nout=1, init=initializer,
                         activation=neon.transforms.Identity()))

    model = Model(layers=layers)

    train_iter = ConcatDataIter(loader, ndata=args.train_samples,
                                lshape=reshape, datatype=args.datatype)
    val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples,
                              lshape=reshape, datatype=args.datatype)

    cost = GeneralizedCost(get_function(args.loss)())
    optimizer = get_function(args.optimizer)()
    callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args)

    model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs,
              cost=cost, callbacks=callbacks)
def train(self, dataset, model=None):
    """Trains the passed model on the given dataset. If no model is passed,
    `generate_default_model` is used."""
    print("[%s] Starting training..." % self.model_name)
    start = time.time()

    # The training will be run on the CPU. If a GPU is available it should be
    # used instead.
    backend = gen_backend(backend='cpu',
                          batch_size=self.batch_size,
                          rng_seed=self.random_seed,
                          stochastic_round=False)

    cost = GeneralizedCost(name='cost', costfunc=CrossEntropyMulti())

    optimizer = GradientDescentMomentum(learning_rate=self.lrate, momentum_coef=0.9)

    # set up the model and experiment
    if not model:
        model = self.generate_default_model(dataset.num_labels)

    args = NeonCallbackParameters()
    args.output_file = os.path.join(self.root_path, self.Callback_Store_Filename)
    args.evaluation_freq = 1
    args.progress_bar = False
    args.epochs = self.max_epochs
    args.save_path = os.path.join(self.root_path, self.Intermediate_Model_Filename)
    args.serialize = 1
    args.history = 100
    args.model_file = None

    callbacks = Callbacks(model, dataset.train(), args, eval_set=dataset.test())

    # add a callback that saves the best model state
    callbacks.add_save_best_state_callback(self.model_path)

    # Uncomment line below to run on GPU using cudanet backend
    # backend = gen_backend(rng_seed=0, gpu='cudanet')
    model.fit(dataset.train(), optimizer=optimizer,
              num_epochs=self.max_epochs, cost=cost, callbacks=callbacks)

    print("[%s] Misclassification error = %.1f%%" %
          (self.model_name,
           model.eval(dataset.test(), metric=Misclassification()) * 100))
    print("[%s] Finished training!" % self.model_name)

    end = time.time()
    print("[%s] Duration in seconds: %.1f" % (self.model_name, end - start))

    return model
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {'strides': 1,
                   'padding': 1,
                   'init': Xavier(local=True),  # Xavier: uniform in +/- sqrt(3 / fan_in)
                   'bias': Constant(0),
                   'activation': relu}

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 3x3 kernel, 128 feature maps
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # max pooling: highest value from each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))  # with probability 0.5, zero out activation values
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0),
                         activation=relu))  # fully connected layer over the flattened conv output
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    # out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
            cost=cost, callbacks=callbacks)
    return cnn
def it_was_correct(last_in, last_out):
    # print("naive", replay_memory.clean_values_toone(last_in)[0], last_out)
    offset_memory.add_episode(last_in, last_out)
    # guard against an empty buffer before touching outputs.size
    if offset_memory.outputs is not None and \
            offset_memory.outputs.size % bot_params.batch_size == 0:
        print("osize", offset_memory.outputs.size)
        X, y = offset_memory.get_dataset()
        train = ArrayIterator(X=X, y=y, make_onehot=False)
        mlp.fit(train, optimizer=optimizer, num_epochs=1, cost=cost,
                callbacks=Callbacks(mlp))
        mlp.save_params(bot_params.aim_weights_path)
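# `mlp`, `optimizer`, `cost` and `offset_memory` are module-level objects not
# shown in this snippet; a minimal sketch of how the neon pieces might be set
# up (layer sizes and hyperparameters are assumptions, not the originals):
from neon.backends import gen_backend
from neon.initializers import Gaussian
from neon.layers import Affine, GeneralizedCost
from neon.models import Model
from neon.optimizers import GradientDescentMomentum
from neon.transforms import Rectlin, SumSquared

be = gen_backend(backend='cpu', batch_size=128)
mlp = Model(layers=[Affine(nout=64, init=Gaussian(scale=0.01), activation=Rectlin()),
                    Affine(nout=2, init=Gaussian(scale=0.01))])
optimizer = GradientDescentMomentum(0.01, momentum_coef=0.9)
cost = GeneralizedCost(costfunc=SumSquared())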
def train(args, hyper_params, model, opt, data_set):
    # setup cost function as CrossEntropy
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    callbacks = Callbacks(model, **args.callback_args)
    callbacks.add_callback(EpochEndCallback())

    data_set.set_mode('train')
    model.fit(data_set, optimizer=opt, num_epochs=hyper_params.num_epochs,
              cost=cost, callbacks=callbacks)
    return
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab):
    """
    Return regressor to map word2vec to RNN word space

    Function modified from:
    https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py
    """
    # Gather all words from word2vec that appear in wordvecs
    d = defaultdict(lambda: 0)
    for w in w2v_vocab.keys():
        d[w] = 1
    shared = OrderedDict()
    count = 0

    for w in list(orig_wordvecs.keys())[:-2]:
        if d[w] > 0:
            shared[w] = count
            count += 1

    # Get the vectors for all words in 'shared'
    w2v = np.zeros((len(shared), 300), dtype='float32')
    sg = np.zeros((len(shared), 620), dtype='float32')

    for w in shared.keys():
        w2v[shared[w]] = w2v_W[w2v_vocab[w]]
        sg[shared[w]] = orig_wordvecs[w]

    train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False)

    layers = [Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)),
              Bias(init=Constant(0.0))]
    clf = Model(layers=layers)

    # regression model is trained using default global batch size
    cost = GeneralizedCost(costfunc=SumSquared())
    opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0)
    callbacks = Callbacks(clf)

    clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost, callbacks=callbacks)
    return clf
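# Once trained, the regressor maps 300-d word2vec rows into the 620-d RNN word
# space. A usage sketch; `w2v_batch` is a hypothetical float32 array of shape
# (n, 300):
batch_set = ArrayIterator(X=w2v_batch, make_onehot=False)
mapped = clf.get_outputs(batch_set)  # -> array of shape (n, 620)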
def run(args, train, test):
    init_uni = Uniform(low=-0.1, high=0.1)
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
    layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
              Pooling((2, 2)),
              Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
              Affine(nout=10, init=init_uni, activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=layers)
    callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)
    mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
            callbacks=callbacks)
    err = mlp.eval(test, metric=Misclassification()) * 100
    print('Misclassification error = %.2f%%' % err)
    return err
def fit(self, test_set, train_set):
    """
    Train and fit the model on the datasets

    Args:
        test_set (:obj:`neon.data.ArrayIterator`): The test set
        train_set (:obj:`neon.data.ArrayIterator`): The train set

    The callback arguments and number of epochs come from the instance's
    `callback_args` and `epochs` attributes (parsed from ArgParser input).
    """
    # configure callbacks
    callbacks = Callbacks(self.model, eval_set=test_set, **self.callback_args)
    self.model.fit(train_set,
                   optimizer=self.optimizer,
                   num_epochs=self.epochs,
                   cost=self.cost,
                   callbacks=callbacks)
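# A hypothetical usage sketch: `clf` stands in for an instance of the
# surrounding class (not shown) with model/optimizer/cost/epochs/callback_args
# already set; the iterators follow the ArrayIterator pattern used elsewhere.
from neon.data import ArrayIterator

train_set = ArrayIterator(X=X_train, y=y_train, nclass=nclass)
test_set = ArrayIterator(X=X_test, y=y_test, nclass=nclass)
clf.fit(test_set, train_set)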
      Affine(nout=16, linear_name="b1_l1", **normrelu),
      Affine(nout=10, linear_name="b1_l2", **normsigm)]
p3 = [b2,
      Affine(nout=16, linear_name="b2_l1", **normrelu),
      Affine(nout=10, linear_name="b2_l2", **normsigm)]

# setup cost function as CrossEntropy
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()),
                        GeneralizedCost(costfunc=CrossEntropyBinary()),
                        GeneralizedCost(costfunc=CrossEntropyBinary())],
                 weights=[1, 0., 0.])

# setup optimizer
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                    stochastic_round=args.rounding)

# initialize model object
alphas = [1, 0.25, 0.25]
mlp = Model(layers=Tree([p1, p2, p3], alphas=alphas))

# setup standard fit callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)

# run fit
mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)

# evaluate once, then report through both the logger and stdout
error_pct = mlp.eval(valid_set, metric=Misclassification()) * 100
logging.getLogger('neon').info("Misclassification error = %.1f%%", error_pct)
print('Misclassification error = %.1f%%' % error_pct)
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
# not used after this layer

model = Model(layers=layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())

# fit and validate
optimizer = Adam(learning_rate=0.001)

# configure callbacks
# callbacks = Callbacks(model, eval_set=eval_set)
callbacks = Callbacks(model, train_set=train)

model.fit(train, cost=cost, optimizer=optimizer, num_epochs=10, callbacks=callbacks)
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Dropout(0.4),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Dropout(0.2),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=5, **common),
        RecurrentMean(),
        Affine(nout=2, init=gauss, activation=Softmax())]
}[subj]

model = Model(layers=layers)
opt = Adagrad(learning_rate=rate)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)

if args.validate_mode:
    evaluator = Evaluator(subj, data_dir, test)
    callbacks.add_callback(evaluator)
    preds_name = 'eval.'
else:
    preds_name = 'test.'

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
model.fit(train, optimizer=opt, num_epochs=nepochs, cost=cost, callbacks=callbacks)
preds = model.get_outputs(test)[:, 1]
preds_file = preds_name + str(subj) + '.npy'
# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs,
          callbacks=callbacks)
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(),
         reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks (expand '~' so the checkpoint file can actually be opened)
checkpoint_model_path = os.path.expanduser("~/image_caption2.pickle")
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path

if args.callback_args['serialize'] is None:
    args.callback_args['serialize'] = 1

model = Model(layers=layers)

callbacks = Callbacks(model, train_set, **args.callback_args)

opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8,
              gradient_clip_value=1)

# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)

# load the saved model and evaluate the BLEU score on the test set
model.load_params(checkpoint_model_path)
test_set = ImageCaptionTest(path=data_path)
sents, targets = test_set.predict(model)
test_set.bleu_score(sents, targets)
# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = DataIterator([X_train, X_train], y_train, nclass=nclass)
valid_set = DataIterator([X_test, X_test], y_test, nclass=nclass)

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, train_set, args, eval_set=valid_set)

model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs,
          callbacks=callbacks)
model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
model.initialize(train_set, cost)
model.load_params('models/mnist/mnist_cnn.pkl', load_states=False)

# define optimizer
opt_w = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, wdecay=0.0005)
opt_b = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}, name='multiopt')

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, metric=Misclassification(),
                      **args.callback_args)
callbacks.add_callback(TrainByStageCallback(model, valid_set, Misclassification(),
                                            max_patience=5))
num_prune = [5, 10, 25, 10]
callbacks.add_callback(FuzzyPruneCallback(num_states=100, num_prune=num_prune,
                                          model=model))
print('Original Accuracy = %.2f%%' %
      (100. - model.eval(valid_set, metric=Misclassification()) * 100))
logger.info('Training ...')
model.fit(train_set, optimizer=opt, num_epochs=250,
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(),
         reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# expand '~' so the checkpoint file can actually be opened
checkpoint_model_path = os.path.expanduser("~/image_caption2.pickle")
checkpoint_schedule = range(num_epochs)

model = Model(layers=layers)
callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar)
callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8,
              clip_gradients=True, gradient_limit=1.0)

# train model
model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost,
          callbacks=callbacks)
LR_start = 1.65e-2

def ShiftAdaMax_with_Scale(LR=1):
    return ShiftAdaMax(learning_rate=LR_start * LR,
                       schedule=ShiftSchedule(2, shift_size=1))

optimizer = MultiOptimizer({
    'default': ShiftAdaMax_with_Scale(),
    'BinaryLinear_0': ShiftAdaMax_with_Scale(57.038),
    'BinaryLinear_1': ShiftAdaMax_with_Scale(73.9008),
    'BinaryLinear_2': ShiftAdaMax_with_Scale(73.9008),
    'BinaryLinear_3': ShiftAdaMax_with_Scale(52.3195)
})

# initialize model object
bnn = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(bnn, eval_set=valid_set, **args.callback_args)

# run fit
bnn.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)
print('Misclassification error = %.1f%%' %
      (bnn.eval(valid_set, metric=Misclassification()) * 100))
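# MultiOptimizer (also used in other snippets above) dispatches by name: a key
# can be a layer class name (e.g. 'Bias') or an individual layer's name, with
# 'default' covering everything else. A minimal sketch with stock neon
# optimizers (the learning rates are illustrative, not from the original):
from neon.optimizers import GradientDescentMomentum, MultiOptimizer

opt = MultiOptimizer({'default': GradientDescentMomentum(0.01, momentum_coef=0.9),
                      'Bias': GradientDescentMomentum(0.02, momentum_coef=0.9)})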
model, cost = create_network()
rseed = 0 if args.rng_seed is None else args.rng_seed

# setup data provider
assert 'train' in args.manifest, "Missing train manifest"
assert 'val' in args.manifest, "Missing validation manifest"
train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root,
                                  model.be, args.subset_pct, rseed)
valid = make_validation_loader(args.manifest['val'], args.manifest_root,
                               model.be, args.subset_pct)

sched_weight = Schedule([10], change=0.1)
opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=valid, metric=valmetric, **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train, valid)

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)
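# Schedule([10], change=0.1) above multiplies the base learning rate by 0.1
# once epoch 10 is reached. A sketch of a two-step decay (the step epochs are
# illustrative choices, not from the original):
from neon.optimizers import Schedule

sched = Schedule(step_config=[10, 20], change=0.1)  # lr*0.1 at epoch 10, lr*0.01 at 20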
classifier = TextClassifier(options.word_vectors, options.model_file,
                            optimizer=optimizer, num_analytics_features=0,
                            num_subject_words=0, num_body_words=60,
                            overlapping_classes=overlapping_classes,
                            exclusive_classes=exclusive_classes,
                            network_type=options.network_type)

# we will supersede the email classification function to test the content
# classification network only
print('loading sentiment data from {}'.format(options.sentiment_path))
sdata = SentimentLoader(classifier, options.sentiment_path)

if options.shuffle_test:
    print('SHUFFLE TEST BEGIN... please wait...')
    sdata.train.test_shuffle()
    sdata.test.test_shuffle()
    print('SHUFFLE TEST PASSED... reloading inputs and targets')
    sdata = SentimentLoader(classifier, options.sentiment_path)

callbacks = Callbacks(classifier.neuralnet, eval_freq=1, eval_set=sdata.test,
                      metric=Misclassification())
callbacks.add_callback(MisclassificationTest(sdata.test, Misclassification()))
print('Training neural networks on {} samples for {} epochs'.format(
    sdata.train.targets[0].shape[1], options.epochs))
classifier.fit(sdata.train, optimizer, options.epochs, callbacks)
print('finished sentiment classification test, exiting')
exit(0)

classifier = TextClassifier(options.word_vectors, options.model_file,
                            optimizer=optimizer,
                            num_analytics_features=0 if options.sentiment_path else 4,
                            overlapping_classes=overlapping_classes,
                            exclusive_classes=exclusive_classes,
                            network_type=options.network_type,
                            lookup_size=options.lookup_size,
                            lookup_dim=options.lookup_dim)

# determine if we expect to use a csv file or a maildir as our data source
if os.path.isfile(options.data_path):
# Pooling(fshape=2, strides=2),
# Affine(nout=500, init=init_uni, activation=Rectlin()),
# Affine(nout=10, init=init_uni, activation=Softmax())]
# learning_rate = 0.005
# momentum = 0.9

cnn = Model(layers=layers)

# - cost function
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# - learning rule
optimizer = GradientDescentMomentum(learning_rate, momentum_coef=momentum)

# per-epoch progress bar and evaluation callbacks
callbacks = Callbacks(cnn, eval_set=test_set, **args.callback_args)

# put everything together!
cnn.fit(train_set, optimizer=optimizer, num_epochs=epochs, cost=cost,
        callbacks=callbacks)

# # Calculate test set results
# results = cnn.get_outputs(test_set)
# dump(cnn, "cnn_0_005.jbl")

# # work out the performance!
# error = cnn.eval(test_set, metric=Misclassification())
layers.append(Affine(nout=1000, init=init1, bias=Constant(-7),
                     activation=Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, output_file=args.output_file)

if args.validation_freq:
    class TopKMetrics(Callback):
        def __init__(self, valid_set, epoch_freq=args.validation_freq):
            super(TopKMetrics, self).__init__(epoch_freq=epoch_freq)
            self.valid_set = valid_set

        def on_epoch_end(self, epoch):
            self.valid_set.reset()
            allmetrics = TopKMisclassification(k=5)
            stats = mlp.eval(self.valid_set, metric=allmetrics)
            print(", ".join(allmetrics.metric_names) + ": " +
                  ", ".join(map(str, stats.flatten())))
                          depth=4, n_extra_layers=4, batch_norm=True,
                          dis_iters=5, wgan_param_clamp=0.01, wgan_train_sched=True)

# setup optimizer
optimizer = RMSProp(learning_rate=5e-5, decay_rate=0.99, epsilon=1e-8)

# setup data provider
train = make_loader(args.manifest['train'], args.manifest_root, model.be,
                    args.subset_pct, random_seed)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)
fdir = ensure_dirs_exist(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                      'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] +\
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname), hw=64,
               num_samples=args.batch_size, nchan=3, sym_range=True)
callbacks.add_callback(GANPlotCallback(**im_args))
callbacks.add_callback(GANCostCallback())

# model fit
model.fit(train, optimizer=optimizer,
def stopFunc(s, v):
    # Stop if validation error ever increases from epoch to epoch
    if s is None:
        return (v, False)
    return (min(v, s), v > s)

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
if args.validation_freq is None:
    args.validation_freq = 1

callbacks = Callbacks(mlp, train_set, output_file=args.output_file,
                      valid_set=valid_set, valid_freq=args.validation_freq,
                      progress_bar=args.progress_bar)
callbacks.add_early_stop_callback(stopFunc)
callbacks.add_save_best_state_callback(os.path.join(args.data_dir,
                                                    "early_stop-best_state.pkl"))
mlp.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
        callbacks=callbacks)
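# The early-stop hook receives (state, value) and returns (new_state, stop);
# stopFunc above halts on the first uptick in validation cost. A sketch of a
# more tolerant, patience-based variant (the patience threshold is an
# illustrative choice, not from the original):
def patience_stop(state, val, patience=3):
    if state is None:
        return ((val, 0), False)
    best, misses = state
    if val < best:
        return ((val, 0), False)  # improved: reset the miss counter
    return ((best, misses + 1), misses + 1 >= patience)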
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1))
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1))  # 12->6
layers.append(Dropout(keep=0.5))
layers.append(Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1))
layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1))
layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1))
layers.append(Pooling(6, op='avg'))
layers.append(Activation(Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

mlp = Model(layers=layers)
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args)
if args.deconv:
    callbacks.add_deconv_callback(train, test)

mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost,
        callbacks=callbacks)

test.exit_batch_provider()
train.exit_batch_provider()
# setup model layers
layers = [Conv((5, 5, 16), init=init_norm, activation=Rectlin()),
          Pooling(2),
          Conv((5, 5, 32), init=init_norm, activation=Rectlin()),
          Pooling(2),
          Conv((3, 3, 32), init=init_norm, activation=Rectlin()),
          Pooling(2),
          Affine(nout=100, init=init_norm, activation=Rectlin()),
          Linear(nout=4, init=init_norm)]

model = Model(layers=layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())

# fit and validate
optimizer = RMSProp()

# configure callbacks
callbacks = Callbacks(model, eval_set=eval_set, eval_freq=1)

model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=10,
          callbacks=callbacks)

y_test = model.get_outputs(test_set)