def train(self, dataset, model=None):
    """Trains the passed model on the given dataset. If no model is passed,
    `generate_default_model` is used."""
    print("[%s] Starting training..." % self.model_name)
    start = time.time()
    # Training runs on the CPU; if a GPU is available it should be used instead
    # (see the commented-out cudanet line below).
    backend = gen_backend(backend='cpu',
                          batch_size=self.batch_size,
                          rng_seed=self.random_seed,
                          stochastic_round=False)
    cost = GeneralizedCost(name='cost', costfunc=CrossEntropyMulti())
    optimizer = GradientDescentMomentum(learning_rate=self.lrate, momentum_coef=0.9)

    # set up the model and experiment
    if not model:
        model = self.generate_default_model(dataset.num_labels)

    args = NeonCallbackParameters()
    args.output_file = os.path.join(self.root_path, self.Callback_Store_Filename)
    args.evaluation_freq = 1
    args.progress_bar = False
    args.epochs = self.max_epochs
    args.save_path = os.path.join(self.root_path, self.Intermediate_Model_Filename)
    args.serialize = 1
    args.history = 100
    args.model_file = None
    callbacks = Callbacks(model, dataset.train(), args, eval_set=dataset.test())

    # add a callback that saves the best model state
    callbacks.add_save_best_state_callback(self.model_path)

    # Uncomment the line below to run on GPU using the cudanet backend
    # backend = gen_backend(rng_seed=0, gpu='cudanet')
    model.fit(dataset.train(), optimizer=optimizer, num_epochs=self.max_epochs,
              cost=cost, callbacks=callbacks)

    print("[%s] Misclassification error = %.1f%%"
          % (self.model_name, model.eval(dataset.test(), metric=Misclassification()) * 100))
    print("[%s] Finished training!" % self.model_name)
    end = time.time()
    print("[%s] Duration in seconds: %.2f" % (self.model_name, end - start))
    return model
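# A minimal sketch (not from the original) of choosing the backend at runtime instead of
# hard-coding 'cpu': try the GPU backend first and fall back to the CPU one. Assumes a
# neon 1.x-style gen_backend; the batch size and seed values are placeholders.
from neon.backends import gen_backend

try:
    be = gen_backend(backend='gpu', batch_size=128, rng_seed=0)
except Exception:
    be = gen_backend(backend='cpu', batch_size=128, rng_seed=0)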
def train(args, hyper_params, model, opt, data_set): # setup cost function as CrossEntropy cost = GeneralizedCost(costfunc=CrossEntropyMulti()) callbacks = Callbacks(model, **args.callback_args) callbacks.add_callback(EpochEndCallback()) data_set.set_mode('train') model.fit(data_set, optimizer=opt, num_epochs=hyper_params.num_epochs, cost=cost, callbacks=callbacks) return
def main():
    # setup the model and run for num_epochs saving the last state only
    # this is at the top so that the backend (be) is generated
    mlp = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since the cpu backend runs slower
        train = DataIterator(X_train[:1000], y_train[:1000], nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test[:1000], y_test[:1000], nclass=nclass, lshape=(1, 28, 28))
    else:
        train = DataIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path, history=2)

    # run the fit all the way through, saving a checkpoint at every epoch
    mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

    # setup model with the same random seed, run epoch by epoch,
    # serializing and deserializing at each step
    mlp = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path,
                                     history=num_epochs)
    for epoch in range(num_epochs):
        # _0 points to the state at the end of epoch 0
        mlp.fit(train, optimizer=opt_gdm, num_epochs=epoch + 1, cost=cost,
                callbacks=callbacks)

        # load the weights saved at this epoch's checkpoint
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        mlp.load_weights(fn)

    # compare test_oneshot_<num_epochs>.pkl to test_manyshot_<num_epochs>.pkl
    try:
        compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                              'test_manyshot_%d.pkl' % (num_epochs - 1))
    except Exception:
        print('test failed....')
        sys.exit(1)
def run(train, test): init = Gaussian(scale=0.01) layers = [ Conv((3, 3, 128), init=init, activation=Rectlin(), strides=dict(str_h=1, str_w=2)), Conv((3, 3, 256), init=init, batch_norm=True, activation=Rectlin()), Pooling(2, strides=2), Conv((2, 2, 512), init=init, batch_norm=True, activation=Rectlin()), DeepBiRNN(256, init=init, activation=Rectlin(), reset_cells=True, depth=3), RecurrentLast(), Affine(32, init=init, batch_norm=True, activation=Rectlin()), Affine(nout=common['nclasses'], init=init, activation=Softmax()) ] model = Model(layers=layers) opt = Adadelta() metric = Misclassification() callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args) cost = GeneralizedCost(costfunc=CrossEntropyBinary()) model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks) return model
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[
        Affine(nout=100, init=Gaussian(loc=0.0, scale=0.01), activation=Rectlin()),
        Affine(nout=10, init=Gaussian(loc=0.0, scale=0.01),
               activation=Logistic(shortcut=True))
    ])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp, eval_set=dataset.valid_iter, **args.callback_args)

    # run fit with a binary cross-entropy cost
    mlp.fit(dataset.train_iter, optimizer=optimizer, num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()), callbacks=callbacks)
    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))
def train_model(lrmodel, opt, cost, X, Y, devX, devY, devscores): """ Train model, using pearsonr on dev for early stopping """ done = False best = -1.0 r = np.arange(1, 6) train_set = ArrayIterator(X=X, y=Y, make_onehot=False) valid_set = ArrayIterator(X=devX, y=devY, make_onehot=False) eval_epoch = 10 while not done: callbacks = Callbacks(lrmodel, eval_set=valid_set) lrmodel.fit(train_set, optimizer=opt, num_epochs=eval_epoch, cost=cost, callbacks=callbacks) # Every 10 epochs, check Pearson on development set yhat = np.dot(lrmodel.get_outputs(valid_set), r) score = pearsonr(yhat, devscores)[0] if score > best: neon_logger.display('Dev Pearson: {}'.format(score)) best = score bestlrmodel = copy.copy(lrmodel) else: done = True eval_epoch += 10 yhat = np.dot(bestlrmodel.get_outputs(valid_set), r) score = pearsonr(yhat, devscores)[0] neon_logger.display('Dev Pearson: {}'.format(score)) return bestlrmodel
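# Note (illustrative, not from the original): `np.dot(lrmodel.get_outputs(valid_set), r)`
# above converts the 5-way softmax output into an expected similarity rating by taking the
# probability-weighted mean over the ratings 1..5. A tiny standalone demo:
import numpy as np

probs = np.array([[0.1, 0.2, 0.4, 0.2, 0.1]])  # hypothetical softmax output over ratings 1..5
r = np.arange(1, 6)
print(np.dot(probs, r))  # [3.0] -- the expected rating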
def main():
    # parse the command line arguments
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()

    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    # set up batch iterators for training, validation and test images
    train = ImgMaster(repo_dir='dataTmp_optFlow_BW', set_name='train',
                      inner_size=240, subset_pct=100)
    val = ImgMaster(repo_dir='dataTmp_optFlow_BW', set_name='train',
                    inner_size=240, subset_pct=100, do_transforms=False)
    test = ImgMaster(repo_dir='dataTestTmp_optFlow_BW', set_name='train',
                     inner_size=240, subset_pct=100, do_transforms=False)

    train.init_batch_provider()
    val.init_batch_provider()
    test.init_batch_provider()

    print("Constructing network...")
    # create the AlexNet architecture
    model = constuct_network()

    # optimizer for the model
    opt = Adadelta()

    # configure callbacks
    valmetric = TopKMisclassification(k=5)
    callbacks = Callbacks(model, train, eval_set=test, metric=valmetric,
                          **args.callback_args)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    # flag = input("Press Enter if you want to begin training process.")
    print("Training network...")
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)
    mets = model.eval(test, metric=valmetric)

    print('Validation set metrics:')
    print('LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' % (
          mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))
    return
def deserialize(fn, datasets=None, inference=False):
    """
    Helper function to load all objects from a serialized file,
    this includes callbacks and datasets as well as the model, layers, etc.

    Arguments:
        datasets (DataSet, optional): If the dataset is not serialized
                                      in the file it can be passed in
                                      as an argument.  This will also
                                      override any dataset in the serialized file
        inference (bool, optional): if true only the weights will be loaded, not
                                    the states
    Returns:
        Model: the model object
        Dataset: the data set object
        Callback: the callbacks
    """
    config_dict = load_obj(fn)

    if datasets is not None:
        logger.warn('Ignoring datasets serialized in archive file %s' % fn)
    elif 'datasets' in config_dict:
        ds_cls = load_class(config_dict['datasets']['type'])
        dataset = ds_cls.gen_class(config_dict['datasets']['config'])
        datasets = dataset.gen_iterators()

    if 'train' in datasets:
        data_iter = datasets['train']
    else:
        key = list(datasets.keys())[0]
        data_iter = datasets[key]
        logger.warn('Could not find training set iterator, using %s instead' % key)

    model = Model(config_dict, data_iter)

    callbacks = None
    if 'callbacks' in config_dict:
        # run through the callbacks looking for dataset objects
        # replace them with the corresponding data set above
        cbs = config_dict['callbacks']['callbacks']
        for cb in cbs:
            if 'config' not in cb:
                cb['config'] = {}
            for arg in cb['config']:
                if type(cb['config'][arg]) is dict and 'type' in cb['config'][arg]:
                    if cb['config'][arg]['type'] == 'Data':
                        key = cb['config'][arg]['name']
                        if key in datasets:
                            cb['config'][arg] = datasets[key]
                        else:
                            cb['config'][arg] = None
        # now we can generate the callbacks
        callbacks = Callbacks.load_callbacks(config_dict['callbacks'], model)
    return (model, dataset, callbacks)
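# A hedged usage sketch (not part of the original): resuming training from a checkpoint
# written by the serialize callback. The file name is hypothetical, and `optimizer` and
# `cost` are assumed to be configured as in the surrounding examples.
model, dataset, callbacks = deserialize('checkpoint.pkl')
train_iter = dataset.gen_iterators()['train']
model.fit(train_iter, optimizer=optimizer, num_epochs=10, cost=cost, callbacks=callbacks)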
def main(): parser = get_parser() args = parser.parse_args() print('Args:', args) loggingLevel = logging.DEBUG if args.verbose else logging.INFO logging.basicConfig(level=loggingLevel, format='') ext = extension_from_parameters(args) loader = p1b3.DataLoader(feature_subsample=args.feature_subsample, scaling=args.scaling, drug_features=args.drug_features, scramble=args.scramble, min_logconc=args.min_logconc, max_logconc=args.max_logconc, subsample=args.subsample, category_cutoffs=args.category_cutoffs) # initializer = Gaussian(loc=0.0, scale=0.01) initializer = GlorotUniform() activation = get_function(args.activation)() layers = [] reshape = None if args.convolution and args.convolution[0]: reshape = (1, loader.input_dim, 1) layer_list = list(range(0, len(args.convolution), 3)) for l, i in enumerate(layer_list): nb_filter = args.convolution[i] filter_len = args.convolution[i+1] stride = args.convolution[i+2] # print(nb_filter, filter_len, stride) # fshape: (height, width, num_filters). layers.append(Conv((1, filter_len, nb_filter), strides={'str_h':1, 'str_w':stride}, init=initializer, activation=activation)) if args.pool: layers.append(Pooling((1, args.pool))) for layer in args.dense: if layer: layers.append(Affine(nout=layer, init=initializer, activation=activation)) if args.drop: layers.append(Dropout(keep=(1-args.drop))) layers.append(Affine(nout=1, init=initializer, activation=neon.transforms.Identity())) model = Model(layers=layers) train_iter = ConcatDataIter(loader, ndata=args.train_samples, lshape=reshape, datatype=args.datatype) val_iter = ConcatDataIter(loader, partition='val', ndata=args.val_samples, lshape=reshape, datatype=args.datatype) cost = GeneralizedCost(get_function(args.loss)()) optimizer = get_function(args.optimizer)() callbacks = Callbacks(model, eval_set=val_iter, **args.callback_args) model.fit(train_iter, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {
        'strides': 1,
        'padding': 1,
        'init': Xavier(local=True),  # Xavier init: uniform within +/- sqrt(3 / fan_in)
        'bias': Constant(0),
        'activation': relu
    }

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 3x3 kernels, 128 feature maps
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))  # max pooling: highest value from each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    # with probability 0.5, sets activation values to 0 during training
    layers.append(Dropout(keep=0.5))
    # fully connected layer: a linear combination of the flattened conv outputs
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0), activation=relu))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    # out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
            callbacks=callbacks)
    return cnn
def it_was_correct(last_in, last_out):
    # print("naive", replay_memory.clean_values_toone(last_in)[0], last_out)
    offset_memory.add_episode(last_in, last_out)
    # guard against an empty memory before touching .size
    if offset_memory.outputs is not None:
        print("osize", offset_memory.outputs.size)
        if offset_memory.outputs.size % bot_params.batch_size == 0:
            X, y = offset_memory.get_dataset()
            train = ArrayIterator(X=X, y=y, make_onehot=False)
            mlp.fit(train, optimizer=optimizer, num_epochs=1, cost=cost,
                    callbacks=Callbacks(mlp))
            mlp.save_params(bot_params.aim_weights_path)
def train_regressor(orig_wordvecs, w2v_W, w2v_vocab): """ Return regressor to map word2vec to RNN word space Function modified from: https://github.com/ryankiros/skip-thoughts/blob/master/training/tools.py """ # Gather all words from word2vec that appear in wordvecs d = defaultdict(lambda: 0) for w in w2v_vocab.keys(): d[w] = 1 shared = OrderedDict() count = 0 for w in list(orig_wordvecs.keys())[:-2]: if d[w] > 0: shared[w] = count count += 1 # Get the vectors for all words in 'shared' w2v = np.zeros((len(shared), 300), dtype='float32') sg = np.zeros((len(shared), 620), dtype='float32') for w in shared.keys(): w2v[shared[w]] = w2v_W[w2v_vocab[w]] sg[shared[w]] = orig_wordvecs[w] train_set = ArrayIterator(X=w2v, y=sg, make_onehot=False) layers = [ Linear(nout=620, init=Gaussian(loc=0.0, scale=0.1)), Bias(init=Constant(0.0)) ] clf = Model(layers=layers) # regression model is trained using default global batch size cost = GeneralizedCost(costfunc=SumSquared()) opt = GradientDescentMomentum(0.1, 0.9, gradient_clip_value=5.0) callbacks = Callbacks(clf) clf.fit(train_set, num_epochs=20, optimizer=opt, cost=cost, callbacks=callbacks) return clf
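# A hedged usage sketch (not part of the original): embedding words that exist in word2vec
# but not in the RNN vocabulary with the trained regressor. `w2v_new` is a hypothetical
# (n, 300) array of word2vec vectors; an ArrayIterator without targets serves for inference.
clf = train_regressor(orig_wordvecs, w2v_W, w2v_vocab)
new_iter = ArrayIterator(X=w2v_new, make_onehot=False)
rnn_space_vecs = clf.get_outputs(new_iter)  # (n, 620) vectors in the RNN word space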
def fit(self, test_set, train_set): """ Train and fit the model on the datasets Args: test_set (:obj:`neon.data.ArrayIterators`): The test set train_set (:obj:`neon.data.ArrayIterators`): The train set args: callback_args and epochs from ArgParser input """ # configure callbacks callbacks = Callbacks(self.model, eval_set=test_set, **self.callback_args) self.model.fit(train_set, optimizer=self.optimizer, num_epochs=self.epochs, cost=self.cost, callbacks=callbacks)
def run(args, train, test): init_uni = Uniform(low=-0.1, high=0.1) opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, stochastic_round=args.rounding) layers = [Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True), Pooling((2, 2)), Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True), Pooling((2, 2)), Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True), Affine(nout=10, init=init_uni, activation=Softmax())] cost = GeneralizedCost(costfunc=CrossEntropyMulti()) mlp = Model(layers=layers) callbacks = Callbacks(mlp, eval_set=test, **args.callback_args) mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks) err = mlp.eval(test, metric=Misclassification())*100 print('Misclassification error = %.2f%%' % err) return err
layers.append(Dropout(keep=0.5))
layers.append(Affine(nout=1000, init=init1, bias=Constant(-7), activation=Softmax()))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})
mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, output_file=args.output_file)

if args.validation_freq:
    class TopKMetrics(Callback):
        def __init__(self, valid_set, epoch_freq=args.validation_freq):
            super(TopKMetrics, self).__init__(epoch_freq=epoch_freq)
            self.valid_set = valid_set

        def on_epoch_end(self, epoch):
            self.valid_set.reset()
            allmetrics = TopKMisclassification(k=5)
            stats = mlp.eval(self.valid_set, metric=allmetrics)
            print(", ".join(allmetrics.metric_names) + ": " +
                  ", ".join(map(str, stats.flatten())))

    callbacks.add_callback(TopKMetrics(test))
# setup cost function as CrossEntropy
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# setup optimizer
optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9,
                                    stochastic_round=args.rounding)

# initialize model object
mlp = Model(layers=layers)

if args.model_file:
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    logger.info('loading initial model state from %s' % args.model_file)
    mlp.load_weights(args.model_file)

# setup standard fit callbacks
callbacks = Callbacks(mlp, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar)

# add a callback to calculate the validation error
if args.validation_freq:
    # setup validation trial callbacks
    callbacks.add_validation_callback(valid_set, args.validation_freq)

if args.serialize > 0:
    # add callback for saving a checkpoint file every args.serialize epochs
    checkpoint_schedule = args.serialize
    checkpoint_model_path = args.save_path
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

# run fit
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Dropout(0.4),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Dropout(0.2),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=5, **common),
        RecurrentMean(),
        Affine(nout=2, init=gauss, activation=Softmax())
    ]
}[subj]

model = Model(layers=layers)
opt = Adagrad(learning_rate=rate)
callbacks = Callbacks(model, eval_set=test, **args.callback_args)
if args.validate_mode:
    evaluator = Evaluator(subj, data_dir, test)
    callbacks.add_callback(evaluator)
    preds_name = 'eval.'
else:
    preds_name = 'test.'
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
model.fit(train, optimizer=opt, num_epochs=nepochs, cost=cost, callbacks=callbacks)
preds = model.get_outputs(test)[:, 1]
preds_file = preds_name + str(subj) + '.npy'
# hyperparameters num_epochs = args.epochs (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir) train_set = DataIterator([X_train, X_train], y_train, nclass=nclass) valid_set = DataIterator([X_test, X_test], y_test, nclass=nclass) # weight initialization init_norm = Gaussian(loc=0.0, scale=0.01) # initialize model path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()), Affine(nout=100, init=init_norm, activation=Rectlin())]) path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()), Affine(nout=100, init=init_norm, activation=Rectlin())]) layers = [MergeMultistream(layers=[path1, path2], merge="stack"), Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))] model = Model(layers=layers) cost = GeneralizedCost(costfunc=CrossEntropyBinary()) # fit and validate optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9) # configure callbacks callbacks = Callbacks(model, train_set, args, eval_set=valid_set) model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)
# cost function
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# final model
mlp = Model(layers=layers)
logger.info("model construction complete...")

"""
model training and classification accuracy
"""
# model training and results
callbacks = Callbacks(mlp, train, args, eval_set=valid, metric=Misclassification())
# add loss and metric callbacks to provide more diagnostics
callbacks.add_callback(MetricCallback(mlp, eval_set=train, metric=Misclassification(),
                                      epoch_freq=args.evaluation_freq))
callbacks.add_callback(MetricCallback(mlp, eval_set=valid, metric=Misclassification(),
                                      epoch_freq=args.evaluation_freq))

# run the model
mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

# final classification accuracy
t_mis_rate = mlp.eval(train, metric=Misclassification()) * 100
v_mis_rate = mlp.eval(valid, metric=Misclassification()) * 100
# test_mis_rate = mlp.eval(test, metric=Misclassification()) * 100
classifier = TextClassifier(options.word_vectors, options.model_file,
                            optimizer=optimizer, num_analytics_features=0,
                            num_subject_words=0, num_body_words=60,
                            overlapping_classes=overlapping_classes,
                            exclusive_classes=exclusive_classes,
                            network_type=options.network_type)

# we will supersede the email classification function to test the content
# classification network only
print('loading sentiment data from {}'.format(options.sentiment_path))
sdata = SentimentLoader(classifier, options.sentiment_path)

if options.shuffle_test:
    print('SHUFFLE TEST BEGIN... please wait...')
    sdata.train.test_shuffle()
    sdata.test.test_shuffle()
    print('SHUFFLE TEST PASSED... reloading inputs and targets')
    sdata = SentimentLoader(classifier, options.sentiment_path)

callbacks = Callbacks(classifier.neuralnet, eval_freq=1, eval_set=sdata.test,
                      metric=Misclassification())
callbacks.add_callback(MisclassificationTest(sdata.test, Misclassification()))
print('Training neural networks on {} samples for {} epochs'.format(
    sdata.train.targets[0].shape[1], options.epochs))
classifier.fit(sdata.train, optimizer, options.epochs, callbacks)
print('finished sentiment classification test, exiting')
exit(0)

classifier = TextClassifier(options.word_vectors, options.model_file,
                            optimizer=optimizer,
                            num_analytics_features=0 if options.sentiment_path else 4,
                            overlapping_classes=overlapping_classes,
                            exclusive_classes=exclusive_classes,
                            network_type=options.network_type,
                            lookup_size=options.lookup_size,
                            lookup_dim=options.lookup_dim)

# determine if we expect to use a csv file or a maildir as our data source
if os.path.isfile(options.data_path):
# define stopping function # it takes as input a tuple (State,val[t]) # which describes the cumulative validation state (generated by this function) # and the validation error at time t # and returns as output a tuple (State', Bool), # which represents the new state and whether to stop # Stop if validation error ever increases from epoch to epoch def stop_func(s, v): if s is None: return (v, False) return (min(v, s), v > s) # fit and validate optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9) # configure callbacks if args.callback_args['eval_freq'] is None: args.callback_args['eval_freq'] = 1 callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args) callbacks.add_early_stop_callback(stop_func) callbacks.add_save_best_state_callback(os.path.join(args.data_dir, "early_stop-best_state.pkl")) mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
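# A hedged variant (not from the original): the same stopping-function contract can carry a
# richer state. This sketch adds patience, stopping only after PATIENCE consecutive
# non-improving epochs; the state tuple (best, bad_count) replaces the single float above.
PATIENCE = 3

def stop_func_patience(s, v):
    if s is None:
        return ((v, 0), False)
    best, bad = s
    if v < best:
        return ((v, 0), False)
    return ((best, bad + 1), bad + 1 >= PATIENCE)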
model, cost = create_network() rseed = 0 if args.rng_seed is None else args.rng_seed # setup data provider assert 'train' in args.manifest, "Missing train manifest" assert 'val' in args.manifest, "Missing validation manifest" train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root, model.be, args.subset_pct, rseed) valid = make_validation_loader(args.manifest['val'], args.manifest_root, model.be, args.subset_pct) sched_weight = Schedule([10], change=0.1) opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight) # configure callbacks valmetric = TopKMisclassification(k=5) callbacks = Callbacks(model, eval_set=valid, metric=valmetric, **args.callback_args) if args.deconv: callbacks.add_deconv_callback(train, valid) model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
Dropout(keep=0.5), Conv((3, 3, 192), **convp1), Conv((3, 3, 192), **convp1), Conv((3, 3, 192), **convp1s2), Dropout(keep=0.5), Conv((3, 3, 192), **convp1), Conv((1, 1, 192), **conv), Conv((1, 1, 16), **conv), Pooling(8, op="avg"), Activation(Softmax()), ] cost = GeneralizedCost(costfunc=CrossEntropyMulti()) mlp = Model(layers=layers) if args.model_file: import os assert os.path.exists(args.model_file), "%s not found" % args.model_file mlp.load_params(args.model_file) # configure callbacks callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args) if args.deconv: callbacks.add_deconv_callback(train_set, valid_set) mlp.fit(train_set, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks) print("Misclassification error = %.1f%%" % (mlp.eval(valid_set, metric=Misclassification()) * 100))
parser = NeonArgparser(__doc__, default_config_files=config_files, default_overrides=dict(batch_size=64)) parser.add_argument('--deconv', action='store_true', help='save visualization data from deconvolution') parser.add_argument('--subset_pct', type=float, default=100, help='subset of training dataset to use (percentage)') args = parser.parse_args() model, cost = create_network() rseed = 0 if args.rng_seed is None else args.rng_seed # setup data provider assert 'train' in args.manifest, "Missing train manifest" assert 'val' in args.manifest, "Missing validation manifest" train = make_alexnet_train_loader(args.manifest['train'], args.manifest_root, model.be, args.subset_pct, rseed) valid = make_validation_loader(args.manifest['val'], args.manifest_root, model.be, args.subset_pct) sched_weight = Schedule([10], change=0.1) opt = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=sched_weight) # configure callbacks valmetric = TopKMisclassification(k=5) callbacks = Callbacks(model, eval_set=valid, metric=valmetric, **args.callback_args) if args.deconv: callbacks.add_deconv_callback(train, valid) model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
layers.append(Conv((3, 3, 384), pad=1, init=init2, bias=Constant(0), activation=relu)) layers.append(Conv((3, 3, 256), pad=1, init=init2, bias=Constant(1), activation=relu)) layers.append(Conv((3, 3, 256), pad=1, init=init2, bias=Constant(1), activation=relu)) layers.append(Pooling(3, strides=2)) layers.append(Affine(nout=4096, init=init1, bias=Constant(1), activation=relu)) layers.append(Dropout(keep=0.5)) layers.append(Affine(nout=4096, init=init1, bias=Constant(1), activation=relu)) layers.append(Dropout(keep=0.5)) layers.append(Affine(nout=1000, init=init1, bias=Constant(-7), activation=Softmax())) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases}) mlp = Model(layers=layers) # configure callbacks callbacks = Callbacks(mlp, train, output_file=args.output_file) if args.validation_freq: callbacks.add_validation_callback(test, args.validation_freq) if args.save_path: checkpoint_schedule = range(1, args.epochs) callbacks.add_serialize_callback(checkpoint_schedule, args.save_path, history=2) mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks) test.exit_batch_provider() train.exit_batch_provider()
model = Model(layers=SSD(ssd_config=train_config['ssd_config'], dataset=train_set)) cost = MBoxLoss(num_classes=train_set.num_classes) if args.model_file is None: load_vgg_weights(model, cache_dir) else: model.load_params(args.model_file) if args.lr_step is None: args.lr_step = [40, 80, 120] base_lr = 0.0001 * be.bsz * args.lr_scale schedule = Schedule(args.lr_step, 0.1) opt_w = GradientDescentMomentum(base_lr, momentum_coef=0.9, wdecay=0.0005, schedule=schedule) opt_b = GradientDescentMomentum(base_lr, momentum_coef=0.9, schedule=schedule) opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}) # hijack the eval callback arg here eval_freq = args.callback_args.pop('eval_freq') callbacks = Callbacks(model, **args.callback_args) callbacks.add_callback(MAP_Callback(eval_set=val_set, epoch_freq=eval_freq)) if args.image_sample_dir is not None: callbacks.add_callback(ssd_image_callback(eval_set=val_set, image_dir=args.image_sample_dir, epoch_freq=eval_freq, num_images=args.num_images, classes=val_config['class_names'])) model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
validation=False, remove_history=False, minimal_set=False, next_N=3) valid = HDF5Iterator(filenames, ndata=(16 * 2014), validation=True, remove_history=False, minimal_set=False, next_N=1) out1, out2, out3 = model.layers.get_terminal() cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)), GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)), GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))]) schedule = ExpSchedule(decay=(1.0 / 50)) # halve the learning rate every 50 epochs opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, stochastic_round=args.rounding, gradient_clip_value=1, gradient_clip_norm=5, wdecay=0.0001, schedule=schedule) callbacks = Callbacks(model, eval_set=valid, metric=TopKMisclassification(5), **args.callback_args) callbacks.add_save_best_state_callback(os.path.join(args.workspace_dir, "best_state_h5resnet.pkl")) model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks) model.save_params(os.path.join(args.workspace_dir, "final_state_h5resnet.pkl"))
init = Uniform(low=-0.08, high=0.08) # model initialization layers = [ LSTM(hidden_size, init, Logistic(), Tanh()), Affine(len(train_set.vocab), init, bias=init, activation=Softmax()) ] model = Model(layers=layers) cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)) optimizer = RMSProp(clip_gradients=clip_gradients, stochastic_round=args.rounding) # configure callbacks callbacks = Callbacks(model, train_set, output_file=args.output_file, progress_bar=args.progress_bar, valid_set=valid_set, valid_freq=1, ) callbacks.add_serialize_callback(1, args.save_path) # fit and validate model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost, callbacks=callbacks) def sample(prob): """ Sample index from probability distribution """ prob = prob / (prob.sum() + 1e-6) return np.argmax(np.random.multinomial(1, prob, 1)) # Set batch size and time_steps to 1 for generation and reset buffers
common_params = dict(sampling_freq=22050, clip_duration=16000, frame_duration=16) train_params = AudioParams(**common_params) valid_params = AudioParams(**common_params) common = dict(target_size=1, nclasses=10, repo_dir=args.data_dir) train = DataLoader(set_name='music-train', media_params=train_params, index_file=train_idx, shuffle=True, **common) valid = DataLoader(set_name='music-valid', media_params=valid_params, index_file=valid_idx, shuffle=False, **common) init = Gaussian(scale=0.01) layers = [Conv((2, 2, 4), init=init, activation=Rectlin(), strides=dict(str_h=2, str_w=4)), Pooling(2, strides=2), Conv((3, 3, 4), init=init, batch_norm=True, activation=Rectlin(), strides=dict(str_h=1, str_w=2)), DeepBiRNN(128, init=GlorotUniform(), batch_norm=True, activation=Rectlin(), reset_cells=True, depth=3), RecurrentMean(), Affine(nout=common['nclasses'], init=init, activation=Softmax())] model = Model(layers=layers) opt = Adagrad(learning_rate=0.01, gradient_clip_value=15) metric = Misclassification() callbacks = Callbacks(model, eval_set=valid, metric=metric, **args.callback_args) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks) print('Misclassification error = %.1f%%' % (model.eval(valid, metric=metric)*100)) display(model, ['Convolution_0'], 'inputs') display(model, ['Convolution_0', 'Convolution_1', 'Pooling_0'], 'outputs')
model = Model(layers=layers)

# scale LR by 0.1 every 20 epochs (this assumes batch_size = 256)
weight_sched = Schedule(20, 0.1)
opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.0005, schedule=weight_sched)
opt_biases = GradientDescentMomentum(0.02, 0.9, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases})

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args)

if args.model_file is not None:
    model.load_params(args.model_file)

if not args.test_only:
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost,
              callbacks=callbacks)

mets = model.eval(test, metric=valmetric)
print('Validation set metrics:')
print('LogLoss: %.2f, Accuracy: %.1f %% (Top-1), %.1f %% (Top-5)' % (
      mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100))
Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()) ] cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)) # configure callbacks checkpoint_model_path = "~/image_caption2.pickle" if args.callback_args['save_path'] is None: args.callback_args['save_path'] = checkpoint_model_path if args.callback_args['serialize'] is None: args.callback_args['serialize'] = 1 model = Model(layers=layers) callbacks = Callbacks(model, **args.callback_args) opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8, gradient_clip_value=1) # train model model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks) # load model (if exited) and evaluate bleu score on test set model.load_params(checkpoint_model_path)
#           Pooling(fshape=2, strides=2),
#           Affine(nout=500, init=init_uni, activation=Rectlin()),
#           Affine(nout=10, init=init_uni, activation=Softmax())]
# learning_rate = 0.005
# momentum = 0.9

cnn = Model(layers=layers)

# - cost function
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

# - learning rule
optimizer = GradientDescentMomentum(learning_rate, momentum_coef=momentum)

# standard callbacks: progress bar plus per-epoch evaluation on the test set
callbacks = Callbacks(cnn, eval_set=test_set, **args.callback_args)

# put everything together!
cnn.fit(train_set, optimizer=optimizer, num_epochs=epochs, cost=cost, callbacks=callbacks)

# # Calculate test set results
# results = cnn.get_outputs(test_set)
# dump(cnn, "cnn_0_005.jbl")

# # work out the performance!
# error = cnn.eval(test_set, metric=Misclassification())
LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True), Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()) ] cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)) checkpoint_model_path = "~/image_caption2.pickle" checkpoint_schedule = range(num_epochs) model = Model(layers=layers) callbacks = Callbacks(model, train_set, output_file=args.output_file, progress_bar=args.progress_bar) callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path) opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8, clip_gradients=True, gradient_limit=1.0) # train model model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
MergeMultistream(layers=[image_path, sent_path], merge="recurrent"), Dropout(keep=0.5), LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True), Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()) ] cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)) # configure callbacks checkpoint_model_path = "~/image_caption2.pickle" if args.callback_args['save_path'] is None: args.callback_args['save_path'] = checkpoint_model_path if args.callback_args['serialize'] is None: args.callback_args['serialize'] = 1 model = Model(layers=layers) callbacks = Callbacks(model, train_set, **args.callback_args) opt = RMSProp(decay_rate=0.997, learning_rate=0.0005, epsilon=1e-8, gradient_clip_value=1) # train model model.fit(train_set, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks) # load model (if exited) and evaluate bleu score on test set model.load_params(checkpoint_model_path) test_set = ImageCaptionTest(path=data_path) sents, targets = test_set.predict(model) test_set.bleu_score(sents, targets)
Conv((1, 1, 192), **conv), Conv((1, 1, 16), **conv), Pooling(8, op="avg"), Activation(Softmax())] cost = GeneralizedCost(costfunc=CrossEntropyMulti()) mlp = Model(layers=layers) if args.model_file: import os assert os.path.exists(args.model_file), '%s not found' % args.model_file mlp.load_weights(args.model_file) # configure callbacks callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args) if args.deconv: callbacks.add_deconv_callback(train_set, valid_set) callbacks.add_callback( MetricCallback( valid_set, Misclassification() ) ) mlp.fit(train_set, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks) import logging logger = logging.getLogger(__name__) logger.critical('Misclassification error = %.1f%%' % (mlp.eval(valid_set, metric=Misclassification())*100))
model = Model(layers=layers) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) model.initialize(train_set, cost) model.load_params('models/mnist/mnist_cnn.pkl', load_states=False) # define optimizer opt_w = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, wdecay=0.0005) opt_b = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9) opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}, name='multiopt') # configure callbacks callbacks = Callbacks(model, eval_set=valid_set, metric=Misclassification(), **args.callback_args) callbacks.add_callback( TrainByStageCallback(model, valid_set, Misclassification(), max_patience=5)) num_prune = [5, 10, 25, 10] callbacks.add_callback( FuzzyPruneCallback(num_states=100, num_prune=num_prune, model=model)) print('Original Accuracy = %.2f%%' % (100. - model.eval(valid_set, metric=Misclassification()) * 100)) logger.info('Training ...') model.fit(train_set, optimizer=opt, num_epochs=250,
Affine(nout=16, linear_name="b1_l1", **normrelu), Affine(nout=10, linear_name="b1_l2", **normsigm)] p3 = [b2, Affine(nout=16, linear_name="b2_l1", **normrelu), Affine(nout=10, linear_name="b2_l2", **normsigm)] # setup cost function as CrossEntropy cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()), GeneralizedCost(costfunc=CrossEntropyBinary()), GeneralizedCost(costfunc=CrossEntropyBinary())], weights=[1, 0., 0.]) # setup optimizer optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9, stochastic_round=args.rounding) # initialize model object alphas = [1, 0.25, 0.25] mlp = Model(layers=Tree([p1, p2, p3], alphas=alphas)) # setup standard fit callbacks callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args) # run fit mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks) logging.getLogger('neon').info("Misclassification error = %.1f%%", (mlp.eval(valid_set, metric=Misclassification())*100)) print('Misclassification error = %.1f%%' % (mlp.eval(valid_set, metric=Misclassification())*100))
weights=[1, 1, 1]) # setup optimizer schedule_w = StepSchedule(step_config=[5], change=[0.001 / 10]) schedule_b = StepSchedule(step_config=[5], change=[0.002 / 10]) opt_w = GradientDescentMomentum(0.001, 0.9, wdecay=0.0005, schedule=schedule_w) opt_b = GradientDescentMomentum(0.002, 0.9, wdecay=0.0005, schedule=schedule_b) opt_skip = GradientDescentMomentum(0.0, 0.0) optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b, 'skip': opt_skip, 'skip_bias': opt_skip}) # if training a new model, seed the image model conv layers with pre-trained weights # otherwise, just load the model file if args.model_file is None: util.load_vgg_all_weights(model, args.data_dir) callbacks = Callbacks(model, eval_set=train_set, **args.callback_args) callbacks.add_callback(TrainMulticostCallback()) # model.benchmark(train_set, optimizer=optimizer, cost=cost) model.fit(train_set, optimizer=optimizer, cost=cost, num_epochs=args.epochs, callbacks=callbacks) # Scale the bbox regression branch linear layer weights before saving the model model = util.scale_bbreg_weights(model, [0.0, 0.0, 0.0, 0.0], [0.1, 0.1, 0.2, 0.2], train_set.num_classes) if args.save_path is not None: save_obj(model.serialize(keep_states=True), args.save_path)
depth=4, n_extra_layers=4, batch_norm=True, dis_iters=5, wgan_param_clamp=0.01, wgan_train_sched=True) # setup optimizer optimizer = RMSProp(learning_rate=5e-5, decay_rate=0.99, epsilon=1e-8) # setup data provider train = make_loader(args.manifest['train'], args.manifest_root, model.be, args.subset_pct, random_seed) # configure callbacks callbacks = Callbacks(model, **args.callback_args) fdir = ensure_dirs_exist( os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/')) fname = os.path.splitext(os.path.basename(__file__))[0] +\ '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']' im_args = dict(filename=os.path.join(fdir, fname), hw=64, num_samples=args.batch_size, nchan=3, sym_range=True) callbacks.add_callback(GANPlotCallback(**im_args)) callbacks.add_callback(GANCostCallback()) # model fit model.fit(train, optimizer=optimizer,
# Set up the model layers
layers = []
nchan = 128
layers.append(Conv((2, 2, nchan), strides=2, **common))
for idx in range(16):
    layers.append(Conv((3, 3, nchan), **common))
    if nchan > 16:
        nchan //= 2  # integer divide so the channel count stays an int
for idx in range(15):
    layers.append(Deconv((3, 3, nchan), **common))
layers.append(Deconv((4, 4, nchan), strides=2, **common))
layers.append(Deconv((3, 3, 1), init=init, activation=Logistic(shortcut=True)))

cost = GeneralizedCost(costfunc=SumSquared())
mlp = Model(layers=layers)
callbacks = Callbacks(mlp, train, **args.callback_args)
evaluator = Evaluator(callbacks.callback_data, mlp, test, imwidth, args.epochs,
                      args.data_dir, point_num)
callbacks.add_callback(evaluator)
mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
train.exit_batch_provider()

preds = evaluator.get_outputs()
paths = np.genfromtxt(os.path.join(args.test_data_dir, 'val_file.csv'), dtype=str)[1:]
basenames = [os.path.basename(path) for path in paths]
filenames = [path.split(',')[0] for path in basenames]
filenames.sort()

content = []
for i, filename in enumerate(filenames):
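# A quick standalone check (not from the original) of the channel schedule the loop above
# produces: the width halves from 128 down to a floor of 16 and stays there.
nchan, widths = 128, []
for _ in range(16):
    widths.append(nchan)
    if nchan > 16:
        nchan //= 2
print(widths)  # [128, 64, 32, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16]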
Conv((1, 1, 192), **conv), Conv((1, 1, 16), **conv), Pooling(8, op="avg"), Activation(Softmax())] def stop_func(s, v): if s is None: return (v, False) return (min(v, s), v > s) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) mlp = Model(layers=layers) if args.model_file: import os assert os.path.exists(args.model_file), '%s not found' % args.model_file mlp.load_params(args.model_file) # configure callbacks callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args) callbacks.add_early_stop_callback(stop_func) if args.deconv: callbacks.add_deconv_callback(train_set, valid_set) mlp.fit(train_set, optimizer=opt_gdm, num_epochs=50, cost=cost, callbacks=callbacks) print('Misclassification error = %.1f%%' % (mlp.eval(valid_set, metric=Misclassification())*100))
def train_mlp():
    """
    Train data and save scaling and network weights and biases to file
    to be used by forward prop phase on test data
    """
    parser = NeonArgparser(__doc__)
    args = parser.parse_args()
    logger = logging.getLogger()
    logger.setLevel(args.log_thresh)

    # hyperparameters
    num_epochs = args.epochs

    # preprocessor
    std_scale = preprocessing.StandardScaler(with_mean=True, with_std=True)
    # std_scale = feature_scaler(type='Standardizer', with_mean=True, with_std=True)

    # number of non one-hot encoded features, including ground truth
    num_feat = 4

    # load data from csv-files and rescale
    # training
    traindf = pd.read_csv('data/train.csv', index_col=0)
    ncols = traindf.shape[1]
    tmpmat = traindf.values
    # tmpmat = std_scale.fit_transform(traindf.values)
    # print(std_scale.scale_)
    # print(std_scale.mean_)
    tmpmat[:, :num_feat] = std_scale.fit_transform(tmpmat[:, :num_feat])
    X_train = tmpmat[:, 1:]
    y_train = np.reshape(tmpmat[:, 0], (tmpmat[:, 0].shape[0], 1))

    # validation
    validdf = pd.read_csv('data/validate.csv', index_col=0)
    ncols = validdf.shape[1]
    tmpmat = validdf.values
    tmpmat[:, :num_feat] = std_scale.transform(tmpmat[:, :num_feat])
    X_valid = tmpmat[:, 1:]
    y_valid = np.reshape(tmpmat[:, 0], (tmpmat[:, 0].shape[0], 1))

    # test
    testdf = pd.read_csv('data/test.csv', index_col=0)
    ncols = testdf.shape[1]
    tmpmat = testdf.values
    tmpmat[:, :num_feat] = std_scale.transform(tmpmat[:, :num_feat])
    X_test = tmpmat[:, 1:]
    y_test = np.reshape(tmpmat[:, 0], (tmpmat[:, 0].shape[0], 1))

    # setup a training set iterator
    train_set = CustomDataIterator(X_train, lshape=(X_train.shape[1]), y_c=y_train)
    # setup a validation data set iterator
    valid_set = CustomDataIterator(X_valid, lshape=(X_valid.shape[1]), y_c=y_valid)
    # setup a test data set iterator
    test_set = CustomDataIterator(X_test, lshape=(X_test.shape[1]), y_c=y_test)

    # setup weight initialization function
    init_norm = Xavier()

    # setup model layers
    layers = [Affine(nout=X_train.shape[1], init=init_norm, activation=Rectlin()),
              Dropout(keep=0.5),
              Affine(nout=X_train.shape[1] // 2, init=init_norm, activation=Rectlin()),
              Linear(nout=1, init=init_norm)]

    # setup cost function as smooth L1 loss (this is a regression task)
    cost = GeneralizedCost(costfunc=SmoothL1Loss())

    # setup optimizer
    # schedule = ExpSchedule(decay=0.3)
    # optimizer = GradientDescentMomentum(0.0001, momentum_coef=0.9,
    #                                     stochastic_round=args.rounding, schedule=schedule)
    optimizer = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1.e-8)

    # initialize model object
    mlp = Model(layers=layers)

    # early stopping: stop as soon as the validation error worsens
    # (mirrors the stop_func used in the early-stopping example above)
    def stop_func(s, v):
        if s is None:
            return (v, False)
        return (min(v, s), v > s)

    # configure callbacks
    if args.callback_args['eval_freq'] is None:
        args.callback_args['eval_freq'] = 1
    callbacks = Callbacks(mlp, eval_set=valid_set, **args.callback_args)
    callbacks.add_early_stop_callback(stop_func)
    callbacks.add_save_best_state_callback(os.path.join(args.data_dir,
                                                        "early_stop-best_state.pkl"))

    # run fit
    mlp.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
            callbacks=callbacks)

    # evaluate model
    print('Evaluation Error = %.4f' % (mlp.eval(valid_set, metric=SmoothL1Metric())))
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # saving the model
    print('Saving model parameters!')
    mlp.save_params("model/homeapp_model.prm")

    # Reloading saved model
    # This should go in run.py
    mlp = Model("model/homeapp_model.prm")
    print('Test set error = %.4f' % (mlp.eval(test_set, metric=SmoothL1Metric())))

    # save the preprocessor vectors:
    np.savez("model/homeapp_preproc", mean=std_scale.mean_, std=std_scale.scale_)

    return 1
Pooling(3, strides=2)] # Structure of the deep residual part of the network: # args.depth modules of 2 convolutional layers each at feature map depths # of 64, 128, 256, 512 nfms = list(itt.chain.from_iterable( [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)])) strides = [-1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])] for nfm, stride in zip(nfms, strides): layers.append(module_factory(nfm, stride)) layers.append(Pooling('all', op='avg')) layers.append(Conv(**conv_params(1, train.nclass, relu=False))) layers.append(Activation(Softmax())) model = Model(layers=layers) weight_sched = Schedule([30, 60], 0.1) opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001, schedule=weight_sched) # configure callbacks valmetric = TopKMisclassification(k=5) callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args) callbacks.add_callback(BatchNormTuneCallback(tune), insert_pos=0) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
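# A standalone sanity check (not from the original) of the feature-map / stride bookkeeping
# above, using a hypothetical stages = (2, 2, 2, 2): widths double per stage and a stride-2
# module is emitted exactly where the width changes.
import itertools as itt

stages = (2, 2, 2, 2)
nfms = list(itt.chain.from_iterable(
    [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
strides = [-1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
print(nfms)     # [64, 64, 128, 128, 256, 256, 512, 512]
print(strides)  # [-1, 1, 2, 1, 2, 1, 2, 1]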
(X_train, y_train), (X_test, y_test), nclass = load_cifar10(path=args.data_dir) train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32)) test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32)) init_uni = Uniform(low=-0.1, high=0.1) opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9) # set up the model layers layers = [ Affine(nout=200, init=init_uni, activation=Rectlin()), Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True)) ] cost = GeneralizedCost(costfunc=CrossEntropyBinary()) mlp = Model(layers=layers) # configure callbacks callbacks = Callbacks(mlp, eval_set=test, **args.callback_args) mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks) print('Misclassification error = %.1f%%' % (mlp.eval(test, metric=Misclassification()) * 100))
    print('Training specialist: ', i)
    path = (EXPERIMENT_DIR + confusion_matrix_name + '_' + clustering_name + '_' +
            str(num_clusters) + 'clusters/' + 'specialist' + '_' + str(i) + '.prm')

    # Create datasets
    X_spec, y_spec, spec_out = filter_dataset(X_train, y_train, cluster)
    X_spec_test, y_spec_test, spec_out = filter_dataset(X_test, y_test, cluster)
    spec_out = nout
    spec_set = DataIterator(X_spec, y_spec, nclass=spec_out, lshape=(3, 32, 32))
    spec_test = DataIterator(X_spec_test, y_spec_test, nclass=spec_out, lshape=(3, 32, 32))

    # Train the specialist
    specialist, opt, cost = spec_net(nout=spec_out, archive_path=gene_path)
    callbacks = Callbacks(specialist, spec_set, args, eval_set=spec_test)
    callbacks.add_early_stop_callback(early_stop)
    callbacks.add_save_best_state_callback(path)
    specialist.fit(spec_set, optimizer=opt,
                   num_epochs=specialist.epoch_index + num_epochs,
                   cost=cost, callbacks=callbacks)

    # Print results
    print('Specialist Train misclassification error: ',
          specialist.eval(spec_set, metric=Misclassification()))
    print('Specialist Test misclassification error: ',
          specialist.eval(spec_test, metric=Misclassification()))
    print('Generalist Train misclassification error: ',
          generalist.eval(spec_set, metric=Misclassification()))
    print('Generalist Test misclassification error: ',
          generalist.eval(spec_test, metric=Misclassification()))
    # specialists.append(specialist)
    save_obj(specialist.serialize(), path)
except Exception:
    path = (confusion_matrix_name + '_' + clustering_name + '_' +
            str(num_clusters) + 'clusters/')
    print('Failed for ', path)
# hyperparameters num_epochs = args.epochs (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir) train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28)) valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28)) # weight initialization init_norm = Gaussian(loc=0.0, scale=0.01) # initialize model path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()), Affine(nout=100, init=init_norm, activation=Rectlin())]) path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()), Affine(nout=100, init=init_norm, activation=Rectlin())]) layers = [MergeMultistream(layers=[path1, path2], merge="stack"), Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))] model = Model(layers=layers) cost = GeneralizedCost(costfunc=CrossEntropyBinary()) # fit and validate optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9) # configure callbacks callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args) model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)
output_size = 8
N = 120  # number of memory locations
M = 8    # size of a memory location

# model initialization
layers = [
    GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic()),
    Affine(train_set.nout, init, bias=init, activation=Logistic())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyBinary())

model = Model(layers=layers)

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)

# we can use the training set as the validation set,
# since the ticker data is synthetically generated
callbacks.add_watch_ticker_callback(train_set)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost,
          callbacks=callbacks)
random_seed = args.rng_seed if args.rng_seed else 0 # load up the mnist data set, padding images to size 32 dataset = MNIST(path=args.data_dir, sym_range=True, size=32, shuffle=True) train = dataset.train_iter # create a GAN model, cost = create_model(dis_model=args.dmodel, gen_model=args.gmodel, cost_type='wasserstein', noise_type='normal', im_size=32, n_chan=1, n_noise=128, n_gen_ftr=args.n_gen_ftr, n_dis_ftr=args.n_dis_ftr, depth=4, n_extra_layers=4, batch_norm=True, dis_iters=5, wgan_param_clamp=0.01, wgan_train_sched=True) # setup optimizer optimizer = RMSProp(learning_rate=2e-4, decay_rate=0.99, epsilon=1e-8) # configure callbacks callbacks = Callbacks(model, **args.callback_args) fdir = ensure_dirs_exist(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/')) fname = os.path.splitext(os.path.basename(__file__))[0] +\ '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']' im_args = dict(filename=os.path.join(fdir, fname), hw=32, num_samples=args.batch_size, nchan=1, sym_range=True) callbacks.add_callback(GANPlotCallback(**im_args)) callbacks.add_callback(GANCostCallback()) # model fit model.fit(train, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
# setup optimizer
optimizer = {'momentum': [0],
             'step_config': 1,
             'learning_rate': 0.1,
             'weight_decay': 0}

# initialize model object
rbm = RBM(layers=layers)

if args.model_file:
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    logger.info('loading initial model state from %s' % args.model_file)
    rbm.load_weights(args.model_file)

# setup standard fit callbacks
callbacks = Callbacks(rbm, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar)

if args.serialize > 0:
    # add callback for saving a checkpoint file every args.serialize epochs
    checkpoint_schedule = args.serialize
    checkpoint_model_path = args.save_path
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

rbm.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)

for mb_idx, (x_val, y_val) in enumerate(valid_set):
    hidden = rbm.fprop(x_val)
    break
layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=1, padding=1)) layers.append(Conv((1, 1, 384), init=init_uni, activation=relu, strides=1)) layers.append(Conv((3, 3, 384), init=init_uni, activation=relu, strides=2, padding=1)) # 12->6 layers.append(Dropout(keep=0.5)) layers.append(Conv((3, 3, 1024), init=init_uni, activation=relu, strides=1, padding=1)) layers.append(Conv((1, 1, 1024), init=init_uni, activation=relu, strides=1)) layers.append(Conv((1, 1, 1000), init=init_uni, activation=relu, strides=1)) layers.append(Pooling(6, op='avg')) layers.append(Activation(Softmax())) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) mlp = Model(layers=layers) if args.model_file: import os assert os.path.exists(args.model_file), '%s not found' % args.model_file mlp.load_weights(args.model_file) # configure callbacks callbacks = Callbacks(mlp, train, eval_set=test, **args.callback_args) if args.deconv: callbacks.add_deconv_callback(train, test) mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks) test.exit_batch_provider() train.exit_batch_provider()
opt_gdm = GradientDescentMomentum(args.rate_init[0], args.momentum[0],
                                  wdecay=args.weight_decay,
                                  schedule=weight_sched,
                                  stochastic_round=args.rounding)
opt_biases = GradientDescentMomentum(args.rate_init[1], args.momentum[1],
                                     schedule=weight_sched,
                                     stochastic_round=args.rounding)
opt_fixed = GradientDescentMomentum(0.0, 1.0, wdecay=0.0)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases, 'DOG': opt_fixed})

# configure cost and test metrics
cost = GeneralizedCost(costfunc=(CrossEntropyBinary()
                                 if train.parser.independent_labels
                                 else CrossEntropyMulti()))
metric = EMMetric(oshape=test.parser.oshape,
                  use_softmax=not train.parser.independent_labels) if test else None

# configure callbacks
if not args.neon_progress:
    args.callback_args['progress_bar'] = False
callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args)
if not args.neon_progress:
    callbacks.add_callback(EMEpochCallback(args.callback_args['eval_freq'],
                                           train.nmacrobatches), insert_pos=None)
# xxx - thought of making this an option, but not clear that it slows anything down
# callbacks.add_hist_callback()  # xxx - not clear what information this conveys

if args.save_best_path:
    callbacks.add_save_best_state_callback(args.save_best_path)

model.fit(train, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
print('Model training complete for %d epochs!' % (args.epochs,))
# test.stop(); train.stop()

elif args.write_output:
    # write_output mode, must have model loaded
    if args.data_config: