def train(args, hyper_params, model, opt, data_set):
    # setup cost function as CrossEntropy
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    callbacks = Callbacks(model, **args.callback_args)
    callbacks.add_callback(EpochEndCallback())
    data_set.set_mode('train')
    model.fit(data_set, optimizer=opt, num_epochs=hyper_params.num_epochs,
              cost=cost, callbacks=callbacks)
    return
random_seed = args.rng_seed if args.rng_seed else 0

# load up the mnist data set, padding images to size 32
dataset = MNIST(path=args.data_dir, sym_range=True, size=32, shuffle=True)
train = dataset.train_iter

# create a GAN
model, cost = create_model(dis_model=args.dmodel, gen_model=args.gmodel,
                           cost_type='wasserstein', noise_type='normal',
                           im_size=32, n_chan=1, n_noise=128,
                           n_gen_ftr=args.n_gen_ftr, n_dis_ftr=args.n_dis_ftr,
                           depth=4, n_extra_layers=4, batch_norm=True,
                           dis_iters=5, wgan_param_clamp=0.01,
                           wgan_train_sched=True)

# setup optimizer
optimizer = RMSProp(learning_rate=2e-4, decay_rate=0.99, epsilon=1e-8)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)
fdir = ensure_dirs_exist(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] +\
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname), hw=32,
               num_samples=args.batch_size, nchan=1, sym_range=True)
callbacks.add_callback(GANPlotCallback(**im_args))
callbacks.add_callback(GANCostCallback())

# model fit
model.fit(train, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
model = Model(layers=SSD(ssd_config=train_config['ssd_config'], dataset=train_set))
cost = MBoxLoss(num_classes=train_set.num_classes)

if args.model_file is None:
    load_vgg_weights(model, cache_dir)
else:
    model.load_params(args.model_file)

if args.lr_step is None:
    args.lr_step = [40, 80, 120]

base_lr = 0.0001 * be.bsz * args.lr_scale
schedule = Schedule(args.lr_step, 0.1)
opt_w = GradientDescentMomentum(base_lr, momentum_coef=0.9, wdecay=0.0005, schedule=schedule)
opt_b = GradientDescentMomentum(base_lr, momentum_coef=0.9, schedule=schedule)
opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b})

# hijack the eval callback arg here
eval_freq = args.callback_args.pop('eval_freq')
callbacks = Callbacks(model, **args.callback_args)
callbacks.add_callback(MAP_Callback(eval_set=val_set, epoch_freq=eval_freq))

if args.image_sample_dir is not None:
    callbacks.add_callback(ssd_image_callback(eval_set=val_set,
                                              image_dir=args.image_sample_dir,
                                              epoch_freq=eval_freq,
                                              num_images=args.num_images,
                                              classes=val_config['class_names']))

model.fit(train_set, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
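# A worked example of the learning-rate setup above (the values are assumptions for
# illustration, not taken from the script): with be.bsz = 32 and args.lr_scale = 1.0,
# base_lr = 0.0001 * 32 * 1.0 = 0.0032, and Schedule([40, 80, 120], 0.1) multiplies
# that rate by 0.1 at each listed epoch:
#   epochs   0-39 : 0.0032
#   epochs  40-79 : 0.00032
#   epochs  80-119: 0.000032
#   epochs  120+  : 0.0000032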
# final model
mlp = Model(layers=layers)
logger.info("model construction complete...")

"""
model training and classification accuracy
"""
# model training and results
callbacks = Callbacks(mlp, train, args, eval_set=valid, metric=Misclassification())
# add loss and metric callbacks to facilitate more diagnostics
callbacks.add_callback(MetricCallback(mlp, eval_set=train, metric=Misclassification(),
                                      epoch_freq=args.evaluation_freq))
callbacks.add_callback(MetricCallback(mlp, eval_set=valid, metric=Misclassification(),
                                      epoch_freq=args.evaluation_freq))

# run the model
mlp.fit(train, optimizer=opt_gdm, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

# final classification accuracy
t_mis_rate = mlp.eval(train, metric=Misclassification()) * 100
v_mis_rate = mlp.eval(valid, metric=Misclassification()) * 100
# test_mis_rate = mlp.eval(test, metric=Misclassification()) * 100

print('Train Misclassification error = %.1f%%' % t_mis_rate)
print('Valid Misclassification error = %.1f%%' % v_mis_rate)
# print('Test Misclassification error = %.1f%%' % test_mis_rate)
def main():
    # setup the model and run for num_epochs saving the last state only
    # this is at the top so that the be is generated
    model = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    NN = batch_size * 5  # avoid partial mini batches
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since cpu backend runs slower
        train = ArrayIterator(X_train[:NN], y_train[:NN], nclass=nclass, lshape=(1, 28, 28))
        valid = ArrayIterator(X_test[:NN], y_test[:NN], nclass=nclass, lshape=(1, 28, 28))
    else:
        train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every step

    callbacks = Callbacks(model)
    callbacks.add_callback(SerializeModelCallback(checkpoint_model_path,
                                                  checkpoint_schedule,
                                                  history=2))

    # run the fit all the way through, saving a checkpoint at every epoch
    model.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

    # setup model with same random seed, run epoch by epoch,
    # serializing and deserializing at each step
    model = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every step
    for epoch in range(num_epochs):
        # _0 points to state at end of epoch 0
        callbacks = Callbacks(model)
        callbacks.add_callback(SerializeModelCallback(checkpoint_model_path,
                                                      checkpoint_schedule,
                                                      history=num_epochs))
        model.fit(train, optimizer=opt_gdm, num_epochs=epoch + 1, cost=cost, callbacks=callbacks)

        # load saved file
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        model.load_params(fn)  # load the saved weights

    # compare test_oneshot_<num_epochs>.pkl to test_manyshot_<num_epochs>.pkl
    if not compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs - 1),
                                 'test_manyshot_%d.pkl' % (num_epochs - 1)):
        print('No Match')
        sys.exit(1)
    else:
        print('Match')
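# The checkpoint test above calls a compare_model_pickles helper that is not shown in
# this excerpt. The sketch below is an assumption of what such a helper might look
# like (the real one may differ); it only uses neon.util.persist.load_obj and numpy
# to compare every array stored in the two serialized checkpoints.
import numpy as np
from neon.util.persist import load_obj


def compare_model_pickles(fname_a, fname_b, rtol=1e-5, atol=1e-7):
    """Return True if all numpy arrays in the two serialized models match."""
    def collect_arrays(obj, out):
        # recursively gather every numpy array from nested dicts/lists/tuples
        if isinstance(obj, np.ndarray):
            out.append(obj)
        elif isinstance(obj, dict):
            for v in obj.values():
                collect_arrays(v, out)
        elif isinstance(obj, (list, tuple)):
            for v in obj:
                collect_arrays(v, out)

    arrays_a, arrays_b = [], []
    collect_arrays(load_obj(fname_a), arrays_a)
    collect_arrays(load_obj(fname_b), arrays_b)
    return (len(arrays_a) == len(arrays_b) and
            all(np.allclose(a, b, rtol=rtol, atol=atol)
                for a, b in zip(arrays_a, arrays_b)))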
model, cost = create_network(args.depth)

# setup data provider
train = make_train_loader(args.manifest['train'], args.manifest_root, model.be,
                          args.subset_pct, random_seed)
test = make_validation_loader(args.manifest['val'], args.manifest_root, model.be,
                              args.subset_pct)

# tune batch norm parameters on subset of train set with no augmentations
tune_set = make_tuning_loader(args.manifest['train'], args.manifest_root, model.be)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args)
callbacks.add_callback(BatchNormTuneCallback(tune_set), insert_pos=0)

# begin training
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001, schedule=Schedule([82, 124], 0.1))
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
opt_biases = GradientDescentMomentum(args.rate_init[1], args.momentum[1],
                                     schedule=weight_sched,
                                     stochastic_round=args.rounding)
opt_fixed = GradientDescentMomentum(0.0, 1.0, wdecay=0.0)
opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases, 'DOG': opt_fixed})

# configure cost and test metrics
cost = GeneralizedCost(costfunc=(CrossEntropyBinary()
                                 if train.parser.independent_labels else CrossEntropyMulti()))
metric = EMMetric(oshape=test.parser.oshape,
                  use_softmax=not train.parser.independent_labels) if test else None

# configure callbacks
if not args.neon_progress:
    args.callback_args['progress_bar'] = False
callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args)
if not args.neon_progress:
    callbacks.add_callback(EMEpochCallback(args.callback_args['eval_freq'],
                                           train.nmacrobatches), insert_pos=None)
# xxx - thought of making this an option but not clear that it slows anything down?
#callbacks.add_hist_callback() # xxx - not clear what information this conveys
if args.save_best_path:
    callbacks.add_save_best_state_callback(args.save_best_path)

model.fit(train, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
print('Model training complete for %d epochs!' % (args.epochs,))
#test.stop(); train.stop()

elif args.write_output:
    # write_output mode, must have model loaded
    if args.data_config:
        test = EMDataIterator(args.data_config,
                              write_output=args.write_output,
                              chunk_skip_list=args.chunk_skip_list,
                              dim_ordering=args.dim_ordering,
                 weights=[1, 1, 1])

# setup optimizer
schedule_w = StepSchedule(step_config=[5], change=[0.001 / 10])
schedule_b = StepSchedule(step_config=[5], change=[0.002 / 10])
opt_w = GradientDescentMomentum(0.001, 0.9, wdecay=0.0005, schedule=schedule_w)
opt_b = GradientDescentMomentum(0.002, 0.9, wdecay=0.0005, schedule=schedule_b)
opt_skip = GradientDescentMomentum(0.0, 0.0)
optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b,
                            'skip': opt_skip, 'skip_bias': opt_skip})

# if training a new model, seed the image model conv layers with pre-trained weights
# otherwise, just load the model file
if args.model_file is None:
    util.load_vgg_all_weights(model, args.data_dir)

callbacks = Callbacks(model, eval_set=train_set, **args.callback_args)
callbacks.add_callback(TrainMulticostCallback())

# model.benchmark(train_set, optimizer=optimizer, cost=cost)
model.fit(train_set, optimizer=optimizer, cost=cost,
          num_epochs=args.epochs, callbacks=callbacks)

# Scale the bbox regression branch linear layer weights before saving the model
model = util.scale_bbreg_weights(model, [0.0, 0.0, 0.0, 0.0],
                                 [0.1, 0.1, 0.2, 0.2], train_set.num_classes)

if args.save_path is not None:
    save_obj(model.serialize(keep_states=True), args.save_path)
def benchmark(self):
    for d in self.devices:
        b = d if (self.backends is None) or ("mkl" not in self.backends) else "mkl"
        print("Use {} as backend.".format(b))

        # Set up backend
        # backend: 'cpu' for single cpu, 'mkl' for cpu using mkl library, and 'gpu' for gpu
        be = gen_backend(backend=b, batch_size=self.batch_size,
                         rng_seed=542, datatype=np.float32)

        # Make iterators
        neon_train_set = ArrayIterator(
            X=np.asarray([t.flatten().astype('float32') / 255 for t in self.x_train]),
            y=np.asarray(self.y_train), make_onehot=True, nclass=self.class_num,
            lshape=(3, self.resize_size[0], self.resize_size[1]))
        neon_valid_set = ArrayIterator(
            X=np.asarray([t.flatten().astype('float32') / 255 for t in self.x_valid]),
            y=np.asarray(self.y_valid), make_onehot=True, nclass=self.class_num,
            lshape=(3, self.resize_size[0], self.resize_size[1]))
        neon_test_set = ArrayIterator(
            X=np.asarray([t.flatten().astype('float32') / 255 for t in self.testImages]),
            y=np.asarray(self.testLabels), make_onehot=True, nclass=self.class_num,
            lshape=(3, self.resize_size[0], self.resize_size[1]))

        # Initialize model object
        self.neon_model = SelfModel(layers=self.constructCNN())

        # Costs
        neon_cost = GeneralizedCost(costfunc=CrossEntropyMulti())

        # Model summary
        self.neon_model.initialize(neon_train_set, neon_cost)
        print(self.neon_model)

        # Learning rules
        neon_optimizer = SGD(0.01, momentum_coef=0.9, schedule=ExpSchedule(0.2))
        # neon_optimizer = RMSProp(learning_rate=0.0001, decay_rate=0.95)

        # # Benchmark for 20 minibatches
        # d[b] = self.neon_model.benchmark(neon_train_set, cost=neon_cost, optimizer=neon_optimizer)

        # Reset model
        # self.neon_model = None
        # self.neon_model = Model(layers=layers)
        # self.neon_model.initialize(neon_train_set, neon_cost)

        # Callbacks: validate on validation set
        callbacks = Callbacks(
            self.neon_model, eval_set=neon_valid_set, metric=Misclassification(3),
            output_file="{}saved_data/{}/{}/callback_data_neon_{}_{}_{}by{}_{}.h5".format(
                self.root, self.network_type, d, b, self.dataset,
                self.resize_size[0], self.resize_size[1], self.preprocessing))
        callbacks.add_callback(
            SelfCallback(eval_set=neon_valid_set, test_set=neon_test_set, epoch_freq=1))

        # Fit
        start = time.time()
        self.neon_model.fit(neon_train_set, optimizer=neon_optimizer,
                            num_epochs=self.epoch_num, cost=neon_cost, callbacks=callbacks)
        print("Neon training finishes in {:.2f} seconds.".format(time.time() - start))

        # Result
        # results = self.neon_model.get_outputs(neon_valid_set)

        # Print error on validation set
        start = time.time()
        neon_error_mis = self.neon_model.eval(neon_valid_set, metric=Misclassification()) * 100
        print('Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'
              .format(neon_error_mis[0], time.time() - start))
        # start = time.time()
        # neon_error_top3 = self.neon_model.eval(neon_valid_set, metric=TopKMisclassification(3)) * 100
        # print('Top 3 Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'.format(neon_error_top3[2], time.time() - start))
        # start = time.time()
        # neon_error_top5 = self.neon_model.eval(neon_valid_set, metric=TopKMisclassification(5)) * 100
        # print('Top 5 Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'.format(neon_error_top5[2], time.time() - start))

        self.neon_model.save_params(
            "{}saved_models/{}/{}/neon_weights_{}_{}_{}by{}_{}.prm".format(
                self.root, self.network_type, d, b, self.dataset,
                self.resize_size[0], self.resize_size[1], self.preprocessing))

        # Print error on test set
        start = time.time()
        neon_error_mis_t = self.neon_model.eval(neon_test_set, metric=Misclassification()) * 100
        print('Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'
              .format(neon_error_mis_t[0], time.time() - start))
        # start = time.time()
        # neon_error_top3_t = self.neon_model.eval(neon_test_set, metric=TopKMisclassification(3)) * 100
        # print('Top 3 Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'.format(neon_error_top3_t[2], time.time() - start))
        # start = time.time()
        # neon_error_top5_t = self.neon_model.eval(neon_test_set, metric=TopKMisclassification(5)) * 100
        # print('Top 5 Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'.format(neon_error_top5_t[2], time.time() - start))

        cleanup_backend()
        self.neon_model = None
# setup cost function as CrossEntropy
cost = GeneralizedGANCost(costfunc=GANCost(func="modified"))

# setup optimizer
optimizer = Adam(learning_rate=0.0005, beta_1=0.5)

# initialize model
noise_dim = (2, 7, 7)
gan = GAN(layers=layers, noise_dim=noise_dim, k=args.kbatch)

# configure callbacks
callbacks = Callbacks(gan, eval_set=valid_set, **args.callback_args)
fdir = ensure_dirs_exist(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] +\
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname), hw=27,
               num_samples=args.batch_size, nchan=1, sym_range=True)
callbacks.add_callback(GANPlotCallback(**im_args))
callbacks.add_callback(GANCostCallback())

# run fit
gan.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
        cost=cost, callbacks=callbacks)
              init=glorot, activation=Rectlinclip(), batch_norm=True,
              reset_cells=True, depth=depth),
    Affine(hidden_size, init=glorot, activation=Rectlinclip()),
    Affine(nout=nout, init=glorot, activation=Identity())
]

model = Model(layers=layers)
opt = GradientDescentMomentum(learning_rate, momentum,
                              gradient_clip_norm=gradient_clip_norm,
                              stochastic_round=False, nesterov=True)
callbacks = Callbacks(model, eval_set=dev, **args.callback_args)

# Print validation set word error rate at the end of every epoch
pcb = WordErrorRateCallback(dev, argmax_decoder, max_tscrpt_len, epoch_freq=1)
callbacks.add_callback(pcb)

cost = GeneralizedCost(costfunc=CTC(max_tscrpt_len, nout=nout))

# Fit the model
model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
          Conv((1, 1, 16), **conv),
          Pooling(8, op="avg"),
          Activation(Softmax())]

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
mlp = Model(layers=layers)

if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train_set, eval_set=valid_set, **args.callback_args)

if args.deconv:
    callbacks.add_deconv_callback(train_set, valid_set)

callbacks.add_callback(MetricCallback(valid_set, Misclassification()))

mlp.fit(train_set, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

import logging
logger = logging.getLogger(__name__)
logger.critical('Misclassification error = %.1f%%'
                % (mlp.eval(valid_set, metric=Misclassification()) * 100))
layers.append(Conv((2, 2, nchan), strides=2, **common))
for idx in range(16):
    layers.append(Conv((3, 3, nchan), **common))
    if nchan > 16:
        nchan /= 2
for idx in range(15):
    layers.append(Deconv((3, 3, nchan), **common))
layers.append(Deconv((4, 4, nchan), strides=2, **common))
layers.append(Deconv((3, 3, 1), init=init, activation=Logistic(shortcut=True)))

cost = GeneralizedCost(costfunc=SumSquared())
mlp = Model(layers=layers)
callbacks = Callbacks(mlp, train, **args.callback_args)
evaluator = Evaluator(callbacks.callback_data, mlp, test, imwidth, args.epochs,
                      args.data_dir, point_num)
callbacks.add_callback(evaluator)
mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
train.exit_batch_provider()

preds = evaluator.get_outputs()
paths = np.genfromtxt(os.path.join(args.test_data_dir, 'val_file.csv'), dtype=str)[1:]
basenames = [os.path.basename(path) for path in paths]
filenames = [path.split(',')[0] for path in basenames]
filenames.sort()

content = []
for i, filename in enumerate(filenames):
cost = GeneralizedCost(costfunc=(CrossEntropyBinary()
                                 if train.parser.independent_labels else CrossEntropyMulti()))
metric = EMMetric(oshape=test.parser.oshape,
                  use_softmax=not train.parser.independent_labels) if test else None

# configure callbacks
if not args.neon_progress:
    args.callback_args['progress_bar'] = False
callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args)
if not args.neon_progress:
    callbacks.add_callback(EMEpochCallback(args.callback_args['eval_freq'],
                                           train.nmacrobatches), insert_pos=None)
# xxx - thought of making this an option but not clear that it slows anything down?
#callbacks.add_hist_callback() # xxx - not clear what information this conveys
if args.save_best_path:
    callbacks.add_save_best_state_callback(args.save_best_path)

model.fit(train, optimizer=opt, num_epochs=num_epochs, cost=cost, callbacks=callbacks)
print('Model training complete for %d epochs!' % (args.epochs,))
#test.stop(); train.stop()

elif args.write_output:
          Pooling(3, strides=2)]

# Structure of the deep residual part of the network:
# args.depth modules of 2 convolutional layers each at feature map depths
# of 64, 128, 256, 512
nfms = list(itt.chain.from_iterable(
    [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
strides = [-1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

for nfm, stride in zip(nfms, strides):
    layers.append(module_factory(nfm, stride))

layers.append(Pooling('all', op='avg'))
layers.append(Conv(**conv_params(1, train.nclass, relu=False)))
layers.append(Activation(Softmax()))
model = Model(layers=layers)

weight_sched = Schedule([30, 60], 0.1)
opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0001, schedule=weight_sched)

# configure callbacks
valmetric = TopKMisclassification(k=5)
callbacks = Callbacks(model, eval_set=test, metric=valmetric, **args.callback_args)
callbacks.add_callback(BatchNormTuneCallback(tune), insert_pos=0)

cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
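# A quick illustration of what the nfms/strides comprehensions in the ResNet snippet
# above evaluate to; stages = [2, 2, 2, 2] is an assumed example value, not taken
# from the script.
import itertools as itt

stages = [2, 2, 2, 2]
nfms = list(itt.chain.from_iterable(
    [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
strides = [-1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
print(nfms)     # [64, 64, 128, 128, 256, 256, 512, 512]
print(strides)  # [-1, 1, 2, 1, 2, 1, 2, 1]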
]
model = Model(layers=layers)

# define optimizer
opt_w = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, wdecay=0.0005)
opt_b = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)
opt = MultiOptimizer({'default': opt_w, 'Bias': opt_b}, name='multiopt')

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, metric=Misclassification(),
                      **args.callback_args)
callbacks.add_callback(TrainByStageCallback(model, valid_set, Misclassification(),
                                            max_patience=10))

cost = GeneralizedCost(costfunc=CrossEntropyMulti())
logger.info('Training ...')
model.fit(train_set, optimizer=opt, num_epochs=250, cost=cost, callbacks=callbacks)
print('Accuracy = %.2f%%' % (100. - model.eval(valid_set, metric=Misclassification()) * 100))
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, output_file=args.output_file)

if args.validation_freq:
    class TopKMetrics(Callback):
        def __init__(self, valid_set, epoch_freq=args.validation_freq):
            super(TopKMetrics, self).__init__(epoch_freq=epoch_freq)
            self.valid_set = valid_set

        def on_epoch_end(self, epoch):
            self.valid_set.reset()
            allmetrics = TopKMisclassification(k=5)
            stats = mlp.eval(self.valid_set, metric=allmetrics)
            print(", ".join(allmetrics.metric_names) + ": " +
                  ", ".join(map(str, stats.flatten())))

    callbacks.add_callback(TopKMetrics(test))

if args.save_path:
    checkpoint_schedule = range(1, args.epochs)
    callbacks.add_serialize_callback(checkpoint_schedule, args.save_path, history=2)

mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

test.exit_batch_provider()
train.exit_batch_provider()
def train(self, content, targets, test_content=None, test_targets=None,
          has_features=False, serialize=0, save_path=None,
          learning_rate=0.001, epochs=5):
    """
    :param content: numerical content returned from gen_training_set
    :param targets: numerical targets returned from gen_training_set
    :param test_content: separate test set to be used for evaluation
    :param test_targets: ""
    :param test_features: ""
    :param features: features are a list of float lists that can also be in string form,
        but will be converted to arrays of floats that must be of the same length as the
        features specified when creating the classifier.
    :param receiver_address: deprecated in favor of features, but left in for testing
    :param serialize:
    :param save_path:
    :param model_file:
    :param holdout_pct:
    :param learning_rate:
    :param epochs:
    :return:
    """
    multicost = self.exclusive_classes is not None and self.overlapping_classes is not None
    if multicost:
        metric = MultiMetric(Misclassification(), 0)
    elif self.overlapping_classes is None:
        metric = Misclassification()
    else:
        metric = AverageLogLoss()

    print('Training neural networks on {} samples for {} epochs.'.format(
        len(targets[0]), epochs))

    if test_content is not None and test_targets is not None:
        valid = BatchIterator(test_content, targets=test_targets,
                              steps=[1] if has_features else [1, 1])
    else:
        valid = None
    train = BatchIterator(content, targets=targets,
                          steps=[1] if has_features else [1, 1])

    callbacks = Callbacks(self.neuralnet, train_set=train, multicost=multicost,
                          metric=metric,
                          eval_freq=None if valid is None else 1, eval_set=valid)
                          # save_path=save_path, serialize=serialize)
    if serialize:
        callbacks.add_save_best_state_callback(save_path)
    if valid is not None:
        if self.exclusive_classes is not None:
            print('Starting misclassification error = {:.03}%'.format(
                self.neuralnet.eval(valid, metric)[0] * 100))
            callbacks.add_callback(MisclassificationTest(valid, metric))
        else:
            print('Starting average logloss = {:.04}'.format(
                self.neuralnet.eval(valid, metric)[0]))
            callbacks.add_callback(LogLossTest(valid, metric))

    if hasattr(self.optimizer, 'learning_rate'):
        self.optimizer.learning_rate = learning_rate
        print('learning rate = {}'.format(learning_rate))

    self.fit(train, self.optimizer, epochs, callbacks)
layers.append(Conv((2, 2, nchan), strides=2, **common))
for idx in range(16):
    layers.append(Conv((3, 3, nchan), **common))
    if nchan > 16:
        nchan /= 2
for idx in range(15):
    layers.append(Deconv((3, 3, nchan), **common))
layers.append(Deconv((4, 4, nchan), strides=2, **common))
layers.append(Deconv((3, 3, 1), init=init, activation=Logistic(shortcut=True)))

cost = GeneralizedCost(costfunc=SumSquared())
mlp = Model(layers=layers)
callbacks = Callbacks(mlp, train, **args.callback_args)
evaluator = Evaluator(callbacks.callback_data, mlp, test, imwidth, args.epochs,
                      args.data_dir, point_num)
callbacks.add_callback(evaluator)
mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
train.exit_batch_provider()

preds = evaluator.get_outputs()
paths = np.genfromtxt(os.path.join(args.test_data_dir, 'val_file.csv'), dtype=str)[1:]
basenames = [os.path.basename(path) for path in paths]
filenames = [path.split(',')[0] for path in basenames]
filenames.sort()

content = []
for i, filename in enumerate(filenames):
    item = {
        "annotations": [
opt_b = GradientDescentMomentum(0.002, 0.9, wdecay=0.0005, schedule=schedule_b)
opt_skip = GradientDescentMomentum(0.0, 0.0)
optimizer = MultiOptimizer({'default': opt_w,
                            'Bias': opt_b,
                            'skip': opt_skip,
                            'skip_bias': opt_skip})

# if training a new model, seed the image model conv layers with pre-trained weights
# otherwise, just load the model file
if args.model_file is None:
    util.load_vgg_all_weights(model, cache_dir)

callbacks = Callbacks(model, eval_set=train_set, **args.callback_args)
callbacks.add_callback(TrainMulticostCallback())

model.fit(train_set, optimizer=optimizer, cost=cost,
          num_epochs=args.epochs, callbacks=callbacks)

# Scale the bbox regression branch linear layer weights before saving the model
model = util.scale_bbreg_weights(model, [0.0, 0.0, 0.0, 0.0],
                                 [0.1, 0.1, 0.2, 0.2], train_set.num_classes)

if args.save_path is not None:
    save_obj(model.serialize(keep_states=True), args.save_path)