bias=Constant(1), activation=relu), Conv((3, 3, 256), padding=1, init=init1b, bias=Constant(1), activation=relu), Pooling(3, strides=2), Affine(nout=4096, init=init1, bias=Constant(1), activation=relu), Dropout(keep=0.5), Affine(nout=4096, init=init1, bias=Constant(1), activation=relu), Dropout(keep=0.5), Affine(nout=1000, init=init1, bias=Constant(-7), activation=Softmax()) ] cost = GeneralizedCost(costfunc=CrossEntropyMulti()) opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases}) model = Model(layers=layers) # configure callbacks callbacks = Callbacks(model, train, args, eval_set=test, metric=TopKMisclassification(k=5)) try: model.fit(train, optimizer=opt,
gate_activation=Logistic(), reset_cells=True, name=name + "Dec")) decoder_connections.append(ii) decoder.append( Affine(train_set.nout, init, bias=init, activation=Softmax(), name="AffOut")) layers = Seq2Seq([encoder, decoder], decoder_connections=decoder_connections, name="Seq2Seq") cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)) model = Model(layers=layers) optimizer = RMSProp(gradient_clip_value=gradient_clip_value, stochastic_round=args.rounding) callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args) # train model model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks) # Misclassification rate on validation set error_rate = model.eval(valid_set, metric=Misclassification(steps=time_steps)) neon_logger.display('Misclassification error = %.2f%%' % (error_rate * 100))
def main(): # parse the command line arguments parser = NeonArgparser(__doc__) args = parser.parse_args() logger = logging.getLogger() logger.setLevel(args.log_thresh) #Set up batch iterator for training images print "Setting up data batch loaders..." train = ImgMaster(repo_dir='dataTmp', set_name='train', inner_size=120, subset_pct=100) val = ImgMaster(repo_dir='dataTmp', set_name='train', inner_size=120, subset_pct=100, do_transforms=False) test = ImgMaster(repo_dir='dataTestTmp', set_name='train', inner_size=120, subset_pct=100, do_transforms=False) train.init_batch_provider() val.init_batch_provider() test.init_batch_provider() print "Constructing network..." #Create AlexNet architecture model = constuct_network() #model.load_weights(args.model_file) # drop weights LR by 1/250**(1/3) at epochs (23, 45, 66), drop bias LR by 1/10 at epoch 45 weight_sched = Schedule([22, 44, 65, 90, 97], (1 / 250.)**(1 / 3.)) opt_gdm = GradientDescentMomentum(0.01, 0.9, wdecay=0.005, schedule=weight_sched) opt_biases = GradientDescentMomentum(0.04, 1.0, schedule=Schedule([130], .1)) opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases}) # configure callbacks valmetric = TopKMisclassification(k=5) callbacks = Callbacks(model, train, eval_set=val, metric=valmetric, **args.callback_args) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) #flag = input("Press Enter if you want to begin training process.") print "Training network..." model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks) mets = model.eval(test, metric=valmetric) print 'Validation set metrics:' print 'LogLoss: %.2f, Accuracy: %.1f %%0 (Top-1), %.1f %% (Top-5)' % ( mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100) test.exit_batch_provider() val.exit_batch_provider() train.exit_batch_provider()
opt_gdm = GradientDescentMomentum(learning_rate=0.01 / cost_scale, momentum_coef=0.9, stochastic_round=args.rounding) bn = True layers = [ Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=bn), Pooling((2, 2)), Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=bn), Pooling((2, 2)), Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=bn), Affine(nout=10, init=init_uni, activation=Softmax()) ] if args.datatype in [np.float32, np.float64]: cost = GeneralizedCost(costfunc=CrossEntropyMulti()) elif args.datatype in [np.float16]: cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=cost_scale)) mlp = Model(layers=layers) # configure callbacks callbacks = Callbacks(mlp, eval_set=test, **args.callback_args) mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks) print 'Misclassification error = %.1f%%' % (
height=args.height, rois_per_img=rpn_rois_per_img, inference=False) config['subset_fraction'] = float(args.subset_pct / 100.0) train_set = faster_rcnn.build_dataloader(config, frcn_rois_per_img) # build the Faster-RCNN model model = faster_rcnn.build_model(train_set, frcn_rois_per_img, inference=False) # set up cost different branches, respectively weights = 1.0 / (rpn_rois_per_img) roi_w = 1.0 / (frcn_rois_per_img) frcn_tree_cost = Multicost(costs=[ GeneralizedCostMask(costfunc=CrossEntropyMulti(), weights=roi_w), GeneralizedCostMask(costfunc=SmoothL1Loss(), weights=roi_w) ], weights=[1, 1]) cost = Multicost(costs=[ GeneralizedCostMask(costfunc=CrossEntropyMulti(), weights=weights), GeneralizedCostMask(costfunc=SmoothL1Loss(sigma=3.0), weights=weights), frcn_tree_cost, ], weights=[1, 1, 1]) # setup optimizer schedule_w = StepSchedule(step_config=[10], change=[0.001 / 10]) schedule_b = StepSchedule(step_config=[10], change=[0.002 / 10])
max_len=args.max_len_w, index_from=index_from) skip = SkipThought(vocab_size_layer, embed_dim, init_embed_dev, nhidden, rec_layer=GRU, init_rec=Orthonormal(), activ_rec=Tanh(), activ_rec_gate=Logistic(), init_ff=Uniform(low=-0.1, high=0.1), init_const=Constant(0.0)) model = Model(skip) if args.model_file and os.path.isfile(args.model_file): neon_logger.display("Loading saved weights from: {}".format(args.model_file)) model_dict = load_obj(args.model_file) model.deserialize(model_dict, load_states=True) elif args.model_file: neon_logger.display("Unable to find model file {}, restarting training.". format(args.model_file)) cost = Multicost(costs=[GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)), GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))], weights=[1, 1]) optimizer = Adam(gradient_clip_norm=gradient_clip_norm) # metric valmetric = None # configure callbacks if valid_split and valid_split > 0.0: callbacks = MetricCallback(eval_set=valid_set, metric=valmetric, epoch_freq=args.eval_freq) else: callbacks = Callbacks(model, metric=valmetric, **args.callback_args) # train model model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
# setup optimizer opt_w = GradientDescentMomentum(0.001 * learning_rate_scale, 0.9, wdecay=0.0005) opt_b = GradientDescentMomentum(0.002 * learning_rate_scale, 0.9) optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b}) # setup model model = Model(layers=Tree([frcn_layers, bb_layers])) # if training a new model, seed the Alexnet conv layers with pre-trained weights # otherwise, just load the model file if args.model_file is None: load_imagenet_weights(model, args.data_dir) cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti()), GeneralizedCostMask(costfunc=SmoothL1Loss())], weights=[1, 1]) callbacks = Callbacks(model, **args.callback_args) model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost, callbacks=callbacks) print 'running eval on the training set...' metric_train = model.eval(train_set, metric=ObjectDetection()) print 'Train: label accuracy - {}%, object deteciton SmoothL1Loss - {}'.format( metric_train[0]*100, metric_train[1])
def benchmark(self):
    """Train and evaluate the CNN once per device in self.devices.

    For each device: builds a neon backend, wraps the preloaded
    train/valid/test numpy arrays in ArrayIterators, trains the model
    built by self.constructCNN(), logs timing, reports misclassification
    error on the validation and test sets, and saves parameters.
    """
    for d in self.devices:
        # Use "mkl" when the device supports it per self.backends, else the
        # device name itself ('cpu'/'gpu').
        b = d if (self.backends is None) or (
            "mkl" not in self.backends) else "mkl"
        print("Use {} as backend.".format(b))

        # Common suffix used for callback-data and saved-model file names.
        suffix = "neon_{}_{}_{}by{}_{}".format(b, self.dataset,
                                               self.resize_size[0],
                                               self.resize_size[1],
                                               self.preprocessing)

        # Set up backend
        # backend: 'cpu' for single cpu, 'mkl' for cpu using mkl library, and 'gpu' for gpu
        # NOTE(review): the returned backend object `be` is not referenced
        # again; gen_backend installs itself globally.
        be = gen_backend(backend=b,
                         batch_size=self.batch_size,
                         rng_seed=542,
                         datatype=np.float32)

        # Prepare training/validation/testing sets.
        # Images are flattened, scaled to [0, 1], and reshaped by lshape
        # to (channels=3, height, width); labels are one-hot encoded.
        neon_train_set = ArrayIterator(X=np.asarray(
            [t.flatten().astype('float32') / 255 for t in self.x_train]),
            y=np.asarray(self.y_train),
            make_onehot=True,
            nclass=self.class_num,
            lshape=(3, self.resize_size[0], self.resize_size[1]))
        neon_valid_set = ArrayIterator(X=np.asarray(
            [t.flatten().astype('float32') / 255 for t in self.x_valid]),
            y=np.asarray(self.y_valid),
            make_onehot=True,
            nclass=self.class_num,
            lshape=(3, self.resize_size[0], self.resize_size[1]))
        neon_test_set = ArrayIterator(X=np.asarray([
            t.flatten().astype('float32') / 255 for t in self.testImages
        ]),
            y=np.asarray(self.testLabels),
            make_onehot=True,
            nclass=self.class_num,
            lshape=(3, self.resize_size[0], self.resize_size[1]))

        # Initialize model object from the layer list built elsewhere.
        self.neon_model = SelfModel(layers=self.constructCNN())

        # Costs: multiclass cross-entropy.
        neon_cost = GeneralizedCost(costfunc=CrossEntropyMulti())

        # Model summary (initialize binds buffers so printing shows shapes).
        self.neon_model.initialize(neon_train_set, neon_cost)
        print(self.neon_model)

        # Learning rules: SGD with momentum and exponential LR decay.
        neon_optimizer = SGD(0.01, momentum_coef=0.9,
                             schedule=ExpSchedule(0.2))
        # neon_optimizer = RMSProp(learning_rate=0.0001, decay_rate=0.95)

        # # Benchmark for 20 minibatches
        # d[b] = self.neon_model.benchmark(neon_train_set, cost=neon_cost, optimizer=neon_optimizer)

        # Reset model
        # self.neon_model = None
        # self.neon_model = Model(layers=layers)
        # self.neon_model.initialize(neon_train_set, neon_cost)

        # Callbacks: validate on validation set, recording top-3
        # misclassification and writing HDF5 callback data per device.
        callbacks = Callbacks(
            self.neon_model,
            eval_set=neon_valid_set,
            metric=Misclassification(3),
            output_file="./saved_data/{}/{}/callback_data_{}.h5".format(
                self.network_type, d, suffix))
        callbacks.add_callback(
            SelfCallback(eval_set=neon_valid_set, test_set=neon_test_set,
                         epoch_freq=1))

        # Fit, timing the full training run.
        start = time.time()
        self.neon_model.fit(neon_train_set,
                            optimizer=neon_optimizer,
                            num_epochs=self.epoch_num,
                            cost=neon_cost,
                            callbacks=callbacks)
        print("Neon training finishes in {:.2f} seconds.".format(
            time.time() - start))

        # Result
        # results = self.neon_model.get_outputs(neon_valid_set)

        # Print error on validation set
        start = time.time()
        neon_error_mis = self.neon_model.eval(
            neon_valid_set, metric=Misclassification()) * 100
        print(
            'Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'
            .format(neon_error_mis[0], time.time() - start))
        # start = time.time()
        # neon_error_top3 = self.neon_model.eval(neon_valid_set, metric=TopKMisclassification(3))*100
        # print('Top 3 Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'.format(neon_error_top3[2], time.time() - start))
        # start = time.time()
        # neon_error_top5 = self.neon_model.eval(neon_valid_set, metric=TopKMisclassification(5))*100
        # print('Top 5 Misclassification error = {:.1f}%. Finished in {:.2f} seconds.'.format(neon_error_top5[2], time.time() - start))

        self.neon_model.save_params("./saved_models/{}/{}/{}.prm".format(
            self.network_type, d, suffix))

        # Print error on test set
        start = time.time()
        neon_error_mis_t = self.neon_model.eval(
            neon_test_set, metric=Misclassification()) * 100
        print(
            'Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'
            .format(neon_error_mis_t[0], time.time() - start))
        # start = time.time()
        # neon_error_top3_t = self.neon_model.eval(neon_test_set, metric=TopKMisclassification(3))*100
        # print('Top 3 Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'.format(neon_error_top3_t[2], time.time() - start))
        # start = time.time()
        # neon_error_top5_t = self.neon_model.eval(neon_test_set, metric=TopKMisclassification(5))*100
        # print('Top 5 Misclassification error = {:.1f}% on test set. Finished in {:.2f} seconds.'.format(neon_error_top5_t[2], time.time() - start))

        # Tear down the backend before moving to the next device.
        cleanup_backend()
        self.neon_model = None
# setting model layers for AE1 encoder1 = Affine(nout=config.encoder_size[0], init=init_norm, activation=Logistic(), name='encoder1') decoder1 = Affine(nout=image_size, init=init_norm, activation=Logistic(), name='decoder1') encoder2 = Affine(nout=config.encoder_size[1], init=init_norm, activation=Logistic(), name='encoder2') decoder2 = Affine(nout=config.encoder_size[0], init=init_norm, activation=Logistic(), name='decoder2') encoder3 = Affine(nout=config.encoder_size[2], init=init_norm, activation=Logistic(), name='encoder3') decoder3 = Affine(nout=config.encoder_size[1], init=init_norm, activation=Logistic(), name='decoder3') classifier = Affine(nout=config.ydim, init=init_norm, activation=Softmax()) cost_reconst = GeneralizedCost(costfunc=SumSquared()) cost_classification = GeneralizedCost(costfunc=CrossEntropyMulti()) # Setting model layers for AE1 AE1 = Model([encoder1, decoder1]) AE1.cost = cost_reconst AE1.initialize(data, cost_reconst) # AE1.optimizer = optimizer_default measure_time(data, AE1, config, 'AE1') # Setting model layers for AE2 # It has an extra encoder layer compared to what AE should really be. This is # done to avoid saving the outputs for each AE. AE2_mimic = Model([encoder1, encoder2, decoder2]) AE2_mimic.cost = cost_reconst AE2_mimic.initialize(data, cost_reconst) # Learning rates for extra layers that should not be updated are set to zero.
def test_cross_entropy_multi_derivative(backend_default):
    """Verify CrossEntropyMulti backprop equals (outputs - targets) / batch."""
    outputs = np.reshape([0.5, 1.0, 0.0, 0.0001], (4, 1))
    targets = np.reshape([0.5, 0.0, 1.0, 0.2], (4, 1))
    batch = outputs.shape[1]
    expected = (outputs - targets) / batch
    compare_tensors(CrossEntropyMulti(), outputs, targets, expected,
                    deriv=True, tol=1e-6)
from neon.backends import gen_backend import bot_params as params import replay_memory as mem from enemydetector1 import model, predict params.batch_size = 64 be = gen_backend(backend='cpu', batch_size=params.batch_size) dataset = mem.load() opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9, stochastic_round=0) cost = GeneralizedCost(costfunc=CrossEntropyMulti(scale=10)) (X_train, y_train), (X_test, y_test) = dataset.get_dataset() print X_train.shape, y_train.shape, X_test.shape, y_test.shape train_set = ArrayIterator(X=X_train, y=y_train, nclass=dataset.nclass, lshape=dataset.shape, make_onehot=False) test = ArrayIterator(X=X_test, y=y_test, nclass=dataset.nclass, lshape=dataset.shape, make_onehot=False) callbacks = Callbacks(model, eval_set=test, eval_freq=1,) model.fit(train_set, optimizer=opt_gdm, num_epochs=2, cost=cost, callbacks=callbacks) model.save_params(params.weigths_path) def test_example(i): val = predict(X_train[i])
def create_network_lrn():
    """Build the AlexNet topology with Local Response Normalization.

    Returns a ``(Model, GeneralizedCost)`` pair: the classic 5-conv /
    3-fully-connected AlexNet stack with LRN after the first two pooling
    stages, paired with a multiclass cross-entropy cost.
    """
    conv_init = Gaussian(scale=0.01)   # conv layers and the softmax head
    fc_init = Gaussian(scale=0.005)    # hidden fully-connected layers

    layers = []
    # Stage 1: conv1 -> pool1 -> norm1
    layers.append(Conv((11, 11, 96), padding=0, strides=4, init=conv_init,
                       bias=Constant(0), activation=Rectlin(), name='conv1'))
    layers.append(Pooling(3, strides=2, name='pool1'))
    layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm1'))
    # Stage 2: conv2 -> pool2 -> norm2
    layers.append(Conv((5, 5, 256), padding=2, init=conv_init,
                       bias=Constant(1.0), activation=Rectlin(), name='conv2'))
    layers.append(Pooling(3, strides=2, name='pool2'))
    layers.append(LRN(5, ascale=0.0001, bpower=0.75, name='norm2'))
    # Stage 3: conv3 -> conv4 -> conv5 -> pool5
    layers.append(Conv((3, 3, 384), padding=1, init=conv_init,
                       bias=Constant(0), activation=Rectlin(), name='conv3'))
    layers.append(Conv((3, 3, 384), padding=1, init=conv_init,
                       bias=Constant(1.0), activation=Rectlin(), name='conv4'))
    layers.append(Conv((3, 3, 256), padding=1, init=conv_init,
                       bias=Constant(1.0), activation=Rectlin(), name='conv5'))
    layers.append(Pooling(3, strides=2, name='pool5'))
    # Classifier head: fc6 -> drop6 -> fc7 -> drop7 -> fc8 (1000-way softmax)
    layers.append(Affine(nout=4096, init=fc_init, bias=Constant(1.0),
                         activation=Rectlin(), name='fc6'))
    layers.append(Dropout(keep=0.5, name='drop6'))
    layers.append(Affine(nout=4096, init=fc_init, bias=Constant(1.0),
                         activation=Rectlin(), name='fc7'))
    layers.append(Dropout(keep=0.5, name='drop7'))
    layers.append(Affine(nout=1000, init=conv_init, bias=Constant(0.0),
                         activation=Softmax(), name='fc8'))

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
def main():
    """Train (or benchmark) a SegNet model for pixel-wise segmentation.

    Builds an upsampling-capable GPU backend, loads pixel-wise image data,
    trains with per-parameter-type optimizers, then pickles the validation
    set outputs to 'outputs.pkl'.
    """
    # larger batch sizes may not fit on GPU
    parser = NeonArgparser(__doc__, default_overrides={'batch_size': 4})
    parser.add_argument("--bench", action="store_true",
                        help="run benchmark instead of training")
    parser.add_argument("--num_classes", type=int, default=12,
                        help="number of classes in the annotation")
    parser.add_argument("--height", type=int, default=256,
                        help="image height")
    parser.add_argument("--width", type=int, default=512,
                        help="image width")
    args = parser.parse_args(gen_be=False)

    # check that image dimensions are powers of 2
    # (bit trick: n & (n-1) == 0 iff n is a power of 2)
    if((args.height & (args.height - 1)) != 0):
        raise TypeError("Height must be a power of 2.")
    if((args.width & (args.width - 1)) != 0):
        raise TypeError("Width must be a power of 2.")

    (c, h, w) = (args.num_classes, args.height, args.width)

    # need to use the backend with the new upsampling layer implementation
    be = NervanaGPU_Upsample(rng_seed=args.rng_seed,
                             device_id=args.device_id)
    # set batch size
    be.bsz = args.batch_size

    # couple backend to global neon object
    NervanaObject.be = be

    shape = dict(channel_count=3, height=h, width=w, subtract_mean=False)
    # Both train and test use centered, unflipped crops at the full image
    # scale (scale_min == scale_max == min(h, w)), i.e. no augmentation.
    train_params = ImageParams(center=True, flip=False,
                               scale_min=min(h, w), scale_max=min(h, w),
                               aspect_ratio=0, **shape)
    test_params = ImageParams(center=True, flip=False,
                              scale_min=min(h, w), scale_max=min(h, w),
                              aspect_ratio=0, **shape)
    # Targets are per-pixel class ids (h*w uint8 values, not one-hot).
    common = dict(target_size=h*w, target_conversion='read_contents',
                  onehot=False, target_dtype=np.uint8, nclasses=args.num_classes)

    train_set = PixelWiseImageLoader(set_name='train', repo_dir=args.data_dir,
                                      media_params=train_params,
                                      shuffle=False, subset_percent=100,
                                      index_file=os.path.join(args.data_dir, 'train_images.csv'),
                                      **common)
    val_set = PixelWiseImageLoader(set_name='val', repo_dir=args.data_dir,
                                    media_params=test_params,
                                    index_file=os.path.join(args.data_dir, 'val_images.csv'),
                                    **common)

    # initialize model object
    layers = gen_model(c, h, w)
    segnet_model = Model(layers=layers)

    # configure callbacks
    callbacks = Callbacks(segnet_model, eval_set=val_set, **args.callback_args)

    # Separate optimizers for weights, biases and batch-norm parameters;
    # biases use 2x the base learning rate and no weight decay.
    opt_gdm = GradientDescentMomentum(1.0e-6, 0.9, wdecay=0.0005, schedule=Schedule())
    opt_biases = GradientDescentMomentum(2.0e-6, 0.9, schedule=Schedule())
    opt_bn = GradientDescentMomentum(1.0e-6, 0.9, schedule=Schedule())
    opt = MultiOptimizer({'default': opt_gdm, 'Bias': opt_biases, 'BatchNorm': opt_bn})

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())

    if args.bench:
        # Benchmark mode: time one pass and exit without training.
        segnet_model.initialize(train_set, cost=cost)
        segnet_model.benchmark(train_set, cost=cost, optimizer=opt)
        sys.exit(0)
    else:
        segnet_model.fit(train_set, optimizer=opt, num_epochs=args.epochs,
                         cost=cost, callbacks=callbacks)

    # get the trained segnet model outputs for validation set
    outs_val = segnet_model.get_outputs(val_set)

    # NOTE(review): pickle to a text-mode handle ('w') — works on Python 2
    # only; under Python 3 this should be 'wb'. Confirm target interpreter.
    with open('outputs.pkl', 'w') as fid:
        pickle.dump(outs_val, fid, -1)
schedule=weight_sched, stochastic_round=args.rounding) opt_biases = GradientDescentMomentum(args.rate_init[1], args.momentum[1], schedule=weight_sched, stochastic_round=args.rounding) opt_fixed = GradientDescentMomentum(0.0, 1.0, wdecay=0.0) opt = MultiOptimizer({ 'default': opt_gdm, 'Bias': opt_biases, 'DOG': opt_fixed }) # configure cost and test metrics cost = GeneralizedCost(costfunc=(CrossEntropyBinary() \ if train.parser.independent_labels else CrossEntropyMulti())) metric = EMMetric( oshape=test.parser.oshape, use_softmax=not train.parser.independent_labels) if test else None # configure callbacks if not args.neon_progress: args.callback_args['progress_bar'] = False callbacks = Callbacks(model, eval_set=test, metric=metric, **args.callback_args) if not args.neon_progress: callbacks.add_callback(EMEpochCallback( args.callback_args['eval_freq'], train.nmacrobatches), insert_pos=None)
def main(): # parse the command line arguments parser = NeonArgparser(__doc__) args = parser.parse_args() logger = logging.getLogger() logger.setLevel(args.log_thresh) #Set up batch iterator for training images train = ImgMaster(repo_dir='spectroDataTmp', set_name='train', inner_size=400, subset_pct=100) val = ImgMaster(repo_dir='spectroDataTmp', set_name='validation', inner_size=400, subset_pct=100, do_transforms=False) test = ImgMaster(repo_dir='spectroTestDataTmp', set_name='validation', inner_size=400, subset_pct=100, do_transforms=False) train.init_batch_provider() test.init_batch_provider() print "Constructing network..." model = constuct_network() model.load_weights(args.model_file) #Optimizer opt = Adadelta() # configure callbacks valmetric = TopKMisclassification(k=5) callbacks = Callbacks(model, train, eval_set=val, metric=valmetric, **args.callback_args) cost = GeneralizedCost(costfunc=CrossEntropyMulti()) #flag = input("Press Enter if you want to begin training process.") print "Training network..." print args.epochs model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks) mets = model.eval(test, metric=valmetric) print 'Validation set metrics:' print 'LogLoss: %.2f, Accuracy: %.1f %%0 (Top-1), %.1f %% (Top-5)' % ( mets[0], (1.0 - mets[1]) * 100, (1.0 - mets[2]) * 100) test.exit_batch_provider() train.exit_batch_provider()