def __init__(self):
    """Build and initialize a small two-stream recurrent model.

    An image branch and a sentence branch (stacks of Affine layers) are
    combined by ``MergeMultistream`` with ``merge="recurrent"`` and followed
    by dropout, an LSTM and a softmax Affine layer. Every initializer is
    ``Constant(0)``. The layer list, the masked multi-class cross-entropy
    cost and the initialized ``Model`` are stored on the instance.
    """
    self.in_shape = [1024, (2538, 38)]

    zeros = Constant(0)
    image_branch = Sequential([
        Affine(20, zeros, bias=zeros),
        Affine(10, zeros, bias=zeros),
    ])
    # No bias argument is passed to the second sentence layer.
    sentence_branch = Sequential([
        Affine(30, zeros, bias=zeros),
        Affine(10, zeros),
    ])

    self.layers = [
        MergeMultistream(layers=[image_branch, sentence_branch],
                         merge="recurrent"),
        Dropout(keep=0.5),
        LSTM(4, zeros, activation=Logistic(), gate_activation=Tanh(),
             reset_cells=True),
        Affine(20, zeros, bias=zeros, activation=Softmax()),
    ]
    self.cost = GeneralizedCostMask(CrossEntropyMulti())

    # The model is built from the very same list object kept in self.layers.
    self.model = Model(layers=self.layers)
    self.model.initialize(self.in_shape, cost=self.cost)
# Initializers: uniform for the weights, and a constant built from the
# training set's bias_init values for the output layer's bias.
init = Uniform(low=-0.08, high=0.08)
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# Two input streams merged with merge="recurrent", then dropout, an LSTM and
# a softmax Affine layer sized by train_set.vocab_size.
image_path = Sequential([
    Affine(hidden_size, init, bias=Constant(val=0.0)),
])
sent_path = Sequential([
    Affine(hidden_size, init, linear_name='sent'),
])
layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(),
         reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax()),
]
cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# Callback configuration: fill in a checkpoint path and a serialize value of 1
# only when the corresponding entry is None.
checkpoint_model_path = "~/image_caption2.pickle"
callback_args = args.callback_args
if callback_args['save_path'] is None:
    callback_args['save_path'] = checkpoint_model_path
if callback_args['serialize'] is None:
    callback_args['serialize'] = 1

model = Model(layers=layers)
callbacks = Callbacks(model, train_set, **args.callback_args)

opt = RMSProp(
    decay_rate=0.997,
    learning_rate=0.0005,
    epsilon=1e-8,
    gradient_clip_value=1,
)
# Fragment (whitespace-collapsed; it stops inside the final model.fit call).
# Wraps ticker_task in a Ticker data set, then describes a GRU -> Affine
# (Logistic) layer stack with Uniform(-0.08, 0.08) initialization, a masked
# binary cross-entropy cost, an RMSProp optimizer with gradient clipping and
# optional stochastic rounding, and a Callbacks object on which the training
# set is registered through add_watch_ticker_callback.
# NOTE(review): output_size, N and M are assigned but never read in the
# visible text -- confirm they are unused before removing them.
# NOTE(review): all statements share one physical line, so Python parses
# everything after the first '#' as a comment; the original line breaks need
# to be restored for this code to run.
train_set = Ticker(ticker_task) # weight initialization init = Uniform(low=-0.08, high=0.08) output_size = 8 N = 120 # number of memory locations M = 8 # size of a memory location # model initialization layers = [ GRU(hidden_size, init, activation=Tanh(), gate_activation=Logistic()), Affine(train_set.nout, init, bias=init, activation=Logistic()) ] cost = GeneralizedCostMask(costfunc=CrossEntropyBinary()) model = Model(layers=layers) optimizer = RMSProp(gradient_clip_value=gradient_clip_value, stochastic_round=args.rounding) # configure callbacks callbacks = Callbacks(model, **args.callback_args) # we can use the training set as the validation set, # since the data is tickerally generated callbacks.add_watch_ticker_callback(train_set) # train model model.fit(train_set,
# Build the model.
model = create_frcn_model(frcn_fine_tune)

# Separate momentum optimizers: the default one carries weight decay, the one
# mapped to 'Bias' uses twice the base learning rate and no weight decay.
opt_w = GradientDescentMomentum(
    0.001 * learning_rate_scale, 0.9, wdecay=0.0005)
opt_b = GradientDescentMomentum(0.002 * learning_rate_scale, 0.9)
optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b})

# With no model file given, seed the model with the pre-trained VGG weights.
if args.model_file is None:
    load_vgg_weights(model, args.data_dir)

# Two equally weighted masked costs: multi-class cross-entropy and smooth L1.
xent_cost = GeneralizedCostMask(costfunc=CrossEntropyMulti())
smooth_l1_cost = GeneralizedCostMask(costfunc=SmoothL1Loss())
cost = Multicost(costs=[xent_cost, smooth_l1_cost], weights=[1, 1])

callbacks = Callbacks(model, eval_set=test_set, **args.callback_args)

model.fit(
    train_set,
    optimizer=optimizer,
    num_epochs=num_epochs,
    cost=cost,
    callbacks=callbacks,
)

# The bbox regression branch's linear layer weights are rescaled with the
# training set's target means/stds before the model is serialized.
model = scale_bbreg_weights(
    model, train_set.bbtarget_means, train_set.bbtarget_stds)
save_obj(model.serialize(keep_states=True), args.save_path)
# Bias optimizer; opt_w (the default optimizer) is defined before this point.
opt_b = GradientDescentMomentum(0.002 * learning_rate_scale, 0.9)
optimizer = MultiOptimizer({'default': opt_w, 'Bias': opt_b})

# setup model
model = Model(layers=Tree([frcn_layers, bb_layers]))

# if training a new model, seed the Alexnet conv layers with pre-trained
# weights; otherwise, just load the model file
if args.model_file is None:
    load_imagenet_weights(model, args.data_dir)

# Two equally weighted costs: an unmasked multi-class cross-entropy and a
# masked smooth L1 loss.
cost = Multicost(costs=[
    GeneralizedCost(costfunc=CrossEntropyMulti()),
    GeneralizedCostMask(costfunc=SmoothL1Loss()),
], weights=[1, 1])

callbacks = Callbacks(model, **args.callback_args)

model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
          cost=cost, callbacks=callbacks)

# print(...) with a single argument behaves the same on Python 2 and 3; the
# former print statements were a SyntaxError on Python 3.
print('running eval on the training set...')
metric_train = model.eval(train_set, metric=ObjectDetection())
print('Train: label accuracy - {}%, object deteciton SmoothL1Loss - {}'.format(
    metric_train[0] * 100, metric_train[1]))
# Fragment (whitespace-collapsed; it begins inside a call whose opening is
# not visible here). The visible text builds a SkipThought network with GRU
# recurrent layers and wraps it in a Model, loads saved weights when
# args.model_file names an existing file (logging a message when the file is
# missing), sets up two equally weighted masked cross-entropy costs and an
# Adam optimizer with gradient-norm clipping, then calls model.fit.
# NOTE(review): when valid_split > 0, `callbacks` is bound to a MetricCallback
# instead of a Callbacks(model, ...) container as in the else branch --
# confirm model.fit accepts that object.
# NOTE(review): valmetric is None in every path shown here.
max_len=args.max_len_w, index_from=index_from) skip = SkipThought(vocab_size_layer, embed_dim, init_embed_dev, nhidden, rec_layer=GRU, init_rec=Orthonormal(), activ_rec=Tanh(), activ_rec_gate=Logistic(), init_ff=Uniform(low=-0.1, high=0.1), init_const=Constant(0.0)) model = Model(skip) if args.model_file and os.path.isfile(args.model_file): neon_logger.display("Loading saved weights from: {}".format(args.model_file)) model_dict = load_obj(args.model_file) model.deserialize(model_dict, load_states=True) elif args.model_file: neon_logger.display("Unable to find model file {}, restarting training.". format(args.model_file)) cost = Multicost(costs=[GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)), GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))], weights=[1, 1]) optimizer = Adam(gradient_clip_norm=gradient_clip_norm) # metric valmetric = None # configure callbacks if valid_split and valid_split > 0.0: callbacks = MetricCallback(eval_set=valid_set, metric=valmetric, epoch_freq=args.eval_freq) else: callbacks = Callbacks(model, metric=valmetric, **args.callback_args) # train model model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs, cost=cost, callbacks=callbacks)
# Fragment (whitespace-collapsed; it begins inside a call whose opening is
# not visible here). The visible text stores args.subset_pct / 100.0 in
# config['subset_fraction'], builds the data loader and the model through the
# faster_rcnn module, and assembles the training cost: three equally weighted
# terms, the first two scaled by 1 / rpn_rois_per_img and the third a nested
# Multicost whose two terms are scaled by 1 / frcn_rois_per_img. It ends by
# creating two StepSchedule objects that change at step 10.
# NOTE(review): schedule_w and schedule_b are not consumed in the visible
# text -- presumably passed to optimizers further down; confirm.
height=args.height, rois_per_img=rpn_rois_per_img, inference=False) config['subset_fraction'] = float(args.subset_pct / 100.0) train_set = faster_rcnn.build_dataloader(config, frcn_rois_per_img) # build the Faster-RCNN model model = faster_rcnn.build_model(train_set, frcn_rois_per_img, inference=False) # set up cost different branches, respectively weights = 1.0 / (rpn_rois_per_img) roi_w = 1.0 / (frcn_rois_per_img) frcn_tree_cost = Multicost(costs=[ GeneralizedCostMask(costfunc=CrossEntropyMulti(), weights=roi_w), GeneralizedCostMask(costfunc=SmoothL1Loss(), weights=roi_w) ], weights=[1, 1]) cost = Multicost(costs=[ GeneralizedCostMask(costfunc=CrossEntropyMulti(), weights=weights), GeneralizedCostMask(costfunc=SmoothL1Loss(sigma=3.0), weights=weights), frcn_tree_cost, ], weights=[1, 1, 1]) # setup optimizer schedule_w = StepSchedule(step_config=[10], change=[0.001 / 10]) schedule_b = StepSchedule(step_config=[10], change=[0.002 / 10])