def __init__(self, num_actions, args):
    # remember parameters
    self.num_actions = num_actions
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.history_length = args.history_length
    self.screen_dim = (args.screen_height, args.screen_width)
    self.clip_error = args.clip_error
    self.min_reward = args.min_reward
    self.max_reward = args.max_reward
    self.batch_norm = args.batch_norm

    # create Neon backend
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.history_length,) + self.screen_dim + (self.batch_size,)
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    layers = self._createLayers(num_actions)
    self.model = Model(layers=layers)
    self.cost = GeneralizedCost(costfunc=SumSquared())
    # bug fix: disable layer parallelism (workaround for a neon issue)
    for l in self.model.layers.layers:
        l.parallelism = 'Disabled'
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.decay_rate,
                                 stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=args.learning_rate,
                              stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=args.decay_rate,
                                  stochastic_round=args.stochastic_round)
    else:
        assert False, "Unknown optimizer"

    # create target model
    self.train_iterations = 0
    if args.target_steps:
        self.target_model = Model(layers=self._createLayers(num_actions))
        # bug fix: same parallelism workaround for the target network
        for l in self.target_model.layers.layers:
            l.parallelism = 'Disabled'
        self.target_model.initialize(self.input_shape[:-1])
        self.save_weights_prefix = args.save_weights_prefix
    else:
        self.target_model = self.model

    self.callback = None
def __init__(self, overlapping_classes=None, exclusive_classes=None, analytics_input=True,
             network_type='conv_net', num_words=60, width=100,
             lookup_size=0, lookup_dim=0, optimizer=Adam()):
    assert (overlapping_classes is not None) or (exclusive_classes is not None)
    self.width = width
    self.num_words = num_words
    self.overlapping_classes = overlapping_classes
    self.exclusive_classes = exclusive_classes
    self.analytics_input = analytics_input
    self.recurrent = network_type == 'lstm'
    self.lookup_size = lookup_size
    self.lookup_dim = lookup_dim

    init = GlorotUniform()
    activation = Rectlin(slope=1E-05)
    gate = Logistic()

    input_layers = self.input_layers(analytics_input, init, activation, gate)

    if self.overlapping_classes is None:
        output_layers = [Affine(len(self.exclusive_classes), init, activation=Softmax())]
    elif self.exclusive_classes is None:
        output_layers = [Affine(len(self.overlapping_classes), init, activation=Logistic())]
    else:
        output_branch = BranchNode(name='exclusive_overlapping')
        output_layers = Tree([[SkipNode(),
                               output_branch,
                               Affine(len(self.exclusive_classes), init, activation=Softmax())],
                              [output_branch,
                               Affine(len(self.overlapping_classes), init, activation=Logistic())]])

    layers = [input_layers,
              # this is where inputs meet, and where we may want to add depth or
              # additional functionality
              Dropout(keep=0.8),
              output_layers]
    super(ClassifierNetwork, self).__init__(layers, optimizer=optimizer)
def prepare_model(ninputs=9600, nclass=5):
    """
    Set up and compile the model architecture (logistic regression).
    """
    layers = [Affine(nout=nclass, init=Gaussian(loc=0.0, scale=0.01), activation=Softmax())]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    opt = Adam()
    lrmodel = Model(layers=layers)

    return lrmodel, opt, cost
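A minimal usage sketch (not from the original source) showing how the returned model, optimizer, and cost are typically wired together through neon's Model.fit; the backend, the random ArrayIterator data, and the epoch count are illustrative assumptions only.

import numpy as np
from neon.backends import gen_backend
from neon.data import ArrayIterator
from neon.callbacks.callbacks import Callbacks

be = gen_backend(backend='cpu', batch_size=128)      # assumed backend and batch size
X = np.random.rand(1024, 9600).astype(np.float32)    # hypothetical features of width ninputs
y = np.random.randint(5, size=1024)                  # hypothetical labels for nclass=5
train_set = ArrayIterator(X, y, nclass=5)

lrmodel, opt, cost = prepare_model(ninputs=9600, nclass=5)
lrmodel.fit(train_set, optimizer=opt, cost=cost, num_epochs=10,
            callbacks=Callbacks(lrmodel))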
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3, init=init_one, bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]

    # Conv and Affine are compound layers (lists of Convolution/Linear, Bias and
    # Activation layers), so flatten everything into a single layer list
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def __init__(self, state_size, num_steers, num_speeds, args):
    # remember parameters
    self.state_size = state_size
    self.num_steers = num_steers
    self.num_speeds = num_speeds
    self.num_actions = num_steers + num_speeds
    self.num_layers = args.hidden_layers
    self.hidden_nodes = args.hidden_nodes
    self.batch_size = args.batch_size
    self.discount_rate = args.discount_rate
    self.clip_error = args.clip_error

    # create Neon backend
    self.be = gen_backend(backend=args.backend,
                          batch_size=args.batch_size,
                          rng_seed=args.random_seed,
                          device_id=args.device_id,
                          datatype=np.dtype(args.datatype).type,
                          stochastic_round=args.stochastic_round)

    # prepare tensors once and reuse them
    self.input_shape = (self.state_size, self.batch_size)
    self.input = self.be.empty(self.input_shape)
    self.targets = self.be.empty((self.num_actions, self.batch_size))

    # create model
    self.model = Model(layers=self._createLayers())
    self.cost = GeneralizedCost(costfunc=SumSquared())
    self.model.initialize(self.input_shape[:-1], self.cost)
    if args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=args.learning_rate,
                                 decay_rate=args.decay_rate,
                                 stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=args.learning_rate,
                              stochastic_round=args.stochastic_round)
    elif args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=args.decay_rate,
                                  stochastic_round=args.stochastic_round)
    else:
        assert False, "Unknown optimizer"

    # create target model
    self.target_steps = args.target_steps
    self.train_iterations = 0
    if self.target_steps:
        self.target_model = Model(layers=self._createLayers())
        self.target_model.initialize(self.input_shape[:-1])
        self.save_weights_prefix = args.save_weights_prefix
    else:
        self.target_model = self.model
def fit_model(train_set, val_set, num_epochs=50):
    relu = Rectlin()
    conv_params = {'strides': 1,
                   'padding': 1,
                   'init': Xavier(local=True),  # Xavier init: uniform over roughly +/- sqrt(3 / fan_in)
                   'bias': Constant(0),
                   'activation': relu}

    layers = []
    layers.append(Conv((3, 3, 128), **conv_params))  # 3x3 kernels, 128 feature maps
    layers.append(Pooling(2))
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Pooling(2))                        # max over each 2x2 window
    layers.append(Conv((3, 3, 128), **conv_params))
    layers.append(Dropout(keep=0.5))                 # keep each activation with probability 0.5
    # fully connected layer of 128 units on the flattened convolutional output
    layers.append(Affine(nout=128, init=GlorotUniform(), bias=Constant(0), activation=relu))
    layers.append(Dropout(keep=0.5))
    layers.append(Affine(nout=2, init=GlorotUniform(), bias=Constant(0),
                         activation=Softmax(), name="class_layer"))

    # initialize model object
    cnn = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    optimizer = Adam()

    # callbacks = Callbacks(cnn)
    # out_fname = 'yarin_fdl_out_data.h5'
    callbacks = Callbacks(cnn, eval_set=val_set, eval_freq=1)  # , output_file=out_fname

    cnn.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost,
            callbacks=callbacks)
    return cnn
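A hypothetical driver for fit_model (not part of the original source); it assumes two-class, 3-channel 64x64 image data delivered through ArrayIterator, matching the two-unit Softmax head defined above. The array sizes and batch size are illustrative.

import numpy as np
from neon.backends import gen_backend
from neon.data import ArrayIterator

be = gen_backend(backend='cpu', batch_size=32)             # assumed backend and batch size
X = np.random.rand(320, 3 * 64 * 64).astype(np.float32)    # 320 flattened 3x64x64 images
y = np.random.randint(2, size=320)                         # two hypothetical classes
train_set = ArrayIterator(X[:256], y[:256], nclass=2, lshape=(3, 64, 64))
val_set = ArrayIterator(X[256:], y[256:], nclass=2, lshape=(3, 64, 64))

cnn = fit_model(train_set, val_set, num_epochs=2)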
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3, init=init_one, bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one, bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]

    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()

    # separate layer_list into two: the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]

    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'

    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
def _set_optimizer(self):
    """ Initializes the selected optimization algorithm. """
    _logger.debug("Optimizer = %s" % str(self.args.optimizer))
    if self.args.optimizer == 'rmsprop':
        self.optimizer = RMSProp(learning_rate=self.args.learning_rate,
                                 decay_rate=self.args.decay_rate,
                                 stochastic_round=self.args.stochastic_round)
    elif self.args.optimizer == 'adam':
        self.optimizer = Adam(learning_rate=self.args.learning_rate,
                              stochastic_round=self.args.stochastic_round)
    elif self.args.optimizer == 'adadelta':
        self.optimizer = Adadelta(decay=self.args.decay_rate,
                                  stochastic_round=self.args.stochastic_round)
    else:
        assert False, "Unknown optimizer"
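A minimal sketch (an assumption, not from the original source) of the argument namespace _set_optimizer expects, inferred from the attributes it reads; the flag names match the code above, but the default values are illustrative only.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--optimizer', choices=['rmsprop', 'adam', 'adadelta'], default='rmsprop')
parser.add_argument('--learning_rate', type=float, default=0.00025)
parser.add_argument('--decay_rate', type=float, default=0.95)
parser.add_argument('--stochastic_round', type=int, default=0)
args = parser.parse_args()  # stored as self.args before _set_optimizer is called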
def __init__(self, args, max_action_no, batch_dimension):
    self.args = args
    self.train_batch_size = args.train_batch_size
    self.discount_factor = args.discount_factor
    self.use_gpu_replay_mem = args.use_gpu_replay_mem

    self.be = gen_backend(backend='gpu', batch_size=self.train_batch_size)
    self.input_shape = (batch_dimension[1], batch_dimension[2],
                        batch_dimension[3], batch_dimension[0])
    self.input = self.be.empty(self.input_shape)
    self.input.lshape = self.input_shape  # HACK: needed for convolutional networks
    self.targets = self.be.empty((max_action_no, self.train_batch_size))

    if self.use_gpu_replay_mem:
        self.history_buffer = self.be.zeros(batch_dimension, dtype=np.uint8)
        self.input_uint8 = self.be.empty(self.input_shape, dtype=np.uint8)
    else:
        self.history_buffer = np.zeros(batch_dimension, dtype=np.float32)

    self.train_net = Model(self.create_layers(max_action_no))
    self.cost = GeneralizedCost(costfunc=SumSquared())
    # bug fix: disable layer parallelism (workaround for a neon issue)
    for l in self.train_net.layers.layers:
        l.parallelism = 'Disabled'
    self.train_net.initialize(self.input_shape[:-1], self.cost)

    self.target_net = Model(self.create_layers(max_action_no))
    # bug fix: same parallelism workaround for the target network
    for l in self.target_net.layers.layers:
        l.parallelism = 'Disabled'
    self.target_net.initialize(self.input_shape[:-1])

    if self.args.optimizer == 'Adam':  # Adam
        self.optimizer = Adam(beta_1=args.rms_decay,
                              beta_2=args.rms_decay,
                              learning_rate=args.learning_rate)
    else:  # Neon RMSProp
        self.optimizer = RMSProp(decay_rate=args.rms_decay,
                                 learning_rate=args.learning_rate)

    self.max_action_no = max_action_no
    self.running = True
def test_adam(backend_default):
    adam = Adam()
    param = np.random.rand(200, 128)
    param2 = copy.deepcopy(param)
    grad = 0.01 * np.random.rand(200, 128)
    grad2 = grad / 128.  # reference gradient scaled by the batch size to match the optimizer
    states = [0.01 * np.random.rand(200, 128),
              0.01 * np.random.rand(200, 128)]
    states2 = [copy.deepcopy(states[0]),
               copy.deepcopy(states[1])]
    epoch = 1
    t = 1

    # reference NumPy implementation of one bias-corrected Adam step
    l = adam.learning_rate * np.sqrt(1. - adam.beta_2 ** t) / (1. - adam.beta_1 ** t)
    m, v = states2
    m[:] = m * adam.beta_1 + (1. - adam.beta_1) * grad2
    v[:] = v * adam.beta_2 + (1. - adam.beta_2) * grad2 * grad2
    param2[:] -= l * m / (np.sqrt(v) + adam.epsilon)

    param_list = [((wrap(param), wrap(grad)),
                   [wrap(states[0]), wrap(states[1])])]
    compare_tensors(adam, param_list, param2, tol=1e-7, epoch=epoch)
# create model and cost
model, cost = create_model(dis_model='dc', gen_model='dc',
                           cost_type='original' if args.original_cost else 'modified',
                           im_size=64, n_chan=3, n_noise=100, n_gen_ftr=64, n_dis_ftr=64,
                           n_extra_layers=0, batch_norm=True)

# setup optimizer
optimizer = Adam(learning_rate=2e-4, beta_1=0.5)

# setup data provider
train = make_loader(args.manifest['train'], args.manifest_root, model.be,
                    args.subset_pct, random_seed)

# configure callbacks
callbacks = Callbacks(model, **args.callback_args)
fdir = ensure_dirs_exist(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'results/'))
fname = os.path.splitext(os.path.basename(__file__))[0] + \
    '_[' + datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + ']'
im_args = dict(filename=os.path.join(fdir, fname), hw=64,
               num_samples=args.batch_size, nchan=3,
if options.rng_seed:
    options.rng_seed = int(time())
np.random.seed(options.rng_seed)

# don't display errors writing to subset copies of DataFrames
pd.options.mode.chained_assignment = None

# for now, we don't trust the mkl backend
if options.backend == 'mkl':
    print('Resetting mkl backend to cpu')
    options.backend = 'cpu'

Config.options = options

optimizer = Adam(learning_rate=options.learning_rate)

overlapping_classes = options.overlapping_classes.strip(' \"\'').split() \
    if options.sentiment_path is None else None
exclusive_classes = options.exclusive_classes.strip(' \"\'').split() \
    if options.sentiment_path is None else ['positive', 'negative']

if options.sentiment_path:
    classifier = TextClassifier(options.word_vectors, options.model_file,
                                optimizer=optimizer, num_analytics_features=0,
                                num_subject_words=0, num_body_words=60,
                                overlapping_classes=overlapping_classes,
                                exclusive_classes=exclusive_classes,
                                network_type=options.network_type)

    # we will supersede the email classification function to test the
    # content classification network only
    print('loading sentiment data from {}'.format(options.sentiment_path))
    sdata = SentimentLoader(classifier, options.sentiment_path)

    if options.shuffle_test:
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Conv((3, 3, 256), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Pooling(2, strides=2))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
layers.append(Conv((3, 3, 512), **conv_params))
# not used after this layer

model = Model(layers=layers)

# cost = GeneralizedCost(costfunc=CrossEntropyBinary())
cost = GeneralizedCost(costfunc=SumSquared())

# fit and validate
optimizer = Adam(learning_rate=0.001)

# configure callbacks
# callbacks = Callbacks(model, eval_set=eval_set)
callbacks = Callbacks(model, train_set=train)

model.fit(train, cost=cost, optimizer=optimizer, num_epochs=10, callbacks=callbacks)
            init=init, batch_norm=False, activation=Logistic(shortcut=False))]

layers = GenerativeAdversarial(generator=Sequential(G_layers, name="Generator"),
                               discriminator=Sequential(D_layers, name="Discriminator"))

# setup cost function as CrossEntropy
cost = GeneralizedCost(costfunc=GANCost(cost_type="dis", original_cost=args.original_cost))

# setup optimizer
optimizer = Adam(learning_rate=0.0005, beta_1=0.5)

# initialize model
noise_dim = (2, 7, 7)
gan = GAN(layers=layers, noise_dim=noise_dim, k=args.kbatch)

# configure callbacks
callbacks = Callbacks(gan, eval_set=valid_set, **args.callback_args)
callbacks.add_callback(GANPlotCallback(filename=splitext(__file__)[0], hw=27))

# run fit
gan.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
        cost=cost, callbacks=callbacks)
skip = SkipThought(vocab_size_layer, embed_dim, init_embed_dev, nhidden,
                   rec_layer=GRU, init_rec=Orthonormal(), activ_rec=Tanh(),
                   activ_rec_gate=Logistic(), init_ff=Uniform(low=-0.1, high=0.1),
                   init_const=Constant(0.0))
model = Model(skip)

if args.model_file and os.path.isfile(args.model_file):
    neon_logger.display("Loading saved weights from: {}".format(args.model_file))
    model_dict = load_obj(args.model_file)
    model.deserialize(model_dict, load_states=True)
elif args.model_file:
    neon_logger.display("Unable to find model file {}, restarting training.".
                        format(args.model_file))

cost = Multicost(costs=[GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True)),
                        GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))],
                 weights=[1, 1])

optimizer = Adam(gradient_clip_norm=gradient_clip_norm)

# metric
valmetric = None

# configure callbacks
if valid_split and valid_split > 0.0:
    callbacks = MetricCallback(eval_set=valid_set, metric=valmetric, epoch_freq=args.eval_freq)
else:
    callbacks = Callbacks(model, metric=valmetric, **args.callback_args)

# train model
model.fit(train_set, optimizer=optimizer, num_epochs=args.epochs,
          cost=cost, callbacks=callbacks)
def __init__(self, vocab_path, model_path, optimizer=Adam(),
             overlapping_classes=None, exclusive_classes=None, class_threshold=0.6,
             num_analytics_features=4, num_subject_words=8, num_body_words=52,
             network_type='conv_net', preserve_case=False, lookup_size=0, lookup_dim=0,
             regex=r"(:\s?\)|:-\)|\(\s?:|\(-:|:\'\)|"
                   r":\s?D|:-D|x-?D|X-?D|"
                   r";\)|;-\)|\(-;|\(;|;D|;-D|"
                   r"<3|:\*|;\*|"
                   r":\(|:-\(|\):|\)-:|"
                   r":'\(|:,\(|:\"\(|"
                   r"\(\(\(|\)\)\)|X-\(|\)-X|:\s?@|:-@|@\s?:|@-:|>:\(|\):<|" +
                   '\U0001F620|' + '\U0001F595|' + '\U0001F612|' + '\U0001F608|' +
                   '\U0001F480|' + '\U0001F4A2|' + '\U0001F4A3|' + '\u2620|' + '\uFE0F|' +
                   r"\w+|[^\w\s\n]+)",
             name=str(uuid.uuid4())):
    """
    :param vocab_path: path to a stored word vector vocabulary
    :param model_path: path used to save and load the trained model
    :param optimizer: neon optimizer used for training (Adam by default)
    :param overlapping_classes: list of non-exclusive (multi-label) class names
    :param exclusive_classes: list of mutually exclusive class names
    :param class_threshold: probability threshold for assigning overlapping class labels
    :param num_analytics_features: number of additional analytics input features
    :param num_subject_words: number of subject words used as input
    :param num_body_words: number of body words used as input
    :param network_type: 'conv_net' or 'lstm'
    :param lookup_size: if zero, we expect to be able to load a vocabulary, otherwise,
        we will make a lookup table that grows as we train
    :param lookup_dim: dimensions of a lookup table if we have one
    :param name: unique name of this classifier instance
    """
    assert (overlapping_classes is not None) or (exclusive_classes is not None)
    self.name = name
    self.num_subject_words = num_subject_words
    self.num_body_words = num_body_words
    self.num_words = num_subject_words + num_body_words
    self.class_threshold = class_threshold
    self.lookup_dim = lookup_dim
    self.lookup_size = lookup_size
    self.preserve_case = preserve_case
    self.regex = regex
    self.vocab = Vocabularies.load_vocabulary(vocab_path)
    self.vocab_path = vocab_path
    if self.vocab is None:
        if lookup_size > 0:
            self.wordvec_dimensions = 1
        else:
            self.wordvec_dimensions = 0
    else:
        for wv in self.vocab.values():
            self.wordvec_dimensions = len(wv)
            break
    # without a vocabulary, a growing lookup table is required
    assert self.wordvec_dimensions > 0
    self.recurrent = network_type == 'lstm'
    self.exclusive_classes = exclusive_classes
    self.overlapping_classes = overlapping_classes
    self.neuralnet_params = {'overlapping_classes': overlapping_classes,
                             'exclusive_classes': exclusive_classes,
                             'optimizer': optimizer,
                             'network_type': network_type,
                             'analytics_input': False if num_analytics_features == 0 else True,
                             'num_words': self.num_words,
                             'width': self.wordvec_dimensions,
                             'lookup_size': lookup_size,
                             'lookup_dim': lookup_dim}
    self.model_path = model_path
    self.network_type = network_type
    self.num_features = num_analytics_features
    self.neuralnet = None
    self.optimizer = optimizer
    self.initialize_neural_network()
# Model construction
story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
          Affine(babi.vocab_size, init=GlorotUniform(), activation=Softmax())]

model = Model(layers=layers)

# setup callbacks
callbacks = Callbacks(model, train_set, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=Adam(),
          num_epochs=args.epochs,
          cost=GeneralizedCost(costfunc=CrossEntropyMulti()),
          callbacks=callbacks)

# output accuracies
print('Train Accuracy = %.1f%%' % (model.eval(train_set, metric=Accuracy()) * 100))
print('Test Accuracy = %.1f%%' % (model.eval(valid_set, metric=Accuracy()) * 100))
if layer.name != 'Input Layer':
    # load only the weights from the pre-trained model, not the optimizer state
    layer.load_weights(params, load_states=False)

del trained_resnet
PRETRAINED = True
print('Pre-trained weights loaded.')

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

modelFileName = 'LUNA16_resnetHDF_subset{}.prm'.format(SUBSET)

# # If a model file exists, then load it and start from there.
# if os.path.isfile(modelFileName):
#     lunaModel = Model(modelFileName)

optHead = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

if PRETRAINED:
    # set a slower learning rate for the pre-trained ResNet layers
    optPretrained = Adam(learning_rate=0.0003, beta_1=0.9, beta_2=0.999)
else:
    optPretrained = optHead

mapping = {'default': optPretrained,  # default optimizer applied to the pretrained sections
           'Input Layer': optHead,    # the layer named 'Input Layer'
           'Custom Head 1': optHead,
           'Custom Head 2': optHead,
           'Affine': optHead}         # all layers from the Affine class

# use multiple optimizers
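The fragment stops at the optimizer mapping; a plausible continuation (an assumption, not shown in the source) wraps it in a MultiOptimizer so each named layer group gets its own Adam instance, then passes it to fit. The names lunaModel, train_set, num_epochs, and callbacks are hypothetical stand-ins for objects defined elsewhere in the surrounding script.

optimizer = MultiOptimizer(mapping)
# lunaModel.fit(train_set, optimizer=optimizer, num_epochs=num_epochs,
#               cost=cost, callbacks=callbacks)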