def __init__(self, corpus, words, **kwargs):
    """Set up the memory-network trainer: model, SGD optimizer, vocabulary,
    and per-epoch metric logs. Hyper-parameters come from kwargs with the
    defaults shown below."""
    self.batchsize = kwargs.get('batchsize', 128)
    self.epochs = kwargs.get('epochs', 100)
    # Keep the raw kwargs so the full configuration can be inspected later.
    self.kwargs = kwargs
    # (phase, metric) -> list of per-epoch values.
    self.log = {
        ('training', 'loss'): [],
        ('training', 'perplexity'): [],
        ('training', 'throughput'): [],
        ('validation', 'loss'): [],
        ('validation', 'perplexity'): [],
        ('validation', 'throughput'): []
    }
    self.memsize = kwargs.get('memsize', 100)
    self.model = Model(kwargs.get('edim', 150),
                       kwargs.get('init_hid', 0.1),
                       kwargs.get('init_std', 0.05),
                       kwargs.get('lindim', 75),
                       kwargs.get('memsize', 100),
                       kwargs.get('nhop', 6),
                       words)
    self.optimizer = optimizers.SGD(kwargs.get('sdt', 0.01))
    self.vocabulary = Vocabulary(corpus, words)
    # CPU backend; presumably switched to cupy elsewhere for GPU — verify.
    self.xp = np
    self.optimizer.setup(self.model)
    self.optimizer.add_hook(
        chainer.optimizer.GradientClipping(kwargs.get('maxgradnorm', 50)))
def check_LARS(self):
    """Check the GradientLARS hook: param0 gets the LARS-scaled update,
    param1 (on the other side of the threshold) the plain decayed update."""
    w0 = self.target[0].param.data
    g0 = self.target[0].param.grad
    w1 = self.target[1].param.data
    g1 = self.target[1].param.grad
    xp = cuda.get_array_module(w0)
    threshold = 1e-2
    weight_decay = 0.2
    eps = 1e-9
    # LARS local rate: ||w|| / (eps + ||g|| + wd * ||w||)
    p0_norm = xp.linalg.norm(w0)
    g0_norm = xp.linalg.norm(g0)
    clip_rate = p0_norm / (eps + g0_norm + weight_decay * p0_norm)
    # Expected values must be computed BEFORE update() mutates the params.
    expect0 = w0 - clip_rate * (g0 + weight_decay * w0)
    # rate 1.0: presumably param1's norm ratio exceeds the threshold — verify.
    expect1 = w1 - 1.0 * (g1 + weight_decay * w1)
    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(
        optimizer_hooks.GradientLARS(threshold=threshold,
                                     weight_decay=weight_decay,
                                     eps=eps))
    opt.update()
    # update() writes in place, so w0/w1 now hold the updated parameters.
    testing.assert_allclose(expect0, w0)
    testing.assert_allclose(expect1, w1)
def predictor(cnn_path, data, batch, gpu=0):
    """Run the saved CNN over `data` in minibatches and return the argmax
    class label per sample as one concatenated array.

    cnn_path: directory containing gradient_cnn.npz / gradient_optimizer.npz.
    gpu: 1 to run on GPU, anything else for CPU.
    """
    model = L.Classifier(vehicle_classify_CNN())
    optimizer = optimizers.SGD()
    serializers.load_npz(os.path.join(cnn_path, "gradient_cnn.npz"), model)
    optimizer.setup(model)
    serializers.load_npz(os.path.join(cnn_path, "gradient_optimizer.npz"), optimizer)
    if gpu == 1:
        model.to_gpu()
    # Batch start offsets. The last (possibly partial) batch is handled
    # separately after the loop, hence the pop().
    r = list(range(0, len(data), batch))
    r.pop()
    # results = np.empty((0,1),int)
    # result = None
    for i in r:
        if gpu == 1:
            x = cuda.to_gpu(data[i:i+batch])
        else:
            x = data[i:i+batch]
        result = F.softmax(model.predictor(x).data).data.argmax(axis=1)
        if gpu == 1:
            result = cuda.to_cpu(result)
        if i == 0:
            results = result
        else:
            results = np.concatenate((results, result), axis=0)
    # Tail batch: starts right after the last full batch, or at 0 when the
    # whole input fit into a single batch (r emptied by the pop above).
    if len(r) == 0:
        j = 0
    else:
        j = i + batch
    if gpu == 1:
        x = cuda.to_gpu(data[j:])
    else:
        x = data[j:]
    result = F.softmax(model.predictor(x).data).data.argmax(axis=1)
    if gpu == 1:
        result = cuda.to_cpu(result)
    if len(r) == 0:
        results = result
    else:
        results = np.concatenate((results, result), axis=0)
    return results
def __init__(self, args=None):
    """Configure the agent. With args=None, sensible defaults are used;
    otherwise every field is taken from the parsed command-line args."""
    self.device_id = -1  # -1 = CPU
    self.optimizer = optimizers.SGD()
    self.nstep = 1
    self.nstep_discount = 0.99
    self.discount = 0.99
    self.lambda_nstep = 1.0
    self.qtype = QType.DQN
    # Factory taking (observation size, action size) -> model.
    self.create_model = lambda obs, act: ChainerMLP(obs, act, (64, 64))
    if args:
        self.device_id = args.device_id
        self.discount = args.discount
        self.nstep = args.nstep
        self.nstep_discount = args.nstep_discount
        self.lambda_nstep = args.lambda_nstep
        self.optimizer = create_optimizer(args)
        self.create_model = lambda obs, act: create_model(
            args, obs, act)
        if args.qtype == "dqn":
            self.qtype = QType.DQN
        elif args.qtype == "ddqn":
            self.qtype = QType.DoubleDQN
        else:
            # Only plain and double DQN are implemented.
            raise NotImplementedError
def setOptimizer(args, EncDecAtt):
    """Build the optimizer selected by args.optimizer and attach the model.

    Supported names: 'SGD', 'Adam', 'MomentumSGD', 'AdaDelta'.

    Returns the configured optimizer.
    Raises ValueError for an unknown optimizer name (previously this was
    `assert 0`, which is silently stripped under `python -O`).
    """
    if args.optimizer == 'SGD':
        optimizer = optimizers.SGD(lr=args.learning_rate)
        rate = optimizer.lr
    elif args.optimizer == 'Adam':
        # assert 0, "Currently Adam is not supported for asynchronous update"
        optimizer = optimizers.Adam(alpha=args.learning_rate)
        rate = optimizer.alpha
    elif args.optimizer == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=args.learning_rate)
        rate = optimizer.lr
    elif args.optimizer == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=args.learning_rate)
        rate = optimizer.rho
    else:
        raise ValueError('unsupported optimizer: %s' % args.optimizer)
    sys.stdout.write('# SET Learning %s: initial learning rate: %e\n' %
                     (args.optimizer, rate))
    optimizer.setup(EncDecAtt.model)  # attach the model to the optimizer
    if args.optimizer == 'Adam':
        optimizer.t = 1  # small hack to avoid a warning; not ideal
    return optimizer
def get_opt(args):
    """Instantiate a chainer optimizer according to args.opt_model.

    Each args.alphaN equal to 0 means "use the built-in default for that
    hyper-parameter"; any other value overrides it. An unknown name prints
    an error and exits with status 1.
    """
    def pick(value, default):
        # 0 is the sentinel for "not specified".
        return default if value == 0 else value

    name = args.opt_model
    if name == "SGD":
        return optimizers.SGD(lr=pick(args.alpha0, 0.01))
    if name == "AdaGrad":
        return optimizers.AdaGrad(lr=pick(args.alpha0, 0.01))
    if name == "AdaDelta":
        return optimizers.AdaDelta(rho=pick(args.alpha0, 0.95),
                                   eps=pick(args.alpha1, 1e-06))
    if name == "Momentum":
        return optimizers.MomentumSGD(lr=pick(args.alpha0, 0.01),
                                      momentum=pick(args.alpha1, 0.9))
    if name == "NAG":
        return optimizers.NesterovAG(lr=pick(args.alpha0, 0.01),
                                     momentum=pick(args.alpha1, 0.9))
    if name == "RMS":
        return optimizers.RMSpropGraves()
    if name == "SM":
        return optimizers.SMORMS3()
    if name == "Adam":  # default case
        return optimizers.Adam(alpha=pick(args.alpha0, 0.001),
                               beta1=pick(args.alpha1, 0.9),
                               beta2=pick(args.alpha2, 0.999),
                               eps=pick(args.alpha3, 1e-08))
    print('no such optimization method', args.opt_model)
    sys.exit(1)
def __init__(self, optimizer=None, vocab=None, n_input_units=1000,
             n_units=650, grad_clip=5, bproplen=35):
    """Build the LSTM language model wrapper.

    optimizer: chainer optimizer to use; defaults to SGD(lr=1.0).
    vocab: token vocabulary; defaults to BatchTrainer.vocab.
    grad_clip: gradient-clipping threshold added as an optimizer hook.
    """
    if vocab is None:
        vocab = BatchTrainer.vocab
    self.vocab = vocab
    n_vocab = len(vocab)
    super(LSTM, self).__init__('LSTM')
    self.func = deel.model.lstm.RNNLM(n_input_units=n_input_units,
                                      n_vocab=n_vocab,
                                      n_units=n_units)
    self.func.compute_accuracy = False
    # Uniform init of all parameters in [-0.1, 0.1].
    for param in self.func.params():
        data = param.data
        data[:] = np.random.uniform(-0.1, 0.1, data.shape)
    if Deel.gpu >= 0:
        self.func.to_gpu()
    # Bug fix: a caller-supplied optimizer was previously ignored and
    # self.optimizer was only assigned when optimizer was None, leaving it
    # undefined otherwise.
    self.optimizer = optimizer if optimizer is not None else optimizers.SGD(lr=1.)
    self.optimizer.setup(self.func)
    self.clip = chainer.optimizer.GradientClipping(grad_clip)
    self.optimizer.add_hook(self.clip)
    self.accum_loss = 0
    self.cur_log_perp = Deel.xp.zeros(())
def train_task(args, train_name, model, epoch_num, train_dataset,
               test_dataset_dict, batch_size):
    """Train `model` with plain SGD on `train_dataset`, evaluating every
    dataset in `test_dataset_dict` and logging/printing/plotting progress.

    The accuracy plot is written to `<train_name>.png` under args.out.
    """
    opt = optimizers.SGD()
    opt.setup(model)

    train_iter = iterators.SerialIterator(train_dataset, batch_size)
    test_iters = {}
    for name, dataset in test_dataset_dict.items():
        test_iters[name] = iterators.SerialIterator(
            dataset, batch_size, repeat=False, shuffle=False)

    trainer = training.Trainer(
        training.StandardUpdater(train_iter, opt),
        (epoch_num, 'epoch'), out=args.out)

    # One evaluator per held-out dataset, reported under its own prefix.
    for name, test_iter in test_iters.items():
        trainer.extend(extensions.Evaluator(test_iter, model), name)

    test_names = list(test_dataset_dict.keys())
    report_columns = (['epoch', 'main/loss']
                      + [name + '/main/loss' for name in test_names]
                      + ['main/accuracy']
                      + [name + '/main/accuracy' for name in test_names])
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(report_columns))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.PlotReport(
        [name + "/main/accuracy" for name in test_names],
        file_name=train_name + ".png"))
    trainer.run()
def init_model(vocab_size, char_type_size):
    """Build the gated segmentation model and its optimizer.

    Returns (model, optimizer). Relies on module-level hyper-parameters:
    window, embed_units, char_type_embed_units, hidden_units, label_num,
    opt_selection, learning_rate.
    """
    # All gates and hidden1 share one input width: a 3-position window of
    # word + char-type embeddings, plus the recurrent hidden state.
    # (Previously this expression was duplicated four times.)
    gate_in_units = window * (embed_units + char_type_embed_units) * 3 + hidden_units
    model = FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        hidden1=F.Linear(gate_in_units, hidden_units),
        i_gate=F.Linear(gate_in_units, hidden_units),
        f_gate=F.Linear(gate_in_units, hidden_units),
        o_gate=F.Linear(gate_in_units, hidden_units),
        # +12 extra input features at the output layer — presumably the
        # dictionary features (a dict_embed=F.Linear(12, ...) used to exist
        # here); confirm against the forward pass.
        output=F.Linear(hidden_units + 12, label_num),
    )
    if opt_selection == 'Adagrad':
        opt = optimizers.AdaGrad(lr=learning_rate)
    elif opt_selection == 'SGD':
        opt = optimizers.SGD()
    elif opt_selection == 'Adam':
        opt = optimizers.Adam()
    else:
        opt = optimizers.AdaGrad(lr=learning_rate)
        print('Adagrad is chosen as default')  # fixed typo: was 'defaut'
    opt.setup(model)
    return model, opt
def sample_4():
    """Train TwoLayerNet with a hand-written loop: forward, MSE loss,
    backward, SGD update — printing the loss each epoch."""
    inputs = Variable(X.copy())
    targets = Variable(Y.copy())

    # network and optimizer, wired together
    net = TwoLayerNet(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
    sgd = P.SGD(lr=LEARNING_RATE)
    sgd.setup(net)

    for _ in range(EPOCHS):
        # forward pass
        prediction = net(inputs)
        # loss
        loss = F.mean_squared_error(prediction, targets)
        print(loss.data)
        # reset grads, backprop, apply update
        net.cleargrads()
        loss.backward()
        sgd.update()
def sample_5():
    """Train TwoLayerNet through the Trainer/Updater abstraction instead of
    a manual loop; logs epoch, loss and elapsed time to 'result'."""
    dataset = D.TupleDataset(X.copy(), Y.copy())
    iterator = Iter.SerialIterator(dataset, batch_size=DATA_SIZE,
                                   shuffle=False)

    # network wrapped in a loss calculator; optimizer drives the wrapper
    net = TwoLayerNet(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
    loss_calculator = LossCalculator(net)
    sgd = P.SGD(lr=LEARNING_RATE)
    sgd.setup(loss_calculator)

    trainer = training.Trainer(
        training.StandardUpdater(iterator, sgd),
        (EPOCHS, 'epoch'), out='result')
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'elapsed_time']))
    trainer.run()
def setup_optimizer(self, optimizer_name, gradient_clipping=3,
                    weight_decay=0.00001, **kwargs):
    """Create the optimizer named `optimizer_name`, attach it to this model,
    and add gradient-clipping and weight-decay hooks.

    kwargs are forwarded to the optimizer constructor.
    Raises ValueError for an unknown name (previously self.opt was silently
    left unset/stale, failing later with an obscure error).
    """
    factories = {
        "Adam": optimizers.Adam,
        "AdaDelta": optimizers.AdaDelta,
        "AdaGrad": optimizers.AdaGrad,
        "RMSprop": optimizers.RMSprop,
        "RMSpropGraves": optimizers.RMSpropGraves,
        "SGD": optimizers.SGD,
        "MomentumSGD": optimizers.MomentumSGD,
    }
    if optimizer_name not in factories:
        raise ValueError("unknown optimizer: %s" % optimizer_name)
    self.opt = factories[optimizer_name](**kwargs)
    # self.opt.use_cleargrads()
    self.opt.setup(self)
    self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
    self.opt.add_hook(optimizer.WeightDecay(weight_decay))
    # Record the settings for later serialization/inspection.
    self.opt_params = {
        "optimizer_name": optimizer_name,
        "gradient_clipping": gradient_clipping,
        "weight_decay": weight_decay
    }
def __init__(self): self.joint_state = np.zeros((3), dtype=np.float32) # print self.joint_state self.action_num = 0 self.reward = 0.0 self.num_state = 0 self.num_step = 0 self.num_episode = 0 self.state = 0 self.next_state = 0 f = open( '/home/amsl/ros_catkin_ws/src/arm_q_learning/dqn_model/dqn_test8_dedede/dqn_arm_model_4500.dat', 'rb') self.model = pickle.load(f) if args.gpu >= 0: self.model.to_gpu() self.optimizer = optimizers.SGD() self.optimizer.setup(self.model) self.q_list = chainer.Variable(xp.zeros((1, 27), dtype=xp.float32)) self.action = 0 self.state = 0 self.next_state = 0 self.joint1 = self.init_state_joint1 self.joint3 = self.init_state_joint3 self.joint5 = self.init_state_joint5 self.next_joint1 = self.init_state_joint1 self.next_joint3 = self.init_state_joint3 self.next_joint5 = self.init_state_joint5
def __init__(self, n_pixels, g_hidden, d_hidden, d_learning_rate=0.01,
             g_learning_rate=0.05):
    """Assemble a simple GAN.

    The generator maps n_pixels inputs to n_pixels outputs through g_hidden
    hidden units; the discriminator maps the generator's output size to a
    single score through d_hidden hidden units. Each network gets its own
    SGD optimizer.
    """
    self.G = GeneratorNetwork(g_hidden, n_pixels, n_input=n_pixels)
    self.D = DiscriminatorNetwork(d_hidden, 1, n_input=n_pixels)

    self.g_optimizer = optimizers.SGD(lr=g_learning_rate)
    self.g_optimizer.setup(self.G)
    self.d_optimizer = optimizers.SGD(lr=d_learning_rate)
    self.d_optimizer.setup(self.D)
def __init__(self, nnpacker, logging=False):
    """Wire the nnpacker's layer functions into NNmanager with a plain SGD
    optimizer, softmax cross-entropy loss, and fixed epoch/batch settings."""
    self.nnpacker = nnpacker
    NNmanager.__init__(
        self,
        FunctionSet(**nnpacker.getFunctions()),
        optimizers.SGD(),
        F.softmax_cross_entropy,
        epoch=20,
        batchsize=100,
        logging=logging)
def init_net(self, config, randomize=True):
    """Prepare RNNLM model, defined in net.py.

    Picks a 1/2/3-layer RNNLM by config["num_layers"], wraps it in either a
    hierarchical-softmax model or a plain Classifier, optionally randomizes
    all parameters, and sets up SGD with gradient clipping.
    """
    layer_to_net = {
        1: net.RNNLM_1layer,
        2: net.RNNLM_2layer,
        3: net.RNNLM_3layer,
    }
    num_layers = config["num_layers"]
    try:
        rnnlm_cls = layer_to_net[num_layers]
    except KeyError:
        raise KeyError("Num of layers could be only from 1 to 3")
    self.lm = rnnlm_cls(len(self.vocab), config["hidden_size"],
                        ratio=config["keep_prob"], train=True)

    if self.hs:
        # hierarchical softmax
        self.model = net.HSMmodel(len(self.vocab), config["hidden_size"],
                                  self.tree, ratio=config["keep_prob"],
                                  train=True)
    else:
        # plain softmax
        self.model = L.Classifier(self.lm)
        self.model.compute_accuracy = False  # we only want the perplexity

    if randomize:
        scale = config["init_scale"]
        for param in self.model.params():
            param.data[:] = np.random.uniform(-scale, scale, param.data.shape)

    # Setup optimizer
    self.optimizer = optimizers.SGD(lr=config["learning_rate"])
    # self.optimizer = optimizers.Adam()
    self.optimizer.setup(self.model)
    self.optimizer.add_hook(
        chainer.optimizer.GradientClipping(config["max_grad_norm"]))
def optimizer(opt_str):
    """Resolve an optimizer instance from its name string (case-insensitive).

    Unknown names fall back to Adam(amsgrad=True) with a warning.
    """
    key = opt_str.lower()
    factories = {
        'adam': lambda: O.Adam(amsgrad=True),
        'ada_d': O.AdaDelta,
        'ada_g': O.AdaGrad,
        'm_sgd': O.MomentumSGD,
        'n_ag': O.NesterovAG,
        'rmsp': O.RMSprop,
        'rmsp_g': O.RMSpropGraves,
        'sgd': O.SGD,
        'smorms': O.SMORMS3,
    }
    factory = factories.get(key)
    if factory is not None:
        opt = factory()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))
    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
def make_map(self, floor, player, enemies, treasures):
    """Pick the two highest-scored map objects (enemies + treasures) for the
    current player state, record the training pair, and return them."""
    # Reshape the arguments: candidates array and (floor + player stats) input.
    map_obj = np.asarray(enemies + treasures)
    player_status = np.array([[floor] + player.status_array()
                              ]).astype(np.float32)
    # Lazy init: build the network on the first call.
    if self.model is None:
        self.x_len = len(player_status[0])
        self.y_len = len(map_obj)
        self.model = DirectorChain(self.x_len, self.y_len)
        self.optimizer = optimizers.SGD()
        self.optimizer.setup(self.model)
    # Score randomly while self.random is set, to avoid premature convergence.
    if (self.random):
        ans = np.random.rand(self.y_len)
    else:
        # Forward pass to get per-object scores.
        xV = Variable(player_status)
        ans = self.model.fwd(xV).data[0]
    # argsort()[:-3:-1] = indices of the two largest scores; sort ascending.
    result_index = np.sort(ans.argsort()[:-3:-1])
    result = map_obj[result_index]
    print("[" + ','.join(map(lambda t: t.name, result)) + "]")
    # Record the (input, chosen-indices) pair for later training.
    self.x_training.append(player_status)
    self.y_training.append(result_index)
    return result.tolist()
def check_gradient_noise(self):
    """Check the GradientNoise hook: with lr=1 the SGD step becomes
    w - (g + noise), and the noise function is called once per parameter."""
    w = self.target.param.data
    g = self.target.param.grad
    xp = cuda.get_array_module(w)
    noise_value = xp.asarray(self.noise_value)
    # Expected post-update value, computed before update() mutates params.
    expect = w - g - noise_value
    # Mocked noise function returns a fixed array and records its calls.
    noise = mock.Mock(return_value=noise_value)
    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    hook = optimizer_hooks.GradientNoise(self.eta, noise_func=noise)
    opt.add_hook(hook)
    opt.update()
    # Loose rtol: presumably both params share shape/grad setup — verify.
    testing.assert_allclose(expect, self.target.param.data, rtol=0.4)
    testing.assert_allclose(expect, self.target.param2.data, rtol=0.4)
    # One noise call per parameter (param and param2).
    self.assertEqual(noise.call_count, 2)
    call1 = mock.call(xp, (2, 3), np.dtype('float32'),
                      hook, self.target.param.update_rule)
    call2 = mock.call(xp, (2, 3), np.dtype('float32'),
                      hook, self.target.param2.update_rule)
    # Order does not matter
    assert (noise.mock_calls == [call1, call2]
            or noise.mock_calls == [call2, call1])
def test_can_create_valid_wrapper(self):
    """Wrapping SGD must yield a _MarkedProfileOptimizer that preserves the
    learning rate and exposes the wrapped chainer optimizer."""
    wrapped = create_marked_profile_optimizer(
        optimizers.SGD(lr=1.0), sync=True)

    self.assertIsNotNone(wrapped)
    np.testing.assert_allclose([wrapped.lr], [1.0])
    self.assertIsInstance(wrapped, _MarkedProfileOptimizer)
    self.assertIsInstance(wrapped.actual_optimizer, chainer.Optimizer)
def getOptimizer(opt_str):
    """Map a name string (case-insensitive) to a chainer optimizer instance.

    Unknown names fall back to Adam with a printed warning.
    """
    key = opt_str.lower()
    table = {
        'adam': O.Adam,
        'ada_d': O.AdaDelta,
        'ada_g': O.AdaGrad,
        'm_sgd': O.MomentumSGD,
        'n_ag': O.NesterovAG,
        'rmsp': O.RMSprop,
        'rmsp_g': O.RMSpropGraves,
        'sgd': O.SGD,
        'smorms': O.SMORMS3,
    }
    if key in table:
        opt = table[key]()
    else:
        opt = O.Adam()
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0]))
    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
def __init__(self):
    """Load a pretrained arm DQN model and set up target pose, SGD optimizer
    and joint bookkeeping."""
    self.joint_state = np.zeros((3), dtype=np.float32)
    self.action_num = 0
    self.reward = 0.0
    self.target_point = PointCloud()
    self.target_init_y = 0.000
    # self.target_init_x = 0.680
    # Place the target on a circle of radius L_2 in the x-y plane,
    # offset by 0.270 along x.
    self.target_init_x = math.sqrt(self.L_2**2 - self.target_init_y**2) + 0.270
    self.target_init_z = 0.960
    # self.target_init_z = 0.900
    self.num_step = 0
    self.num_episode = 0
    # NOTE(review): hard-coded absolute path; file handle never closed.
    f = open(
        '/home/amsl/ros_catkin_ws/src/arm_q_learning/dqn_model/dqn_test29_dedede/dqn_arm_model_40000.dat',
        'rb')
    self.model = pickle.load(f)
    if args.gpu >= 0:
        self.model.to_gpu()
    self.optimizer = optimizers.SGD(self.ALPHA)
    self.optimizer.setup(self.model)
    # 27 Q-values — presumably 3 moves per joint over 3 joints; verify.
    self.q_list = chainer.Variable(xp.zeros((1, 27), dtype=xp.float32))
    self.action = 0
    self.joint1 = self.init_joint1
    self.joint2 = self.init_joint2
    self.joint3 = self.init_joint3
    self.next_joint1 = self.init_next_joint1
    self.next_joint2 = self.init_next_joint2
    self.next_joint3 = self.init_next_joint3
def make_map(self, floor, player, enemies, treasures):
    """Choose two map objects by weighted random sampling over network
    scores (fully random while self.random), record the training pair into
    the current learning slot, and return the chosen objects."""
    # Reshape the arguments: candidates array and (floor + player stats) input.
    map_obj = np.asarray(enemies + treasures)
    player_status = np.array([[floor] + player.status_array()
                              ]).astype(np.float32)
    # Lazy init: build the network on the first call.
    if self.model is None:
        # NOTE(review): `len(player_status[0] + 2)` adds 2 element-wise
        # BEFORE taking len, so this equals len(player_status[0]);
        # `len(player_status[0]) + 2` was probably intended — confirm
        # against DirectorChain's expected input width before changing.
        self.x_len = len(player_status[0] + 2)
        self.y_len = len(map_obj)
        self.model = DirectorChain(self.x_len, self.y_len)
        self.optimizer = optimizers.SGD()
        self.optimizer.setup(self.model)
    if (self.random):
        # Fully random at first.
        result_index = random.choices(range(len(map_obj)), k=2)
    else:
        # Forward pass to get per-object scores.
        xV = Variable(player_status)
        ans = self.model.fwd(xV).data[0]
        # Shift so the sampling weights are non-negative.
        ans = ans - np.min(ans)
        # Weighted random choice keeps exploration alive.
        result_index = random.choices(range(len(map_obj)), k=2, weights=ans)
    result_index = np.sort(result_index)
    result = map_obj[result_index]
    # NOTE(review): `if random:` tests the random MODULE (always truthy),
    # not self.random; the body is a no-op so it is harmless, but this is
    # likely a typo for `if self.random:`.
    if random:
        #print("["+','.join(map(lambda t:t.name,result))+"]")
        pass
    # Record the (input, chosen-indices) pair for later training.
    self.x_training[self.learning_slot].append(player_status)
    self.y_training[self.learning_slot].append(result_index)
    return result.tolist()
def check_cleargrad(self):
    """Run one SGD update with a grad-clearing hook followed by a dummy
    hook, to exercise hook ordering on cleared gradients."""
    sgd = optimizers.SGD(lr=1)
    sgd.setup(self.target)
    for hook in (CleargradHook(self), DummyHook(self)):
        sgd.add_hook(hook)
    sgd.update()
def __init__(self): self.joint_state = np.zeros((3), dtype=np.float32) # print self.joint_state self.action_num = 0 self.reward = 0.0 self.num_step = 0 self.num_episode = 0 self.model = chainer.FunctionSet( l1=F.Linear(3, 1024), l2=F.Linear(1024, 512), l3=F.Linear(512, 256), l4=F.Linear(256, 128), l5=F.Linear(128, 64), l6=F.Linear(64, 27, initialW=np.zeros((27, 64), dtype=np.float32)), ) if args.gpu >= 0: self.model.to_gpu() self.optimizer = optimizers.SGD() self.optimizer.setup(self.model) self.q_list = chainer.Variable(xp.zeros((1, 27), dtype=xp.float32)) self.action = 0 self.joint1 = self.init_state_joint1 self.joint3 = self.init_state_joint3 self.joint5 = self.init_state_joint5 self.next_joint1 = self.init_state_joint1 self.next_joint3 = self.init_state_joint3 self.next_joint5 = self.init_state_joint5
def init_model():
    """Build the word-alignment model (phrase or non-phrase variant) and its
    optimizer; returns (model, optimizer).

    Relies on module-level flags and sizes: use_pre2, use_null, args.phrase,
    n_units, n_label, vocab_f, vocab_e, opt_name.
    """
    # Extra input units when the pre/null features are enabled.
    if use_pre2 == 'pre':
        pre_unit = 4
    else:
        pre_unit = 0
    if use_null == 'null':
        null_unit = 6
    else:
        null_unit = 0
    if args.phrase == 'phrase':
        phrase_unit = 4  # only defined (and used) in the phrase variant
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 2 + null_unit * 2, n_label),
                ww0=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
                ww1=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )
    else:
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(w0=F.Linear(
                n_units * 4 + null_unit * 4, n_label), ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  # source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  # source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  # target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  # target output
            embed_f=F.EmbedID(vocab_f['len_vocab'], n_units),  # source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'], n_units),  # target word embedding
        )
    # Optimizer selection; trailing comments show tuned alternatives.
    # NOTE(review): an unrecognised opt_name falls through with `optimizer`
    # unbound and raises NameError at setup below.
    if opt_name == 'SGD':
        optimizer = optimizers.SGD(lr=0.02)  # (lr=opt_score) # lr=0.01
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.001)  # (lr=opt_score) # lr=0.001
    elif opt_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=0.9)  # (rho=opt_score) # rho=0.9
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam(
            alpha=0.0001)  # (alpha=opt_score) # alpha=0.0001
    optimizer.setup(model)  # .collect_parameters()
    return model, optimizer
def fit(self, X, batchsize=100, n_iter=100, init_smooth=0.8,
        init_scale=0.1, lr=0.01, optimizer='Momentum'):
    """Fit the soft k-means sequence centroids to X.

    X: array of sequences (strings); centroids are initialised from unique
    sequences of exactly self.centroid_length.
    optimizer: one of 'Adam', 'Momentum', 'SGD'.
    Returns the per-iteration loss curve as a numpy array.
    """
    L = np.array([len(seq) for seq in X])
    self.max_length = np.max(L)
    # Seed centroids: unique sequences of the target length, sampled
    # without replacement.
    init = X[np.where(L == self.centroid_length)[0]]
    init = np.unique(init)
    init = init[np.random.choice(len(init), self.n_centroid, replace=False)]
    print(init)
    # Smooth one-hot -> log-space, perturb with Gumbel noise, scale, center.
    init_seq = one_hot_encoding(init, self.dict_alphabet, self.max_length,
                                init_smooth)
    init_seq[np.where(init_seq != 0)] = np.log(
        init_seq[np.where(init_seq != 0)])
    noise = np.random.gumbel(0, 1, init_seq.shape)
    init_seq[np.where(init_seq != 0)] += noise[np.where(init_seq != 0)]
    init_seq *= init_scale
    init_seq = np.transpose(
        np.transpose(init_seq, (1, 0, 2)) - np.mean(init_seq, axis=1),
        (1, 0, 2))
    self.model = Chain(kmeans=SoftKMeansLayer(self.n_centroid,
                                              self.centroid_length,
                                              init_W=init_seq,
                                              tau1=self.tau))
    # Instantiate only the requested optimizer (previously all three were
    # constructed just to pick one from the dict).
    self.optimizer = {
        'Adam': optimizers.Adam,
        'Momentum': optimizers.MomentumSGD,
        'SGD': optimizers.SGD
    }[optimizer](lr)
    self.optimizer.setup(self.model)
    self.optimizer.add_hook(chainer.optimizer.WeightDecay(1e-6))
    if self.use_gpu:
        self.model.to_gpu()
    with chainer.using_config('train', True):
        lcurve = []
        for i in range(n_iter):
            self.model.cleargrads()
            indexes = np.random.choice(len(X), batchsize)
            x = X[indexes]
            x = one_hot_encoding(x, self.dict_alphabet, self.max_length)
            if self.use_gpu:
                x = cupy.array(x)
            # Bug fix: `x` is already the encoded minibatch; indexing it
            # again with `indexes` (indices into X, possibly >= batchsize)
            # selected wrong rows or raised IndexError.
            loss = self.model.kmeans(x)
            loss.backward()
            lcurve.append(float(loss.data))
            self.optimizer.update()
            print(i, np.mean(lcurve[-10:]))
    return np.array(lcurve)
def get_model_optimizer(args):
    """Build the SVM model (moved to GPU when requested) together with an
    SGD optimizer already attached to it; returns (model, optimizer)."""
    svm = SVM(c=args.c, penalty=args.penalty)
    if args.gpu >= 0:
        svm.to_gpu()

    sgd = optimizers.SGD(lr=args.lr)
    sgd.setup(svm)
    return svm, sgd
def __init__(self):
    """Actor-critic setup: twin 7-layer MLPs (5 state inputs -> 1 output),
    each with its own SGD(self.ALPHA) optimizer."""
    self.init_theta = 0.0
    self.init_omega = 0.0
    self.episode_time = 10.0  # seconds per episode
    self.hz = 20.0  # control frequency
    self.evaluation_freq = 100
    self.state = xp.zeros((1, 5), dtype=xp.float32)
    self.next_state = xp.zeros((1, 5), dtype=xp.float32)
    self.reward = 0.0
    self.action = 0.0
    # Critic: state -> scalar value estimate.
    self.critic_model = chainer.FunctionSet(
        l1=F.Linear(5, 500),
        l2=F.Linear(500, 250),
        l3=F.Linear(250, 125),
        l4=F.Linear(125, 60),
        l5=F.Linear(60, 30),
        l6=F.Linear(30, 15),
        # Zero-initialised head so initial outputs are 0.
        l7=F.Linear(15, 1, initialW=np.zeros((1, 15), dtype=np.float32)),
    )
    # Actor: state -> scalar action (same architecture as the critic).
    self.actor_model = chainer.FunctionSet(
        l1=F.Linear(5, 500),
        l2=F.Linear(500, 250),
        l3=F.Linear(250, 125),
        l4=F.Linear(125, 60),
        l5=F.Linear(60, 30),
        l6=F.Linear(30, 15),
        l7=F.Linear(15, 1, initialW=np.zeros((1, 15), dtype=np.float32)),
    )
    if args.gpu >= 0:
        self.critic_model.to_gpu()
        self.actor_model.to_gpu()
    self.critic_optimizer = optimizers.SGD(self.ALPHA)
    self.critic_optimizer.setup(self.critic_model)
    self.actor_optimizer = optimizers.SGD(self.ALPHA)
    self.actor_optimizer.setup(self.actor_model)
    self.oldact = 0.0
    self.sigma = 10.0  # exploration noise scale — presumably; verify usage
    self.limit_action = 5.0
    self.min_action = -5.0
    self.max_action = 5.0
def setup_optimizer(self):
    """Create the optimizer named by self.opt_type ('sgd', 'adagrad' or
    'adam') with learning rate self.opt_lr and attach it to the network.

    Raises ValueError for an unknown opt_type (previously self.optimizer
    was silently left unset/stale, failing later with an obscure error).
    """
    if self.opt_type == 'sgd':
        self.optimizer = optimizers.SGD(lr=self.opt_lr)
    elif self.opt_type == 'adagrad':
        self.optimizer = optimizers.AdaGrad(lr=self.opt_lr)
    elif self.opt_type == 'adam':
        # Adam's learning-rate knob is named alpha.
        self.optimizer = optimizers.Adam(alpha=self.opt_lr)
    else:
        raise ValueError('unknown opt_type: %s' % self.opt_type)
    self.optimizer.setup(self.network.collect_parameters())