def test_setup(self):
    create_update_rule = mock.MagicMock()
    target = self.target
    optimizer = self.optimizer
    optimizer.create_update_rule = create_update_rule
    optimizer.setup(target)

    self.assertEqual(create_update_rule.call_count, 2)
    self.assertEqual(create_update_rule.call_args_list[0], [(), {}])
    self.assertEqual(create_update_rule.call_args_list[1], [(), {}])
def test_update(self, backend_config):
    target = self.target
    optimizer = self.optimizer
    target.to_device(backend_config.device)
    optimizer.setup(target)

    self.assertEqual(optimizer.t, 0)
    optimizer.update()
    self.assertEqual(optimizer.t, 1)

    param1 = target[0].param
    param2 = target[1].param
    param1.update_rule.update.assert_called_once_with(param1)
    param2.update_rule.update.assert_called_once_with(param2)
def test_update(self, backend_config):
    target = self.target
    optimizer = self.optimizer
    target.to_device(backend_config.device)
    optimizer.setup(target)
    optimizer.update()

    xp = backend.get_array_module(target[0].param)
    expected_data = xp.zeros(self.shape, dtype=self.dtype)
    rtol, atol = 1e-4, 1e-5
    if self.dtype is np.float16:
        rtol, atol = 1e-1, 1e-2
    for i in range(2):
        testing.assert_allclose(
            target[i].param.data, expected_data, rtol=rtol, atol=atol)
def test_update(self, backend_config):
    if backend_config.device.name == '@cupy:1':
        # TODO(niboshi): Fix it
        raise unittest.SkipTest(
            'Loss scale does not work with cupy multi-device.')
    target = self.target
    optimizer = self.optimizer
    target.to_device(backend_config.device)
    optimizer.setup(target)
    optimizer.update()

    xp = backend.get_array_module(target[0].param)
    expected_data = xp.zeros(self.shape, dtype=self.dtype)
    rtol, atol = 1e-4, 1e-5
    if self.dtype is np.float16:
        rtol, atol = 1e-1, 1e-2
    for i in range(2):
        testing.assert_allclose(
            target[i].param.data, expected_data, rtol=rtol, atol=atol)
def test_update(self, backend_config):
    if backend_config.xp is chainerx:
        # ChainerX performs the loss scaling in its own backward method,
        # so the optimizer should not divide the parameters back.
        # This test does not actually build a ChainerX computation graph,
        # so no actual loss scaling takes place.
        self.optimizer.lr = 1.0
    target = self.target
    optimizer = self.optimizer
    target.to_device(backend_config.device)
    optimizer.setup(target)
    optimizer.update()

    xp = backend.get_array_module(target[0].param)
    expected_data = xp.zeros(self.shape, dtype=self.dtype)
    rtol, atol = 1e-4, 1e-5
    if self.dtype is np.float16:
        rtol, atol = 1e-1, 1e-2
    for i in range(2):
        testing.assert_allclose(
            target[i].param.data, expected_data, rtol=rtol, atol=atol)
    y = self.l3(h2)
    return y


class Classifier(Chain):

    def __init__(self, predictor):
        super(Classifier, self).__init__(predictor=predictor)

    def __call__(self, x, t):
        y = self.predictor(x)
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        return self.loss


model = L.Classifier(MLP())
optimizer = optimizers.SGD()
optimizer.setup(model)

batch_size = 100
data_size = 60000

for epoch in range(5):
    print('epoch %d' % epoch)
    indexes = np.random.permutation(data_size)
    for index in range(0, data_size, batch_size):
        x = Variable(x_train[indexes[index:index + batch_size]])
        t = Variable(y_train[indexes[index:index + batch_size]])
        print(index + batch_size)
        print(x_train[indexes[index:index + batch_size]])
        print(y_train[indexes[index:index + batch_size]])
        optimizer.update(model, x, t)

sum_loss, sum_accuracy = 0, 0
def learn(csvfile):
    train, publictest, _ = dataFromCsv.dataFromCsv(csvfile)
    train_iter = iterators.SerialIterator(train, batch_size=BATCH, shuffle=True)
    publictest_iter = iterators.SerialIterator(publictest, batch_size=BATCH,
                                               repeat=False, shuffle=False)

    # Choose the model to train; its output must be a softmax
    model = Resnet.ResNet(class_labels=9)

    # GPU setup; can be commented out if no GPU is used
    chainer.cuda.get_device(GPU).use()
    model.to_gpu()

    # Optimizer setup
    optimizer = chainer.optimizers.MomentumSGD(LEARN_RATE)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(WEIGHT_DECAY))

    # Model to be saved
    saved_model = model

    while train_iter.epoch < EPOCH:
        batch = train_iter.next()
        trainLossList = []
        x_array, y_array = convert.concat_examples(batch, GPU)
        x = chainer.Variable(x_array)
        y = chainer.Variable(y_array)
        m = model(x)
        loss_train = myCrossEntropyError(m, y)
        model.cleargrads()
        loss_train.backward()
        optimizer.update()
        trainLossList.append(chainer.cuda.to_cpu(loss_train.data))

        if train_iter.is_new_epoch:
            testLossList = []
            # After each epoch, evaluate the model on the public test set and
            # keep the model with the best (lowest) test loss
            for batch in publictest_iter:
                x_array, y_array = convert.concat_examples(batch, GPU)
                x = chainer.Variable(x_array)
                y = chainer.Variable(y_array)
                m = model(x)
                loss_test = myCrossEntropyError(m, y)
                testLossList.append(chainer.cuda.to_cpu(loss_test.data))
                if loss_test.data == np.min(testLossList):
                    saved_model = model
            publictest_iter.reset()
            print("epo:" + str(train_iter.epoch) +
                  " train_loss:" + str(np.mean(trainLossList)) +
                  " test_loss:" + str(np.mean(testLossList)))

    serializers.save_npz(SAVE_MODEL, saved_model)
    return
# Convert the training data into input/target pairs
train_x, train_t = [], []
for i in range(len(train_data) - 1):
    train_x.append(train_data[i])
    train_t.append(train_data[i + 1])
train_x = np.array(train_x, dtype="float32")
train_t = np.array(train_t, dtype="float32")
in_size = 1
out_size = 1
N = len(train_x)

# Define the model
model = LSTM(in_size=in_size, hidden_size=HIDDEN_SIZE, out_size=out_size)
optimizer = optimizers.Adam()
optimizer.setup(model)

# Start training
print("Train")
st = datetime.datetime.now()
for epoch in range(EPOCH_NUM):
    # Minibatch training: from the whole time series, extract BATCH_ROW_SIZE
    # subsequences of length BATCH_COL_SIZE
    x, t = [], []
    for i in range(BATCH_ROW_SIZE):
        # Pick a random position such that a full BATCH_COL_SIZE window fits
        index = np.random.randint(0, N - BATCH_COL_SIZE + 1)
        x.append(train_x[index:index + BATCH_COL_SIZE])  # extract BATCH_COL_SIZE steps
        t.append(train_t[index:index + BATCH_COL_SIZE])
def main():
    if os.path.exists('./data/corpus/dictionary.dict'):
        corpus = ConvCorpus(create_flg=False, batch_size=BATCH_SIZE, size_filter=True)
        corpus.load(load_dir='./data/corpus/')
    else:
        corpus = ConvCorpus(create_flg=True, batch_size=BATCH_SIZE, size_filter=True)
        corpus.save(save_dir='./data/corpus/')

    model = Seq2Seq(vocab_size=len(corpus.dic.token2id),
                    embed_size=EMBED_SIZE,
                    hidden_size=HIDDEN_SIZE,
                    batch_size=BATCH_SIZE,
                    flag_gpu=FLAG_GPU)
    model.reset()
    if FLAG_GPU:
        cuda.get_device(0).use()
        model.to_gpu(0)
        ARR = cuda.cupy
    else:
        ARR = np

    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))

    input_mat = []
    output_mat = []
    max_input_len = max_output_len = 0
    for input_text, output_text in zip(corpus.rough_posts, corpus.rough_cmnts):
        output_text.append(corpus.dic.token2id['<eos>'])
        max_input_len = max(max_input_len, len(input_text))
        max_output_len = max(max_output_len, len(output_text))
        input_mat.append(input_text)
        output_mat.append(output_text)

    for li in input_mat:
        insert_num = max_input_len - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_len - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])

    input_mat = np.array(input_mat, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T

    accum_loss = 0
    for num, epoch in enumerate(range(EPOCH_NUM)):
        total_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.rough_posts))
        # assert len(corpus.rough_posts)//BATCH_SIZE == 0
        for i in range(0, len(corpus.rough_posts), BATCH_SIZE):
            input_batch = input_mat[:, perm[i:i + BATCH_SIZE]]
            output_batch = output_mat[:, perm[i:i + BATCH_SIZE]]
            model.reset()
            model.encode(input_batch)
            end_batch = ARR.array(
                [corpus.dic.token2id['<start>'] for _ in range(BATCH_SIZE)])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch, first_words, train=True)
            next_ids = first_words
            accum_loss += loss
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = w_ids
                accum_loss += loss
            model.cleargrads()
            accum_loss.backward()
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch:', num + 1, 'batch:', batch_num,
                  'batch loss:{:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0
        if (epoch + 1) % 2 == 0:
            serializers.save_hdf5('./data/model/{}.model'.format(epoch + 1), model)
            serializers.save_hdf5('./data/model/{}.state'.format(epoch + 1), optimizer)
def main():
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    parser.add_argument('--enable_checkpoint', type=bool, default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

    if len(args.init_from) > 0:
        model = pickle.load(open(args.init_from, 'rb'))
    else:
        model = CharRNN(len(vocab), n_units)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate, eps=1e-8)
    # optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer.GradientClipping(grad_clip))  # cap the gradients

    whole_len = train_data.shape[0]
    # jump = whole_len / batchsize
    jump = int(whole_len / batchsize)
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    if args.gpu >= 0:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))

    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        x_batch = xp.array([
            train_data[(jump * j + i) % whole_len] for j in range(batchsize)
        ])
        y_batch = xp.array([
            train_data[(jump * j + i + 1) % whole_len] for j in range(batchsize)
        ])
        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = model.forward_one_step(x_batch, y_batch, state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            # print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()  # optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            # accum_loss = Variable(xp.zeros(()).astype(np.float32))
            if args.gpu >= 0:
                accum_loss = Variable(xp.zeros(()).astype(np.float32))
                # accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))

            # optimizer.clip_grads(grad_clip)
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                      (args.checkpoint_dir, float(i) / jump))
                pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
                pickle.dump(
                    copy.deepcopy(model).to_cpu(),
                    open('%s/latest.chainermodel' % (args.checkpoint_dir), 'wb'))

        if (i + 1) % jump == 0:
            epoch += 1
            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()
def Training(str="", weight_load=False): print("\nTraining\n") # 教師データ df = pd.read_csv('nikkei-225-index-historical-chart-data.csv', header=8) #mat = df.query('date.str.match("^2019-")', engine='python') mat = df.query('date.str.match(' + str + ')', engine='python') train_data_t = mat[' value'].values print(train_data_t) train_data = np.arange(len(train_data_t), dtype="float32") for i in range(len(train_data_t) - 1): train_data[i] = train_data_t[i + 1] - train_data_t[i] gain = np.max(train_data) - np.min(train_data) gain = gain / 2 train_data = train_data / gain print(train_data) # 入力データと教師データを作成 train_x, train_t = [], [] for i in range(len(train_data) - 1): train_x.append(train_data[i]) train_t.append(train_data[i + 1]) train_x = np.array(train_x, dtype="float32") train_t = np.array(train_t, dtype="float32") Num = len(train_x) # モデル定義 model = LSTM(in_size=IN_SIZE, hidden_size=HIDDEN_SIZE, out_size=OUT_SIZE) optimizer = optimizers.Adam() optimizer.setup(model) if weight_load: serializers.load_npz("mymodel.npz", model) # 学習開始 print("Train") st = datetime.datetime.now() for epoch in range(EPOCH_NUM): # ミニバッチ学習 x, t = [], [] # ミニバッチ学習データとして、時系列全体から、BATCH_COL_SIZE分の時系列を抜き出したものを、BATCH_ROW_SIZE個用意する for i in range(BATCH_ROW_SIZE): # ランダムな箇所、ただしBATCH_COL_SIZE分だけ抜き取れる場所から選ぶ # (indexの末端がBATCH_COL_SIZEを超えない部分でリミットを掛ける) index = np.random.randint(0, Num - BATCH_COL_SIZE + 1) x.append(train_x[index:index + BATCH_COL_SIZE]) # BATCH_COL_SIZE分の時系列を取り出す t.append(train_t[index:index + BATCH_COL_SIZE]) x = np.array(x, dtype="float32") t = np.array(t, dtype="float32") loss = 0 total_loss = 0 model.reset() # 勾配とメモリの初期化 for i in range(BATCH_COL_SIZE): # 各時刻おきにBATCH_ROW_SIZEごと読み込んで損失を計算する x_ = np.array([x[j, i] for j in range(BATCH_ROW_SIZE)], dtype="float32")[:, np.newaxis] # 時刻iの入力値 t_ = np.array([t[j, i] for j in range(BATCH_ROW_SIZE)], dtype="float32")[:, np.newaxis] # 時刻i+1の値(=正解の予測値) loss += model(x=x_, t=t_, train=True) # 誤差合計 loss.backward() # 誤差逆伝播 loss.unchain_backward() total_loss += loss.data optimizer.update() if (epoch + 1) % 100 == 0: ed = datetime.datetime.now() print("epoch:\t{}\ttotal loss:\t{}\ttime:\t{}".format( epoch + 1, total_loss, ed - st)) st = datetime.datetime.now() serializers.save_npz("mymodel.npz", model) # npz形式で書き出し