import pickle
import numpy as np
from common import config
from common.trainer import Trainer
from common.optimizer import Adam
from common.util import create_contexts_target, to_cpu, to_gpu
from cbow import CBOW
from dataset import ptb

# Hyperparameters
window_size = 5
hidden_size = 100
batch_size = 100
max_epoch = 10

# Load the PTB corpus and build (context, target) training pairs
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size = len(word_to_id)

contexts, target = create_contexts_target(corpus, window_size)
if config.GPU:
    contexts, target = to_gpu(contexts), to_gpu(target)

# Build the model and train
model = CBOW(vocab_size, hidden_size, window_size, corpus)
optimizer = Adam()
trainer = Trainer(model, optimizer)
trainer.fit(contexts, target, max_epoch, batch_size)
trainer.plot()

# Save the learned word vectors (half precision keeps the file small)
word_vecs = model.word_vecs
if config.GPU:
    word_vecs = to_cpu(word_vecs)
params = {}
params['word_vecs'] = word_vecs.astype(np.float16)
params['word_to_id'] = word_to_id
params['id_to_word'] = id_to_word
pkl_file = 'cbow_params.pkl'
with open(pkl_file, 'wb') as f:
    pickle.dump(params, f, -1)
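# A minimal sketch of querying the saved vectors, assuming the companion
# common.util.most_similar helper from the same repository is available:
from common.util import most_similar

with open('cbow_params.pkl', 'rb') as f:
    params = pickle.load(f)

for query in ['you', 'year', 'car']:
    most_similar(query, params['word_to_id'], params['id_to_word'],
                 params['word_vecs'], top=5)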
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from simple_convnet import SimpleConvNet
from common.trainer import Trainer

# Load MNIST as (channel, height, width) images
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

max_epochs = 20

network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5,
                                    'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

# Save parameters
network.save_params("params.pkl")
print("Saved Network Parameters!")

# Draw the graph
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
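# The plotting code is truncated above; a plausible completion, following
# the conventions of the repository's other training scripts (assumed):
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()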
import numpy as np
import matplotlib.pyplot as plt
from common.multi_layer_net_extend import MultiLayerNetExtend
from common.trainer import Trainer

# Dropout on/off and dropout-ratio settings =========
use_dropout = False  # set to False to train without dropout
dropout_ratio = 0.2
# ====================================================

# x_train, t_train, x_test, t_test are assumed to be loaded earlier
# (e.g. via load_mnist from the same repository).
network = MultiLayerNetExtend(input_size=784,
                              hidden_size_list=[100, 100, 100, 100, 100, 100],
                              output_size=10, use_dropout=use_dropout,
                              dropout_ration=dropout_ratio)  # sic: the library spells this argument 'dropout_ration'
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=301, mini_batch_size=100,
                  optimizer='sgd', optimizer_param={'lr': 0.01}, verbose=True)
trainer.train()

train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list

# Draw the graph ==========
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, marker='o', label='train', markevery=10)
plt.plot(x, test_acc_list, marker='s', label='test', markevery=10)
plt.xlabel("epochs")
plt.ylabel("accuracy")
# Loss functions: the training criterion optionally learns the beta/gamma
# weights that balance the position and orientation terms.
kwargs = dict(loss_fn=nn.L1Loss(), beta=configuration.beta,
              gamma=configuration.gamma, learn_beta=configuration.learn_beta)
train_criterion = PoseNetCriterion(**kwargs)
result_criterion = AbsolutePoseNetCriterion()

# When beta/gamma are learnable they must be registered with the optimizer.
# (param_list is assumed to be initialized earlier with the model parameters.)
if configuration.learn_beta:
    param_list.append(
        {'params': [train_criterion.beta, train_criterion.gamma]})

train_dataloader, valid_dataloader = get_posenet_train_dataloader(
    configuration)

if configuration.optimizer == 'adam':
    optimizer = optim.Adam(param_list, lr=configuration.lr,
                           weight_decay=5e-4)

# Trainer
print("Setup trainer...")
trainer = Trainer(model=model,
                  optimizer=optimizer,
                  configuration=configuration,
                  train_criterion=train_criterion,
                  val_criterion=train_criterion,
                  result_criterion=result_criterion,
                  train_dataloader=train_dataloader,
                  val_dataloader=valid_dataloader)
trainer.run()
# Stride and pooling settings for each conv layer
stride1 = 2
stride2 = 2
stride3 = 2
stride4 = 2
pool1 = 1
pool2 = 1
pool3 = 1
pool4 = 3

# FN1-FN4 (filter counts), F1S-F4S (filter sizes) and PAD1-PAD4 (padding)
# are assumed to be defined earlier in the script.
network = DeepConvNet(input_dim=(2, 99, 99),
                      conv_param_1={'filter_num': FN1, 'filter_size': F1S, 'pad': PAD1, 'stride': stride1, 'pool': pool1},
                      conv_param_2={'filter_num': FN2, 'filter_size': F2S, 'pad': PAD2, 'stride': stride2, 'pool': pool2},
                      conv_param_3={'filter_num': FN3, 'filter_size': F3S, 'pad': PAD3, 'stride': stride3, 'pool': pool3},
                      conv_param_4={'filter_num': FN4, 'filter_size': F4S, 'pad': PAD4, 'stride': stride4, 'pool': pool4},
                      hidden_size1=75, output_size=2, node_size=63)

# Number of iterations = (total_data / mini_batch_size) per epoch * epochs
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=300, mini_batch_size=30,
                  optimizer='Adam', optimizer_param={'lr': 0.0001},
                  evaluate_sample_num_per_epoch=200)
trainer.train()

# Save parameters
network.save_params("deep_convnet_params_Aphtae_6_19.pkl")
print("Saved Network Parameters!")
if args.name is not None:
    # Log the run configuration
    with open("logs/{}.log".format(args.name), "a") as f:
        f.write(str(args))
        f.write("\nParameters : {}".format(n_parameters))
        if hasattr(model, "n_filters"):
            f.write("\nFilters : {}".format(model.n_filters))
        else:
            f.write("\nFilters : _ ")
        f.write("\n*******\n")
    print("-" * 80 + "\n")

    trainer1 = Trainer(device, model, dataset, optimizer,
                       [CrossEntropy(), AlphaLoss()],
                       name=args.name, topk=topk,
                       checkpointFreq=args.checkpoint_freq)
    trainer1.temperature = args.starting_temp
    trainer1.callbacks.append(AlphaCallback(args.alpha))
    if scheduler is not None:
        trainer1.callbacks.append(SchedulerCB(scheduler))

    trainer1.train(args.epochs)
    torch.save(model.state_dict(), args.name + ".model")
else:  # args.resume is not None
    model.load_state_dict(torch.load(args.resume))
# Load data
(X_train, Y_train), (X_test, Y_test) = load_mnist(flatten=False)
# Reduce the data size to shorten test time.
X_train, Y_train = X_train[:5000], Y_train[:5000]
X_test, Y_test = X_test[:1000], Y_test[:1000]

# Create the CNN
cnn = SimpleConvNet()

# Training helper class
trainer = Trainer(network=cnn,
                  x_train=X_train, t_train=Y_train,
                  x_test=X_test, t_test=Y_test,
                  epochs=20, mini_batch_size=100,
                  optimizer='Adam', optimizer_param={'lr': 0.01},
                  evaluate_sample_num_per_epoch=100)

# Run training
trainer.train()

# After training finishes, save the parameters to a file
cnn.save_params('cnn_params.pkl')

# Graph (x-axis: epoch, y-axis: accuracy)
# Trainer keeps train_acc_list/test_acc_list internally and appends to them
# automatically during training.
x = numpy.arange(20)
plt.plot(x, trainer.train_acc_list, label='train accuracy')
plt.plot(x, trainer.test_acc_list, label='test accuracy')
x = nn.Variable((batch_size, sentence_length))
mask = get_mask(x)
t = nn.Variable((batch_size, sentence_length))

with nn.parameter_scope('embedding'):
    h = PF.embed(x, vocab_size, embedding_size) * mask
with nn.parameter_scope('lstm1'):
    h = lstm(h, hidden_size, mask=mask, return_sequences=True)
with nn.parameter_scope('lstm2'):
    h = lstm(h, hidden_size, mask=mask, return_sequences=True)
with nn.parameter_scope('output'):
    y = time_distributed(PF.affine)(h, vocab_size)

mask = F.sum(mask, axis=2)  # do not predict 'pad'.
entropy = time_distributed_softmax_cross_entropy(
    y, expand_dims(t, axis=-1)) * mask
# count = F.sum(mask, axis=1)
# loss = F.mean(F.div2(F.sum(entropy, axis=1), count))
loss = F.sum(entropy) / F.sum(mask)

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t], loss=loss,
                  metrics={'PPL': np.e**loss}, solver=solver)
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)
# Grid search over weight decay and learning rate. (The enclosing loop over
# the dropout_ratio index i is assumed to be defined above this excerpt.)
for j in range(0, len(weight_decay)):
    for k in range(0, len(learning_rate)):
        network = MultiLayerNetExtend(
            input_size=784,
            hidden_size_list=[100, 100, 100, 100],
            output_size=10,
            activation='sigmoid',
            weight_init_std='xavier',
            weight_decay_lambda=weight_decay[j],
            use_dropout=use_dropout,
            dropout_ration=dropout_ratio[i])  # sic: the library spells this argument 'dropout_ration'
        trainer = Trainer(network, x_train, t_train, x_test, t_test,
                          epochs=5, mini_batch_size=500,
                          optimizer='adam',
                          optimizer_param={'lr': learning_rate[k]},
                          verbose=False)
        trainer.train()
        test_acc = trainer.test_acc_list[-1]
        best_hp.append({
            "test_acc": test_acc,
            "dropout_ratio": dropout_ratio[i],
            "weight_decay": weight_decay[j],
            "learning_rate": learning_rate[k]
        })
        del network
        bar.update(25 * i + 5 * j + k)
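# After the search loops finish, the best combination can be read off the
# collected results; a minimal sketch over best_hp as built above:
best = max(best_hp, key=lambda hp: hp["test_acc"])
print("best test_acc={test_acc:.4f} (dropout={dropout_ratio}, "
      "weight_decay={weight_decay}, lr={learning_rate})".format(**best))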
# (The network constructor call is truncated in this excerpt; its closing
# arguments are shown below.)
        'enable_bp_gradient_quantization': False
    },
    hidden_size_1=4096,
    hidden_size_2=1000,
    output_size=10,
    enable_compensation_L2_regularization=True,
    compensation_L2_regularization_lambda=0.1,
    mini_batch_size=batch_size)
# ======================================= Network Configuration =======================================

trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=batch_size,
                  optimizer=optimizer, optimizer_param={'lr': learning_rate},
                  evaluate_sample_num_per_epoch=evaluate_sample_num,
                  log_per_epoch=log_per_epoch, verbose=True)
trainer.train(log_time=method_time_inspection)

# Draw figure
markers = {'train loss': '^', 'train acc': 'o', 'test acc': 's'}
x = np.arange(len(trainer.train_loss_list))
plt.plot(x, trainer.train_loss_list, marker='^', label='train loss', markevery=2)
    len(x_test), batch_size, shuffle=True, with_file_cache=False)

x = nn.Variable((batch_size, max_len))
t = nn.Variable((batch_size, 1))
mask = get_mask(x)

with nn.parameter_scope('embedding'):
    h = time_distributed(PF.embed)(x, vocab_size, embedding_size) * mask
with nn.parameter_scope('lstm_layer'):
    h = lstm(h, hidden_size, mask=mask, return_sequences=False)
with nn.parameter_scope('output'):
    y = F.sigmoid(PF.affine(h, 1))

accuracy = F.mean(F.equal(F.round(y), t))
loss = F.mean(F.binary_cross_entropy(y, t))

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t], loss=loss,
                  metrics={'cross entropy': loss, 'accuracy': accuracy},
                  solver=solver)
trainer.run(train_data_iter, dev_data_iter, epochs=5, verbose=1)
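# A minimal inference sketch, assuming dev_data_iter yields
# (token_id_batch, label_batch) like the training iterator: feed one batch,
# run the forward pass, and threshold the sigmoid outputs.
x.d, t.d = dev_data_iter.next()
y.forward(clear_buffer=True)
pred = (y.d > 0.5).astype(int)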
import sys

import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from deep_convnet import DeepConvNet
from common.trainer import Trainer

# The optimizer name comes from the command line; default to Adam.
# (The original `opt = sys.argv[1]` raised IndexError when no argument was
# given, and its `if opt is None` check could never fire.)
opt = sys.argv[1] if len(sys.argv) > 1 else 'Adam'
print(opt)

(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

network = DeepConvNet()
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=20, mini_batch_size=100,
                  optimizer=opt, optimizer_param={},
                  evaluate_sample_num_per_epoch=1000, verbose2=True)
trainer.train()

# Save parameters
network.save_params(opt + "_deep_convnet_params.pkl")
print("Saved Network Parameters!")
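# A minimal sketch of reloading the saved parameters for evaluation, assuming
# DeepConvNet exposes load_params/accuracy like the companion repository's
# other networks:
network2 = DeepConvNet()
network2.load_params(opt + "_deep_convnet_params.pkl")
print("test accuracy:", network2.accuracy(x_test, t_test))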
# CBOW: average the context-word embeddings, then predict the center word.
x = nn.Variable([batch_size, window_size * 2])
with nn.parameter_scope('W_in'):
    h = PF.embed(x, vocab_size, embedding_size)
h = F.mean(h, axis=1)
with nn.parameter_scope('W_out'):
    y = PF.affine(h, vocab_size, with_bias=False)
t = nn.Variable((batch_size, 1))
entropy = F.softmax_cross_entropy(y, t)
loss = F.mean(entropy)

# Create solver.
solver = S.Adam()
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t], loss=loss,
                  metrics=dict(PPL=np.e**loss), solver=solver)
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)

# Dump the learned embeddings in word2vec text format.
with open('vectors.txt', 'w') as f:
    f.write('{} {}\n'.format(vocab_size - 1, embedding_size))
    with nn.parameter_scope('W_in'):
        x = nn.Variable((1, 1))
        y = PF.embed(x, vocab_size, embedding_size)
    for word, i in ptb_dataset.w2i.items():
        x.d = np.array([[i]])
        y.forward()
        str_vec = ' '.join(map(str, list(y.d.copy()[0][0])))
        f.write('{} {}\n'.format(word, str_vec))
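# Because the dump follows the word2vec text format, it can be inspected with
# third-party tools; a sketch using gensim (an assumption, not part of this
# script's dependencies, and it requires the header count to match the rows):
from gensim.models import KeyedVectors

vectors = KeyedVectors.load_word2vec_format('vectors.txt', binary=False)
print(vectors.most_similar('car', topn=5))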
def cnn_constructor():
    """
    Referenced by https://github.com/oreilly-japan/deep-learning-from-scratch
    common modules referenced there too.
    """
    global network, classes, imsize

    (x_train, t_train), (x_test, t_test), classes = dataset(
        image_dir="images", test_percentage=10, validation_percentage=10,
        imsize=imsize)
    x_train = chenneling(x_train)
    x_test = chenneling(x_test)
    train_num = x_train.shape[0]
    test_num = x_test.shape[0]
    x_train, t_train = shuffle_dataset(x_train, t_train)
    x_test, t_test = shuffle_dataset(x_test, t_test)

    net_param = "cnn_params" + str(imsize) + ".pkl"
    if not os.path.exists("params/"):
        os.makedirs("params/")

    # Build the convolutional neural network.
    # x_train.shape[1:] returns (channel, height, width).
    network = ConvNet(input_dim=(x_train.shape[1:]),
                      conv_param={'filter_num': 20, 'filter_size': 3,
                                  'pad': 0, 'stride': 1},
                      hidden_size=32, output_size=classes,
                      weight_init_std=0.001)
    trainer = Trainer(network, x_train, t_train, x_test, t_test,
                      epochs=1, mini_batch_size=FLAGS.batch_size,
                      optimizer='Adam', optimizer_param={'lr': 0.001},
                      evaluate_sample_num_per_epoch=train_num)

    params_loaded = False
    if os.path.exists("params/" + net_param):
        network.load_params("params/" + net_param)
        params_loaded = True
        print("\n* Loaded Network Parameters! - " + net_param)

    if (FLAGS.train_epochs > 0) or (params_loaded == False):
        if FLAGS.train_epochs <= 0:
            FLAGS.train_epochs = 10

        # Training
        for ep in range(FLAGS.train_epochs):
            trainer.train()

        # Save parameters
        network.save_params("params/" + net_param)

        # Graph 1: accuracy
        markers = {'train': 'o', 'test': 's', 'loss': 'd'}
        x1 = np.arange(len(trainer.train_acc_list))
        plt.clf()
        plt.plot(x1, trainer.train_acc_list, marker='o', label='train',
                 markevery=1)
        plt.plot(x1, trainer.test_acc_list, marker='s', label='test',
                 markevery=1)
        plt.xlabel("epochs")
        plt.ylabel("accuracy")
        plt.ylim(0, 1.1)
        plt.legend(loc='lower right')
        plt.title("Accuracy")
        now = datetime.now()
        filename = "params/" + now.strftime(
            '%Y%m%d_%H%M%S%f') + "_" + "ep" + ".png"
        plt.savefig(filename)

        # Graph 2: loss
        x2 = np.arange(len(trainer.train_loss_list))
        plt.clf()
        plt.plot(x2, trainer.train_loss_list, marker='o', label='loss',
                 markevery=1)
        plt.xlabel("iter")
        plt.ylabel("loss")
        plt.legend(loc='lower right')
        plt.title("Cross entropy loss")
        now = datetime.now()
        filename = "params/" + now.strftime(
            '%Y%m%d_%H%M%S%f') + "_" + "ep" + ".png"
        plt.savefig(filename)

        print("\n* Saved Network Parameters! - " + net_param)
# Bidirectional LSTM language model: the forward stream reads the left
# context, the backward stream reads the right context.
with nn.parameter_scope('embedding'):
    h = PF.embed(x, vocab_size, embedding_size) * mask
with nn.parameter_scope('lstm_forward'):
    h_f = lstm(h, hidden_size, mask=mask, return_sequences=True)
with nn.parameter_scope('lstm_backward'):
    h_b = lstm(h[:, ::-1, ], hidden_size, mask=mask,
               return_sequences=True)[:, ::-1, ]
# Shift the two streams so that position i sees only the words before and
# after the target word, then concatenate them.
h_f = h_f[:, :-2, ]
h_b = h_b[:, 2:, ]
h = F.concatenate(h_f, h_b, axis=2)
with nn.parameter_scope('output'):
    y = time_distributed(PF.affine)(h, vocab_size)

mask = F.sum(get_mask(t), axis=2)  # do not predict 'pad'.
entropy = time_distributed_softmax_cross_entropy(
    y, expand_dims(t, axis=-1)) * mask
count = F.sum(mask, axis=1)
loss = F.mean(F.div2(F.sum(entropy, axis=1), count))

# Create solver.
solver = S.Momentum(1e-2, momentum=0.9)
solver.set_parameters(nn.get_parameters())

trainer = Trainer(inputs=[x, t], loss=loss,
                  metrics={'PPL': np.e**loss}, solver=solver,
                  save_path='bilstmlm')
trainer.run(train_data_iter, valid_data_iter, epochs=max_epoch)