Example #1
def train_mnist_by_cnn():
    # Load the dataset
    (x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)

    # Reduce the dataset size if training takes too long
    x_train, t_train = x_train[:500], t_train[:500]
    x_test, t_test = x_test[:100], t_test[:100]

    max_epochs = 1

    network = SimpleConvNet(input_dim=(1, 28, 28),
                            conv_param={
                                'filter_num': 30,
                                'filter_size': 5,
                                'padding': 0,
                                'stride': 1
                            },
                            hidden_size=100,
                            output_size=10,
                            weight_init_std=0.01)

    trainer = Trainer(
        network,
        x_train,
        t_train,
        x_test,
        t_test,
        epoch_num=max_epochs,
        batch_size=100,
        optimizer='adagrad',
        optimizer_param={'lr': 0.001},
        evaluate_sample_num_per_epoch=None,
    )

    trainer.train()
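
The `Trainer` class used above is project-local and its implementation is not shown in this listing. As a rough sketch of what `trainer.train()` presumably does (a hypothetical stand-in, assuming the network exposes `gradient`, `params`, and `accuracy` and the optimizer exposes `update`, as the other examples suggest):

import numpy as np

class MiniBatchTrainer:
    """Hypothetical stand-in for the project's Trainer class."""

    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epoch_num, batch_size, optimizer):
        self.network = network
        self.optimizer = optimizer
        self.x_train, self.t_train = x_train, t_train
        self.x_test, self.t_test = x_test, t_test
        self.epoch_num = epoch_num
        self.batch_size = batch_size

    def train(self):
        train_size = self.x_train.shape[0]
        iter_per_epoch = max(train_size // self.batch_size, 1)
        for epoch in range(self.epoch_num):
            for _ in range(iter_per_epoch):
                # Draw a random mini-batch
                mask = np.random.choice(train_size, self.batch_size)
                x_batch, t_batch = self.x_train[mask], self.t_train[mask]
                # Backprop, then let the optimizer (e.g. AdaGrad) update the weights
                grads = self.network.gradient(x_batch, t_batch)
                self.optimizer.update(self.network.params, grads)
            # Evaluate once per epoch
            train_acc = self.network.accuracy(self.x_train, self.t_train)
            test_acc = self.network.accuracy(self.x_test, self.t_test)
            print(f'epoch {epoch}: train acc {train_acc:.3f}, test acc {test_acc:.3f}')
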
Example #2
import logging

import numpy as np
import matplotlib.pyplot as plt

# load_mnist, MultiLayerNetExtend, and Trainer are project-local modules;
# their imports are elided in the original listing.


def train_mnist_extend():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

    # Shrink the training set to reproduce overfitting
    x_train = x_train[:300]
    t_train = t_train[:300]

    input_size = 784
    hidden_size_list = [100, 100, 100, 100, 100, 100]
    output_size = 10
    batch_size = 100
    epoch_num = 301

    network = MultiLayerNetExtend(input_size=input_size,
                                  hidden_size_list=hidden_size_list,
                                  output_size=output_size,
                                  dropout=True,
                                  dropout_ratio=0.45,
                                  batch_normal=True)
    logging.info(f'Layers: {network.layers.keys()}')

    trainer = Trainer(
        network=network,
        x_train=x_train,
        t_train=t_train,
        x_test=x_test,
        t_test=t_test,
        epoch_num=epoch_num,
        batch_size=batch_size,
        optimizer="adagrad",
    )
    trainer.train()
    logging.info('======== Train finished !! ========')
    train_acc_list, test_acc_list = trainer.train_acc_list, trainer.test_acc_list
    # Plot the accuracy curves ==========
    markers = {'train': 'o', 'test': 's'}
    x = np.arange(len(train_acc_list))
    plt.plot(x, train_acc_list, marker=markers['train'], label='train', markevery=10)
    plt.plot(x, test_acc_list, marker=markers['test'], label='test', markevery=10)
    plt.xlabel("epochs")
    plt.ylabel("accuracy")
    plt.xlim(0, epoch_num)
    plt.ylim(0, 1.0)
    plt.legend(loc='lower right')
    plt.savefig("src/sample_data/mnist/params_backprop.png")
    logging.info('======== Figure saved !! ========')
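
The dropout behavior enabled by `dropout=True` / `dropout_ratio=0.45` lives inside `MultiLayerNetExtend`, which is not shown here. A minimal sketch of the kind of dropout layer such a network typically contains (an assumption, not the actual MultiLayerNetExtend code):

import numpy as np

class Dropout:
    """Minimal dropout layer sketch."""

    def __init__(self, dropout_ratio=0.45):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x, train_flg=True):
        if train_flg:
            # Randomly zero out units with probability dropout_ratio
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        # At test time, scale activations by the keep probability instead
        return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        # Gradient flows only through the units that were kept
        return dout * self.mask

Dropping units at random during training prevents the 6 hidden layers of 100 units from co-adapting to the tiny 300-sample training set, which is exactly the overfitting this example sets out to reproduce.
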
Example #3
import numpy as np
import joblib

# load_mnist and TwoLayerNetExtend are project-local modules; their imports
# are elided in the original listing.


def train_mnist_two_layer():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                      one_hot_label=True)
    network = TwoLayerNetExtend(input_size=784, hidden_size=50, output_size=10)
    iters_num = 10000
    train_size = x_train.shape[0]
    batch_size = 100
    learning_rate = 0.1
    train_loss_list = []
    train_acc_list = []
    test_acc_list = []

    iter_per_epoch = max(train_size / batch_size, 1)

    for i in range(iters_num):
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # Gradient (backpropagation)
        # grad = network.numerical_gradient(x_batch, t_batch)  # slow finite-difference version
        grad = network.gradient(x_batch, t_batch)

        # Update the parameters
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if i % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print(train_acc, test_acc)
    joblib.dump(network.params, "src/sample_data/mnist/params_backprop.pkl")
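
The commented-out `numerical_gradient` call above is the slow finite-difference alternative to backpropagation, typically kept around for gradient checking. A minimal sketch of such a check, assuming the network exposes both `numerical_gradient` and `gradient` with parameter keys 'W1', 'b1', 'W2', 'b2':

import numpy as np

def gradient_check(network, x_batch, t_batch):
    """Compare backprop gradients against finite-difference gradients."""
    grad_numerical = network.numerical_gradient(x_batch, t_batch)
    grad_backprop = network.gradient(x_batch, t_batch)
    for key in ('W1', 'b1', 'W2', 'b2'):
        # Mean absolute difference should be tiny (e.g. < 1e-7) if backprop is correct
        diff = np.mean(np.abs(grad_backprop[key] - grad_numerical[key]))
        print(f'{key}: {diff}')

Running `gradient_check(network, x_train[:3], t_train[:3])` on a few samples is enough; the numerical version is far too slow for full batches.
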
Example #4
def load_data():
    (x_train, t_train), (x_test, t_test) = mn.load_mnist(normalize=True,
                                                         one_hot_label=True)
    return x_train, t_train, x_test, t_test
Example #5
import os
import logging

import joblib
import numpy as np
import torch
import torch.nn as nn
import torch.optim as op
from torch.utils.data import TensorDataset, DataLoader

# load_mnist and ConvolutionalNeuralNetwork are project-local modules;
# their imports are elided in the original listing.


def train_cnn_by_pytorch():
    network_path = "src/sample_data/mnist/convolution_network.pkl"
    batch_size = 100
    num_classes = 10
    epochs = 3

    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=False)
    network = ConvolutionalNeuralNetwork(num_classes)
    
    # Reshape to the format PyTorch expects: [batch, channels, height, width]
    x_train = x_train.reshape(60000, 1, 28, 28)
    x_test = x_test.reshape(10000, 1, 28, 28)

    # Convert to PyTorch tensors
    x_train = torch.Tensor(x_train).float()
    x_test = torch.Tensor(x_test).float()
    t_train = torch.LongTensor(t_train)  # labels must be integers (LongTensor), not float
    t_test = torch.LongTensor(t_test)

    # Build the training and evaluation datasets (60,000 training images)
    train_dataset = TensorDataset(x_train, t_train)
    test_dataset = TensorDataset(x_test, t_test)
    
    # Split each dataset into mini-batches (e.g. dataset of 100 with batch size 20 ⇒ 5 tensors of 20)
    train_batch = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_batch = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Define the parameter-update rule (optimizer)
    optimizer = op.Adagrad(network.parameters(), lr=0.01, lr_decay=0,
                           weight_decay=0.05, initial_accumulator_value=0,
                           eps=1e-10)

    # Define the loss function
    loss_func = nn.CrossEntropyLoss()  # CrossEntropyLoss expects class indices, not one-hot vectors

    if not os.path.exists(network_path):
        network.train()
        # Train for `epochs` epochs, updating the parameters every iteration
        for epoch in range(1, epochs + 1):
            logging.info(f'===== START {epoch}th epoch !! =====')
            for i, (data, label) in enumerate(train_batch):
                optimizer.zero_grad()
                output = network(data)
                loss = loss_func(output, label)
                loss.backward()
                optimizer.step()
                if i % 100 == 0:
                    logging.info(f'{i}th iteration ⇒ loss: {loss.item()}')
        joblib.dump(network, network_path)
    else:
        network = joblib.load(network_path) 

    # Evaluation
    network.eval()  # switch the model to inference mode
    count = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for data, label in test_batch:
            test_output = network(data)
            _, predicted = torch.max(test_output.data, 1)
            y_predicted = predicted.numpy()  # tensor ⇒ numpy array
            label = label.numpy()            # tensor ⇒ numpy array
            count += np.sum(y_predicted == label)
    accuracy = count / len(t_test) * 100
    logging.info(f'========= Final accuracy is {accuracy} % !! =========')
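
`ConvolutionalNeuralNetwork` is project-local and not defined in this listing. A minimal sketch of a compatible `nn.Module` for 1×28×28 MNIST inputs (the layer sizes here are illustrative assumptions, not the original definition):

import torch.nn as nn

class ConvolutionalNeuralNetwork(nn.Module):
    """Illustrative CNN for 1x28x28 MNIST inputs; layer sizes are assumptions."""

    def __init__(self, num_classes):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 30, kernel_size=5),  # -> 30 x 24 x 24
            nn.ReLU(),
            nn.MaxPool2d(2),                  # -> 30 x 12 x 12
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(30 * 12 * 12, 100),
            nn.ReLU(),
            nn.Linear(100, num_classes),      # raw logits, no softmax here
        )

    def forward(self, x):
        return self.classifier(self.features(x))

Note that `forward` returns raw logits: `nn.CrossEntropyLoss` applies log-softmax internally, which is why the training loop above passes the network output to the loss function directly.
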