Example 1
    def update_core(self):
        itr_train = self.get_iterator('main')
        optimizer = self.get_optimizer('main')

        batch = itr_train.__next__()
        X_STF, y_STF = chainer.dataset.concat_examples(batch, self.device)

        optimizer.target.zerograds()
        optimizer.target.predictor.reset_state()
        loss = optimizer.target(Variable(X_STF), Variable(y_STF))

        loss.backward()
        optimizer.update()
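For context (not part of the original example): a custom updater that overrides update_core() like the one above is usually driven by a chainer.training.Trainer. The following is a minimal, self-contained sketch; the MyUpdater class, the toy L.Classifier(L.Linear(10, 2)) model, the random dataset, and the 2-epoch stop trigger are illustrative assumptions, not code from the example.

import numpy as np
import chainer.links as L
from chainer import iterators, optimizers, training

class MyUpdater(training.StandardUpdater):
    def update_core(self):
        # Fetch one mini-batch from the iterator registered as 'main'.
        batch = self.get_iterator('main').__next__()
        x, t = self.converter(batch, self.device)
        optimizer = self.get_optimizer('main')
        # Clear old gradients, compute the loss, backprop, and update.
        optimizer.target.cleargrads()
        loss = optimizer.target(x, t)
        loss.backward()
        optimizer.update()

# Toy model and random data, placeholders for illustration only.
model = L.Classifier(L.Linear(10, 2))
data = [(np.random.rand(10).astype(np.float32), np.int32(i % 2))
        for i in range(100)]
train_iter = iterators.SerialIterator(data, batch_size=10)
optimizer = optimizers.SGD()
optimizer.setup(model)

updater = MyUpdater(train_iter, optimizer, device=-1)  # -1: keep data on CPU
trainer = training.Trainer(updater, (2, 'epoch'), out='result')
trainer.run()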
Example 2
    def test_update(self, backend_config):
        target = self.target
        optimizer = self.optimizer
        target.to_device(backend_config.device)
        optimizer.setup(target)

        self.assertEqual(optimizer.t, 0)

        optimizer.update()

        self.assertEqual(optimizer.t, 1)

        param1 = target[0].param
        param2 = target[1].param
        param1.update_rule.update.assert_called_once_with(param1)
        param2.update_rule.update.assert_called_once_with(param2)
Example 3
 def test_update(self, backend_config):
     target = self.target
     optimizer = self.optimizer
     target.to_device(backend_config.device)
     optimizer.setup(target)
     optimizer.update()
     xp = backend.get_array_module(target[0].param)
     expected_data = xp.zeros(self.shape, dtype=self.dtype)
     rtol, atol = 1e-4, 1e-5
     if self.dtype is np.float16:
         rtol, atol = 1e-1, 1e-2
     for i in range(2):
         testing.assert_allclose(target[i].param.data,
                                 expected_data,
                                 rtol=rtol,
                                 atol=atol)
Example 4
    def update_core(self):
        # When we pass one iterator and optimizer to StandardUpdater.__init__,
        # they are automatically named 'main'.
        train_iter = self.get_iterator('main')
        optimizer = self.get_optimizer('main')

        batch = train_iter.__next__()
        x, t = self.converter(batch, self.device)

        optimizer.target.cleargrads()
        loss = optimizer.target(x, t)

        loss.backward()
        loss.unchain_backward()
        optimizer.update()
Example 5
 def test_update(self, backend_config):
     if backend_config.device.name == '@cupy:1':
         # TODO(niboshi): Fix it
         raise unittest.SkipTest(
             'Loss scale does not work with cupy multi-device.')
     target = self.target
     optimizer = self.optimizer
     target.to_device(backend_config.device)
     optimizer.setup(target)
     optimizer.update()
     xp = backend.get_array_module(target[0].param)
     expected_data = xp.zeros(self.shape, dtype=self.dtype)
     rtol, atol = 1e-4, 1e-5
     if self.dtype is np.float16:
         rtol, atol = 1e-1, 1e-2
     for i in range(2):
         testing.assert_allclose(
             target[i].param.data, expected_data,
             rtol=rtol, atol=atol)
Example 6
 def test_update(self, backend_config):
     if backend_config.xp is chainerx:
         # ChainerX performs the loss scaling in its own backward method,
         # so the optimizer should not divide the parameters back.
         # This test does not actually build a ChainerX computation graph,
         # so no loss scaling is actually applied here.
         self.optimizer.lr = 1.0
     target = self.target
     optimizer = self.optimizer
     target.to_device(backend_config.device)
     optimizer.setup(target)
     optimizer.update()
     xp = backend.get_array_module(target[0].param)
     expected_data = xp.zeros(self.shape, dtype=self.dtype)
     rtol, atol = 1e-4, 1e-5
     if self.dtype is np.float16:
         rtol, atol = 1e-1, 1e-2
     for i in range(2):
         testing.assert_allclose(
             target[i].param.data, expected_data,
             rtol=rtol, atol=atol)
Example 7
    def test_update(self, backend_config):
        device = backend_config.device
        override_pattern = self.override_pattern
        optimizer, call_record = self.create(device)

        optimizer.update()

        self.assertEqual(len(call_record), 3)

        # Determine the name of the method that is expected to have been called.
        if override_pattern == 'generic':
            method_name = 'update_core'
        elif override_pattern == 'cpu_gpu':
            if isinstance(device, backend.ChainerxDevice):
                xp = device.fallback_device.xp
            else:
                xp = device.xp

            if xp is np:
                method_name = 'update_core_cpu'
            else:
                assert xp is cuda.cupy
                method_name = 'update_core_gpu'
        elif override_pattern == 'cpu_gpu_chx':
            if isinstance(device, backend.ChainerxDevice):
                method_name = 'update_core_chainerx'
            elif device.xp is np:
                method_name = 'update_core_cpu'
            else:
                assert device.xp is cuda.cupy
                method_name = 'update_core_gpu'
        else:
            assert False, override_pattern

        # Check call record.
        # TODO(niboshi): Check the param argument as well.
        self.assertEqual(call_record[0][0], method_name)
        self.assertEqual(call_record[1][0], method_name)
        self.assertEqual(call_record[2][0], method_name)
Example 8
        return self.loss

model = L.Classifier(MLP())
optimizer = optimizers.SGD()
optimizer.setup(model)

batch_size = 100
data_size = 60000
for epoch in range(5):
    print('epoch %d' % epoch)
    indexes = np.random.permutation(data_size)
    for index in range(0, data_size, batch_size):
        x = Variable(x_train[indexes[index : index + batch_size]])
        t = Variable(y_train[indexes[index : index + batch_size]])
        print(index + batch_size)
        print(x_train[indexes[index : index + batch_size]])
        print(y_train[indexes[index : index + batch_size]])
        optimizer.update(model, x, t)

sum_loss, sum_accuracy = 0, 0
for index in range(0, 10000, batch_size):
    x = Variable(x_test[index : index + batch_size])
    t = Variable(y_test[index : index + batch_size])
    loss = model(x, t)
    sum_loss += loss.data * batch_size
    sum_accuracy += model.accuracy.data * batch_size

main_loss = sum_loss / 10000
main_accuracy = sum_accuracy / 10000
print(main_loss)
print(main_accuracy)
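A note on the call above: optimizer.update(model, x, t) is the "loss function" form of Optimizer.update, where the optimizer clears the gradients, evaluates model(x, t) to obtain the loss, runs backward, and applies the update in a single call. Most of the other examples on this page use the manual form instead, which, roughly, with the same model, x and t, looks like this:

model.cleargrads()
loss = model(x, t)
loss.backward()
optimizer.update()  # no arguments: just apply the accumulated gradients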
Example 9
def learn(csvfile):

    train, publictest, _ = dataFromCsv.dataFromCsv(csvfile)

    train_iter = iterators.SerialIterator(train,
                                          batch_size=BATCH,
                                          shuffle=True)
    publictest_iter = iterators.SerialIterator(publictest,
                                               batch_size=BATCH,
                                               repeat=False,
                                               shuffle=False)

    # Choose the model to train; note that the model output must be a softmax
    model = Resnet.ResNet(class_labels=9)

    # GPU setup; can be commented out when not using a GPU
    chainer.cuda.get_device(GPU).use()
    model.to_gpu()

    # Optimizer setup
    optimizer = chainer.optimizers.MomentumSGD(LEARN_RATE)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(WEIGHT_DECAY))

    # Model to keep as the best snapshot
    saved_model = model

    while train_iter.epoch < EPOCH:

        batch = train_iter.next()

        trainLossList = []

        x_array, y_array = convert.concat_examples(batch, GPU)

        x = chainer.Variable(x_array)
        y = chainer.Variable(y_array)
        m = model(x)

        loss_train = myCrossEntropyError(m, y)

        model.cleargrads()

        loss_train.backward()

        optimizer.update()

        trainLossList.append(chainer.cuda.to_cpu(loss_train.data))

        if train_iter.is_new_epoch:

            testLossList = []

            # After each epoch, evaluate the model on publictest and keep the best one
            for batch in publictest_iter:
                x_array, y_array = convert.concat_examples(batch, GPU)
                x = chainer.Variable(x_array)
                y = chainer.Variable(y_array)
                m = model(x)

                loss_test = myCrossEntropyError(m, y)
                testLossList.append(chainer.cuda.to_cpu(loss_test.data))

                if loss_test.data == np.min(testLossList):
                    # keep a snapshot of the current best model (a plain
                    # assignment would only alias the live model); assumes copy is imported
                    saved_model = copy.deepcopy(model)

            publictest_iter.reset()

            print("epo:" + str(train_iter.epoch) + " train_loss:" +
                  str(np.mean(trainLossList)) + " test_loss:" +
                  str(np.mean(testLossList)))

    serializers.save_npz(SAVE_MODEL, saved_model)

    return
Example 10
 def test_update(self, backend_config):
     target = self.target
     optimizer = self.optimizer
     target.to_device(backend_config.device)
     target.cleargrads()
     optimizer.update()
Example 11
        t.append(train_t[index:index + BATCH_COL_SIZE])
    x = np.array(x, dtype="float32")
    t = np.array(t, dtype="float32")
    loss = 0
    total_loss = 0
    model.reset()  # reset gradients and LSTM memory
    for i in range(BATCH_COL_SIZE):  # at each time step, feed BATCH_ROW_SIZE rows and accumulate the loss
        x_ = np.array([x[j, i] for j in range(BATCH_ROW_SIZE)],
                      dtype="float32")[:, np.newaxis]  # inputs at time i
        t_ = np.array([t[j, i] for j in range(BATCH_ROW_SIZE)],
                      dtype="float32")[:, np.newaxis]  # values at time i+1 (= the target to predict)
        loss += model(x=x_, t=t_, train=True)
    loss.backward()
    loss.unchain_backward()
    total_loss += loss.data
    optimizer.update()
    if (epoch + 1) % 100 == 0:
        ed = datetime.datetime.now()
        print("epoch:\t{}\ttotal loss:\t{}\ttime:\t{}".format(
            epoch + 1, total_loss, ed - st))
        st = datetime.datetime.now()

# Prediction

print("\nPredict")
predict = np.empty(0)  # predicted time series
inseq_size = 50
inseq = train_data[:inseq_size]  # the series up to just before prediction
for _ in range(N - inseq_size):
    model.reset()  # reset LSTM memory
    for i in inseq:  # feed the model the series up to just before prediction
Example 12
def main():
    if os.path.exists('./data/corpus/dictionary.dict'):
        corpus = ConvCorpus(create_flg=False,
                            batch_size=BATCH_SIZE,
                            size_filter=True)
        corpus.load(load_dir='./data/corpus/')
    else:
        corpus = ConvCorpus(create_flg=True,
                            batch_size=BATCH_SIZE,
                            size_filter=True)
        corpus.save(save_dir='./data/corpus/')

    model = Seq2Seq(vocab_size=len(corpus.dic.token2id),
                    embed_size=EMBED_SIZE,
                    hidden_size=HIDDEN_SIZE,
                    batch_size=BATCH_SIZE,
                    flag_gpu=FLAG_GPU)
    model.reset()

    if FLAG_GPU:
        cuda.get_device(0).use()
        model.to_gpu(0)
        ARR = cuda.cupy
    else:
        ARR = np

    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))

    input_mat = []
    output_mat = []
    max_input_len = max_output_len = 0

    for input_text, output_text in zip(corpus.rough_posts, corpus.rough_cmnts):
        output_text.append(corpus.dic.token2id['<eos>'])

        max_input_len = max(max_input_len, len(input_text))
        max_output_len = max(max_output_len, len(output_text))

        input_mat.append(input_text)
        output_mat.append(output_text)

    for li in input_mat:
        insert_num = max_input_len - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_len - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    input_mat = np.array(input_mat, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T

    accum_loss = 0
    for num, epoch in enumerate(range(EPOCH_NUM)):
        total_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.rough_posts))

        #assert len(corpus.rough_posts)//BATCH_SIZE == 0
        for i in range(0, len(corpus.rough_posts), BATCH_SIZE):
            input_batch = input_mat[:, perm[i:i + BATCH_SIZE]]
            output_batch = output_mat[:, perm[i:i + BATCH_SIZE]]

            model.reset()
            model.encode(input_batch)

            end_batch = ARR.array(
                [corpus.dic.token2id['<start>'] for _ in range(BATCH_SIZE)])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch,
                                             first_words,
                                             train=True)
            next_ids = first_words
            accum_loss += loss
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = w_ids
                accum_loss += loss

            model.cleargrads()
            accum_loss.backward()
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch:', num + 1, 'batch:', batch_num,
                  'batch loss:{:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0

        if (epoch + 1) % 2 == 0:
            serializers.save_hdf5('./data/model/{}.model'.format(epoch + 1),
                                  model)
            serializers.save_hdf5('./data/model/{}.state'.format(epoch + 1),
                                  optimizer)
Example 13
def main():
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    parser.add_argument('--enable_checkpoint', type=bool, default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

    if len(args.init_from) > 0:
        model = pickle.load(open(args.init_from, 'rb'))
    else:
        model = CharRNN(len(vocab), n_units)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate,
                                   eps=1e-8)
    #optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer.GradientClipping(grad_clip))  # cap the gradient norm

    whole_len = train_data.shape[0]
    #jump         = whole_len / batchsize
    jump = int(whole_len / batchsize)
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    if args.gpu >= 0:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))
    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        x_batch = xp.array([
            train_data[(jump * j + i) % whole_len] for j in range(batchsize)
        ])
        y_batch = xp.array([
            train_data[(jump * j + i + 1) % whole_len]
            for j in range(batchsize)
        ])

        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = model.forward_one_step(x_batch,
                                               y_batch,
                                               state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            #print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()
            #optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            #accum_loss = Variable(xp.zeros(()).astype(np.float32))
            if args.gpu >= 0:
                accum_loss = Variable(xp.zeros(()).astype(np.float32))
                #accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            #optimizer.clip_grads(grad_clip)
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                      (args.checkpoint_dir, float(i) / jump))
                pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
                pickle.dump(
                    copy.deepcopy(model).to_cpu(),
                    open('%s/latest.chainermodel' % (args.checkpoint_dir),
                         'wb'))

        if (i + 1) % jump == 0:
            epoch += 1

            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()

def Training(date_pattern="", weight_load=False):
    print("\nTraining\n")

    # Training data
    df = pd.read_csv('nikkei-225-index-historical-chart-data.csv', header=8)
    #mat = df.query('date.str.match("^2019-")', engine='python')
    mat = df.query('date.str.match(' + date_pattern + ')', engine='python')
    train_data_t = mat[' value'].values
    print(train_data_t)

    train_data = np.arange(len(train_data_t), dtype="float32")

    for i in range(len(train_data_t) - 1):
        train_data[i] = train_data_t[i + 1] - train_data_t[i]

    gain = np.max(train_data) - np.min(train_data)
    gain = gain / 2

    train_data = train_data / gain

    print(train_data)
    # Build the input and target sequences
    train_x, train_t = [], []
    for i in range(len(train_data) - 1):
        train_x.append(train_data[i])
        train_t.append(train_data[i + 1])
    train_x = np.array(train_x, dtype="float32")
    train_t = np.array(train_t, dtype="float32")
    Num = len(train_x)

    # Model definition
    model = LSTM(in_size=IN_SIZE, hidden_size=HIDDEN_SIZE, out_size=OUT_SIZE)
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    if weight_load:
        serializers.load_npz("mymodel.npz", model)

    # Start training
    print("Train")
    st = datetime.datetime.now()
    for epoch in range(EPOCH_NUM):

        # Mini-batch training
        x, t = [], []
        # As mini-batch data, take BATCH_ROW_SIZE windows of length
        # BATCH_COL_SIZE sampled from the full time series
        for i in range(BATCH_ROW_SIZE):
            # Pick a random start position from which a full BATCH_COL_SIZE
            # window can be taken (limit the index so the window does not run past the end)
            index = np.random.randint(0, Num - BATCH_COL_SIZE + 1)
            x.append(train_x[index:index +
                             BATCH_COL_SIZE])  # take a BATCH_COL_SIZE-long slice of the series
            t.append(train_t[index:index + BATCH_COL_SIZE])
        x = np.array(x, dtype="float32")
        t = np.array(t, dtype="float32")
        loss = 0
        total_loss = 0
        model.reset()  # reset gradients and LSTM memory
        for i in range(BATCH_COL_SIZE):  # at each time step, feed BATCH_ROW_SIZE rows and accumulate the loss
            x_ = np.array([x[j, i] for j in range(BATCH_ROW_SIZE)],
                          dtype="float32")[:, np.newaxis]  # inputs at time i
            t_ = np.array([t[j, i] for j in range(BATCH_ROW_SIZE)],
                          dtype="float32")[:, np.newaxis]  # values at time i+1 (= the target to predict)
            loss += model(x=x_, t=t_, train=True)  # accumulate the loss
        loss.backward()  # backpropagate the error
        loss.unchain_backward()
        total_loss += loss.data
        optimizer.update()
        if (epoch + 1) % 100 == 0:
            ed = datetime.datetime.now()
            print("epoch:\t{}\ttotal loss:\t{}\ttime:\t{}".format(
                epoch + 1, total_loss, ed - st))
            st = datetime.datetime.now()

    serializers.save_npz("mymodel.npz", model)  # write the model out in npz format
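Usage note (a hypothetical call, inferred from the commented-out query inside Training): the date pattern is passed with its quotes embedded so that it lands inside df.query as a string literal, e.g.

Training('"^2019-"', weight_load=False)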