Exemple #1
0
def decode(
        words: List[str],
        encoder_model,
        decoder_model,
        dictionary: corpora.Dictionary,
        order: int=1,
) -> List[str]:

    assert words[-1] is not config.END_SYMBOL

    input_words_with_s = tokens2ids(words, dictionary, verbose=True)
    # input words の hidden state を求める
    ys, state = encoder_model.predictor.forward(
        [Variable(
            decoder_model.xp.array([word], dtype=decoder_model.xp.int32)
        ) for word in input_words_with_s],
        state=None,
        dropout=False,
        train=False
    )

    word = dictionary.token2id[config.END_SYMBOL]
    lst = [config.END_SYMBOL]

    while True:
        y, state = decoder_model.predictor.forward_one(
            Variable(
                decoder_model.xp.array([word], dtype=decoder_model.xp.int32)
            ),
            state,
            dropout=False,
            train=False
        )
        if len(lst) == 1:
            word = y.data[0].argsort()[-order]
        else:
            word = y.data[0].argmax()

        lst.append(dictionary[word])
        if dictionary[word] == config.END_SYMBOL or len(lst) >= 100:
            return lst
Exemple #2
0
def decode(
    words: List[str],
    encoder_model,
    decoder_model,
    dictionary: corpora.Dictionary,
    order: int = 1,
) -> List[str]:

    assert words[-1] is not config.END_SYMBOL

    input_words_with_s = tokens2ids(words, dictionary, verbose=True)
    # input words の hidden state を求める
    ys, state = encoder_model.predictor.forward([
        Variable(decoder_model.xp.array([word], dtype=decoder_model.xp.int32))
        for word in input_words_with_s
    ],
                                                state=None,
                                                dropout=False,
                                                train=False)

    word = dictionary.token2id[config.END_SYMBOL]
    lst = [config.END_SYMBOL]

    while True:
        y, state = decoder_model.predictor.forward_one(Variable(
            decoder_model.xp.array([word], dtype=decoder_model.xp.int32)),
                                                       state,
                                                       dropout=False,
                                                       train=False)
        if len(lst) == 1:
            word = y.data[0].argsort()[-order]
        else:
            word = y.data[0].argmax()

        lst.append(dictionary[word])
        if dictionary[word] == config.END_SYMBOL or len(lst) >= 100:
            return lst
Exemple #3
0
def train_encoder_decoder(
    model,
    dictionary: corpora.Dictionary,
    conversation_file: str,
    decoder_model_dir: str,
    epoch_size: int=100,
    batch_size: int=30,
    dropout: bool=False,
    gpu: bool=False
) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    # opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    conversation = load_conversation(conversation_file, dictionary)
    data_size = len(conversation)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("running epoch {}".format(epoch))
        indexes = np.random.permutation(range(data_size))
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()

            for index in indexes[bat_i:bat_i + batch_size]:
                pair_words = conversation[index]

                # encoder input words
                orig_words = pair_words[0][:-1]  # remove END_SYMBOL
                reply_words = pair_words[1]
                if orig_words:
                    assert orig_words[-1] is not config.END_SYMBOL
                input_words_with_s = tokens2ids(orig_words, dictionary)
                ys, state = model.predictor.forward(
                    [Variable(
                        model.xp.array(
                            [word],
                            dtype=model.xp.int32
                        )
                    ) for word in input_words_with_s],
                    state=None,
                    dropout=dropout,
                    train=True
                )

                # decode
                assert reply_words[0] == config.END_SYMBOL
                assert reply_words[-1] == config.END_SYMBOL
                output_words_with_s = tokens2ids(reply_words, dictionary)
                batch_loss = Variable(model.xp.zeros((), dtype=np.float32))
                try:
                    new_loss = model(
                        output_words_with_s,
                        state=state,  # init_state を input の state にする
                        dropout=dropout,
                        train=True
                    )
                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()
            # 平均化
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # 時間計測
            forward_end_time = datetime.now()

            # 最適化
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time
            # print(
            #     ("decoder epoch {} batch {}: loss {}, "
            #      "forward {}, optimizer {},").format(
            #         epoch,
            #         int(bat_i / batch_size),
            #         batch_loss.data,
            #         forward_delta,
            #         opt_delta,
            #     )
            # )
            print_fmt = (
                "epoch {} batch {}: "
                "loss {}, grad L2 norm: {}, forward {}, optimizer {}"
            )
            print(print_fmt.format(
                epoch,
                int(bat_i / batch_size),
                batch_loss.data,
                opt.compute_grads_norm(),
                forward_delta,
                opt_delta,
            ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(
                    os.path.join(
                        decoder_model_dir,
                        "model.npz"
                    ),
                    model
                )
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        decoder_model_dir,
                        "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S")
                        )
                    ),
                    model
                )
        print("finish epoch {}, loss {}".format(
            epoch,
            epoch_loss / math.ceil(data_size / batch_size)
        ))
        # save
        serializers.save_npz(
            os.path.join(
                decoder_model_dir,
                "model.npz"
            ),
            model
        )
        serializers.save_npz(
            os.path.join(
                decoder_model_dir,
                "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S")
                )
            ),
            model
        )
Exemple #4
0
def train_encoder(
    model,
    dictionary: corpora.Dictionary,
    sentence_file: str,
    model_dir: str,
    epoch_size: int=100,
    batch_size: int=30,
    dropout: bool=True,
    gpu: bool=False
) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    opt = optimizers.SGD()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    sentences = load_sentence(sentence_file)
    data_size = len(sentences)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("epoch {}".format(epoch))

        indexes = np.random.permutation(data_size)
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=model.xp.float32))

            for index in indexes[bat_i:bat_i + batch_size]:
                input_words = sentences[index]
                # id のリストに変換する
                input_words_with_s = tokens2ids(
                    input_words,
                    dictionary,
                    verbose=False
                )

                # フォワード
                try:
                    new_loss = model(
                        input_words_with_s,
                        dropout=dropout,
                        state=None,
                        train=True
                    )
                    if model.xp.isnan(new_loss.data):
                        sys.exit(1)

                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()

            # 平均化
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            # if gpu:
            #     batch_size_array = cuda.to_gpu(batch_size_array)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # 時間計測
            forward_end_time = datetime.now()

            # 最適化
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time

            print_fmt = (
                "epoch {} batch {}: "
                "loss {}, grad L2 norm: {}, forward {}, optimizer {}"
            )
            print(print_fmt.format(
                epoch,
                int(bat_i / batch_size),
                batch_loss.data,
                opt.compute_grads_norm(),
                forward_delta,
                opt_delta,
            ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(
                    os.path.join(
                        model_dir,
                        "model.npz"
                    ),
                    model
                )
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        model_dir,
                        "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S")
                        )
                    ),
                    model
                )
        print("finish epoch {}, loss {}".format(
            epoch,
            epoch_loss / epoch_size
        ))
        # save
        serializers.save_npz(
            os.path.join(
                model_dir,
                "model.npz"
            ),
            model
        )
        serializers.save_npz(
            os.path.join(
                model_dir,
                "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S")
                )
            ),
            model
        )
Exemple #5
0
def train_encoder_decoder(model,
                          dictionary: corpora.Dictionary,
                          conversation_file: str,
                          decoder_model_dir: str,
                          epoch_size: int = 100,
                          batch_size: int = 30,
                          dropout: bool = False,
                          gpu: bool = False) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    # opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    conversation = load_conversation(conversation_file, dictionary)
    data_size = len(conversation)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("running epoch {}".format(epoch))
        indexes = np.random.permutation(range(data_size))
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()

            for index in indexes[bat_i:bat_i + batch_size]:
                pair_words = conversation[index]

                # encoder input words
                orig_words = pair_words[0][:-1]  # remove END_SYMBOL
                reply_words = pair_words[1]
                if orig_words:
                    assert orig_words[-1] is not config.END_SYMBOL
                input_words_with_s = tokens2ids(orig_words, dictionary)
                ys, state = model.predictor.forward([
                    Variable(model.xp.array([word], dtype=model.xp.int32))
                    for word in input_words_with_s
                ],
                                                    state=None,
                                                    dropout=dropout,
                                                    train=True)

                # decode
                assert reply_words[0] == config.END_SYMBOL
                assert reply_words[-1] == config.END_SYMBOL
                output_words_with_s = tokens2ids(reply_words, dictionary)
                batch_loss = Variable(model.xp.zeros((), dtype=np.float32))
                try:
                    new_loss = model(
                        output_words_with_s,
                        state=state,  # init_state を input の state にする
                        dropout=dropout,
                        train=True)
                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()
            # 平均化
            batch_size_array = model.xp.array(batch_size,
                                              dtype=model.xp.float32)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # 時間計測
            forward_end_time = datetime.now()

            # 最適化
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time
            # print(
            #     ("decoder epoch {} batch {}: loss {}, "
            #      "forward {}, optimizer {},").format(
            #         epoch,
            #         int(bat_i / batch_size),
            #         batch_loss.data,
            #         forward_delta,
            #         opt_delta,
            #     )
            # )
            print_fmt = ("epoch {} batch {}: "
                         "loss {}, grad L2 norm: {}, forward {}, optimizer {}")
            print(
                print_fmt.format(
                    epoch,
                    int(bat_i / batch_size),
                    batch_loss.data,
                    opt.compute_grads_norm(),
                    forward_delta,
                    opt_delta,
                ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(
                    os.path.join(decoder_model_dir, "model.npz"), model)
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        decoder_model_dir, "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S"))), model)
        print("finish epoch {}, loss {}".format(
            epoch, epoch_loss / math.ceil(data_size / batch_size)))
        # save
        serializers.save_npz(os.path.join(decoder_model_dir, "model.npz"),
                             model)
        serializers.save_npz(
            os.path.join(
                decoder_model_dir, "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S"))), model)
Exemple #6
0
def train_encoder(model,
                  dictionary: corpora.Dictionary,
                  sentence_file: str,
                  model_dir: str,
                  epoch_size: int = 100,
                  batch_size: int = 30,
                  dropout: bool = True,
                  gpu: bool = False) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    opt = optimizers.SGD()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    sentences = load_sentence(sentence_file)
    data_size = len(sentences)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("epoch {}".format(epoch))

        indexes = np.random.permutation(data_size)
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=model.xp.float32))

            for index in indexes[bat_i:bat_i + batch_size]:
                input_words = sentences[index]
                # id のリストに変換する
                input_words_with_s = tokens2ids(input_words,
                                                dictionary,
                                                verbose=False)

                # フォワード
                try:
                    new_loss = model(input_words_with_s,
                                     dropout=dropout,
                                     state=None,
                                     train=True)
                    if model.xp.isnan(new_loss.data):
                        sys.exit(1)

                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()

            # 平均化
            batch_size_array = model.xp.array(batch_size,
                                              dtype=model.xp.float32)
            # if gpu:
            #     batch_size_array = cuda.to_gpu(batch_size_array)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # 時間計測
            forward_end_time = datetime.now()

            # 最適化
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time

            print_fmt = ("epoch {} batch {}: "
                         "loss {}, grad L2 norm: {}, forward {}, optimizer {}")
            print(
                print_fmt.format(
                    epoch,
                    int(bat_i / batch_size),
                    batch_loss.data,
                    opt.compute_grads_norm(),
                    forward_delta,
                    opt_delta,
                ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(os.path.join(model_dir, "model.npz"),
                                     model)
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        model_dir, "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S"))), model)
        print("finish epoch {}, loss {}".format(epoch,
                                                epoch_loss / epoch_size))
        # save
        serializers.save_npz(os.path.join(model_dir, "model.npz"), model)
        serializers.save_npz(
            os.path.join(
                model_dir, "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S"))), model)