Exemple #1
0
def main():
    """Parse command-line options and drive seq2seq training over the corpus.

    Supports resuming: ``--loop`` skips already-trained steps and
    ``--resume resume`` reloads a saved model instead of starting fresh.
    """
    arg_parser = argparse.ArgumentParser(description='learning main')
    arg_parser.add_argument('--loop', '-l', default=0, type=int,
                            help='Set the number of steps to resume learning')
    arg_parser.add_argument('--resume', '-r', type=str, default="",
                            help='set whether to resume learning')
    opts = arg_parser.parse_args()

    # Build the training corpora: full word lists and stop-word-filtered lists.
    word_list = get_word_lists("./aozora_text/files/files_all_rnp.txt")
    stop_word_list = get_word_lists(
        "./aozora_text/files/stop_files_all_rnp.txt")
    shaper = DataShaping()

    # Reload a checkpoint when requested, otherwise train from scratch.
    mode = "resume" if opts.resume == "resume" else "train"
    seq2seq = Seq2Seq(mode)

    # One training step per corpus entry, checkpointing after every step.
    for step in range(opts.loop, len(word_list)):
        train = shaper.make_data_train(stop_word_list, step)
        teach, target = shaper.make_data_teach_target(word_list, step)
        seq2seq.train(train, teach, target)
        seq2seq.save_model()
Exemple #2
0
def main():
    """Sample a random seed sentence and iteratively generate text from it.

    Each generated sentence is fed back in as the prompt for the next of
    three generation rounds.
    """
    # Build the word corpus.
    word_list = get_word_lists("./aozora_text/files/files_all_tmp.txt")
    # stop_word_list = get_word_lists(
    #     "./aozora_text/files/stop_files_all_tmp.txt")
    ds = DataShaping()
    seq2seq = Seq2Seq("make")

    st = StringOperation()
    start_token = np.array([st.sentens_array_to_vec(["BOS"])])

    # Pick a random entry, drop its first token and strip empty tokens.
    seed = word_list[rand.randint(0, len(word_list) - 1)][1:]
    seed = [token for token in seed if token != '']
    # NOTE(review): str() of a token list produces its repr (brackets and
    # quotes included) — presumably remove_stopword expects that; confirm.
    sentens = str(seed)
    w = WakachiMethod(Wakachi)

    for _ in range(3):
        print("sentens:", sentens)
        sentens_rm_stop_word = w.remove_stopword(sentens)
        print("rm stop word sentens", sentens_rm_stop_word)
        vec = np.array([st.sentens_array_to_vec(sentens_rm_stop_word)])
        vec = seq2seq.make_sentens_vec(vec, start_token)
        vec = np.array(vec).reshape(len(vec), 128)
        sentens_arr = st.sentens_vec_to_sentens_arr_prob(vec)
        sentens = str(sentens_arr)
        print(sentens)
Exemple #3
0
def test():
    """Load a saved seq2seq checkpoint and report loss/accuracy on the test set."""
    print('building model...')
    voc = Voc()
    seq2seq = Seq2Seq(voc.num_words).to(args.device)
    param_optimizer = args.optimiser(seq2seq.parameters(),
                                     lr=args.learning_rate)
    decoder_optimizer = args.optimiser(seq2seq.decoder.parameters(),
                                       lr=args.learning_rate *
                                       args.decoder_ratio)
    print('done')

    # A saved checkpoint is mandatory for evaluation — bail out early without one.
    if args.param_file is None:
        print('please specify the saved param file.')
        exit(-1)

    print('loading saved parameters from ' + args.param_file + '...')
    ckpt = torch.load(args.param_file)
    seq2seq.load_state_dict(ckpt['model'])
    param_optimizer.load_state_dict(ckpt['opt'])
    decoder_optimizer.load_state_dict(ckpt['de_opt'])
    voc = ckpt['voc']
    print('done')

    print('loading test data...')
    test_set = FruitSeqDataset(voc, dataset_file_path=args.test_file)
    print('done')

    test_seq_acc, test_tok_acc, test_loss = eval_model(seq2seq, test_set)
    print(
        "[TEST]Loss: {:.4f}; Seq-level Accuracy: {:.4f}; Tok-level Accuracy: {:.4f}"
        .format(test_loss, test_seq_acc * 100, test_tok_acc * 100))
Exemple #4
0
def test(args, vocab_size):
    """Evaluate a saved Seq2Seq model on the test set.

    Loads the checkpoint named by ``args.load_model_path``, runs the model
    over the test loader without gradients and records per-batch NLL loss,
    perplexity and decoded text via the module-level ``recorder``.

    Args:
        args: parsed command-line namespace (device, model dims, beam-search
            settings, data paths, ...).
        vocab_size: size of the vocabulary for the model's embedding/output.
    """
    device = t.device('cuda') if args.use_gpu else t.device('cpu')
    rev_model = args.load_model_path
    model = Seq2Seq(embed_size=args.embed_size,
                    enc_dec_output_size=args.enc_dec_output_size,
                    attn_size=args.attn_size,
                    num_layers=args.num_layers,
                    bidirectional=args.bidirectional,
                    use_gpu=args.use_gpu,
                    vocab_size=vocab_size).to(device)

    # A checkpoint name is mandatory for evaluation.
    # NOTE(review): assert is stripped under `python -O`; an explicit raise
    # would be sturdier, but the exception type is kept for callers.
    assert rev_model is not None

    # Restore saved parameters.
    # NOTE(review): if the path does not exist this silently evaluates an
    # untrained model — confirm that is intended.
    rev_path = os.path.join(model_dir, rev_model)
    if os.path.exists(rev_path):
        print('read in model from', rev_path)
        model.load(load_path=rev_path)

    batch_size = args.batch_size
    test_set = Set(read_data(args.test_data_root))
    test_loader = Loader(test_set,
                         batch_size,
                         shuffle=False,
                         use_gpu=args.use_gpu,
                         num_workers=args.num_workers).loader

    model.eval()
    with t.no_grad():
        recorder.epoch_start(0, 'test', len(test_set))
        for batch_id, batch in enumerate(test_loader):
            encoder_inputs, seq_len, decoder_inputs, weights = batch
            # Tensor.to returns a new tensor, so each result must be rebound.
            # (A redundant no-op `encoder_inputs.to(device)` whose result was
            # discarded has been removed.)
            encoder_inputs = encoder_inputs.to(device)
            seq_len = seq_len.to(device)
            decoder_inputs = decoder_inputs.to(device)
            weights = weights.to(device)
            # Drop the final token from the decoder inputs (shifted teacher
            # sequence); beam search only when beam_size > 1.
            logits, output_symbols = model(
                encoder_inputs,
                seq_len,
                decoder_inputs[:, :-1],
                mode='test',
                max_len=args.max_len,
                beam_search=args.beam_size != 1,
                beam_size=args.beam_size,
                topk=args.topk)

            # Per-batch negative log-likelihood and perplexity.
            nll_loss = compute_loss(logits, decoder_inputs[:, 1:], weights)
            ppl = perplexity(nll_loss)
            recorder.batch_end(batch_id, batch_size, nll_loss, ppl)
            recorder.log_text(encoder_inputs.tolist(),
                              decoder_inputs[:, 1:].tolist(),
                              output_symbols.tolist())
        recorder.epoch_end()
                            shuffle=True,
                            num_workers=16,
                            pin_memory=True)
    # NOTE(review): this fragment starts mid-function; the enclosing def and
    # the train_loader construction above it are not visible here.
    # NOTE(review): shuffle=True on a test loader is unusual — confirm intent.
    test_loader = DataLoader(test_set,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=16,
                             pin_memory=True)
    # Create Model: ResNet-18-based encoder plus a decoder over the vocabulary
    # (the decoder receives enc_hid_dim, presumably for attention — confirm).
    encoder = Encoder(lstm_hidden_size=enc_hid_dim, arch="resnet18").to(device)
    decoder = Decoder(output_dim=vocab_size,
                      emb_dim=emb_dim,
                      enc_hid_dim=enc_hid_dim,
                      dec_hid_dim=dec_hid_dim,
                      dropout=dropout).to(device)
    model = Seq2Seq(encoder=encoder, decoder=decoder, device=device).to(device)
    # Resume model: restore weights and best WER from a prior checkpoint.
    if checkpoint is not None:
        start_epoch, best_wer = resume_model(model, checkpoint)
    # Run the model parallelly across all visible GPUs.
    if torch.cuda.device_count() > 1:
        print("Using {} GPUs".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)
    # Create loss criterion & optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)

    # Start evaluation
    print("Evaluation Started".center(60, '#'))
Exemple #6
0

if __name__ == "__main__":
    # Inference entry point: rebuild the attention seq2seq model, load trained
    # weights from disk and run it on a single test sample.
    config = Config()
    device = config.device

    # Attention module shared between encoder and decoder hidden sizes.
    attn = Attention(config.s2s_enc_hid, config.s2s_dec_hid)

    enc = Encoder(config.s2s_emb_dim, config.s2s_enc_hid, config.s2s_dec_hid,
                  config.s2s_enc_dropout)

    # Decoder output dimension is the number of character classes.
    dec = Decoder(len(config.class_char), config.s2s_emb_dim,
                  config.s2s_enc_hid, config.s2s_dec_hid,
                  config.s2s_enc_dropout, attn)

    model = Seq2Seq(enc, dec, device).to(device)

    # NOTE(review): init_weights is applied and then immediately overwritten
    # by load_state_dict, so the apply() call appears redundant — confirm.
    model.apply(init_weights)
    model.load_state_dict(torch.load('weight/s2s.pt'))
    model.eval()

    # Single-file test generator (batch size 1).
    data = gen(["data/test/96.json"], 1, config.max_box_num, device)

    with torch.no_grad():
        src, trg = next(data)
        output = model(src)
        # Flatten the output to (-1, num_classes) for a per-position softmax.
        output = output.permute(1, 0,
                                2).contiguous().view(-1,
                                                     len(config.class_char))
        # torch.max over dim 1 yields (max probability, argmax class) pairs.
        output = torch.max(F.softmax(output, dim=1), 1)
        possible, label = output.values, output.indices
Exemple #7
0
def train(args, vocab_size):
    """Train the Seq2Seq model, validating after every epoch.

    Resumes from ``args.load_model_path`` when it names an existing
    checkpoint, otherwise starts from epoch 1.  The model is checkpointed
    every 5 epochs, and loss/perplexity are logged through the module-level
    ``recorder``.

    Args:
        args: parsed command-line namespace (model dims, optimiser/scheduler
            settings, data paths, beam-search options, ...).
        vocab_size: size of the vocabulary for the model's embedding/output.
    """
    print('enter train func')
    device = t.device('cuda') if args.use_gpu else t.device('cpu')
    model = Seq2Seq(embed_size=args.embed_size,
                    enc_dec_output_size=args.enc_dec_output_size,
                    attn_size=args.attn_size,
                    num_layers=args.num_layers,
                    bidirectional=args.bidirectional,
                    use_gpu=args.use_gpu,
                    vocab_size=vocab_size).to(device)

    print('Model structure')
    print(model)
    print('The model has %d parameters' % count_parameters(model))

    # Default to a fresh run; overridden below when a checkpoint is found.
    # (Previously these stayed unbound when a load path was supplied but the
    # file did not exist, crashing later with NameError.)
    start_epoch = 1
    last_epoch = -1
    if args.load_model_path is not None:
        rev_path = os.path.join(model_dir, args.load_model_path)
        if os.path.exists(rev_path):
            print('read in model from', rev_path)
            last_epoch = model.load(load_path=rev_path,
                                    return_list=['epoch'])[0]
            start_epoch = last_epoch + 1

    optimizer = Adam(model.parameters(), lr=args.lr)
    # Optional LR schedule; scheduler stays None for unrecognised types so the
    # epoch loop can guard on it instead of raising NameError.
    scheduler = None
    if args.scheduler_type == 'exponential':
        scheduler = lr_scheduler.ExponentialLR(optimizer,
                                               gamma=args.exponential_lr_decay,
                                               last_epoch=last_epoch)
    elif args.scheduler_type == 'step':
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.step_size,
                                        gamma=args.step_lr_decay)
    print('read in data')

    # Build datasets and dataloaders.
    batch_size = args.batch_size
    train_set = Set(read_data(args.train_data_root))
    valid_set = Set(read_data(args.valid_data_root))
    train_loader = Loader(train_set,
                          batch_size,
                          shuffle=True,
                          use_gpu=args.use_gpu,
                          num_workers=args.num_workers).loader
    valid_loader = Loader(valid_set,
                          batch_size,
                          shuffle=False,
                          use_gpu=args.use_gpu,
                          num_workers=args.num_workers).loader

    # Report dataset sizes.
    print('data scale:')
    print('train data:', len(train_set), "batch_nums:", len(train_loader))
    print('valid data:', len(valid_set), "batch_nums:", len(valid_loader))

    # Main training loop.
    print('start training...')
    epochs = args.max_epoch

    for epoch in range(start_epoch, epochs + 1):
        model.train()
        recorder.epoch_start(epoch, 'train', len(train_set))

        if scheduler is not None:
            # NOTE(review): get_lr() is deprecated in newer torch in favour
            # of get_last_lr(); kept for compatibility with the pinned version.
            print(epoch, 'lr={:.10f}'.format(scheduler.get_lr()[0]))
        for batch_id, batch in enumerate(train_loader):
            encoder_inputs, seq_len, decoder_inputs, weights = batch
            # Tensor.to returns a new tensor, so each result must be rebound.
            # (A redundant no-op `encoder_inputs.to(device)` was removed.)
            encoder_inputs = encoder_inputs.to(device)
            seq_len = seq_len.to(device)
            decoder_inputs = decoder_inputs.to(device)
            weights = weights.to(device)
            optimizer.zero_grad()

            # The longest sentence ends in EOS, which is never needed as a
            # decoder *input*, so the last column is dropped to save compute.
            logits, output_symbols = model(
                encoder_inputs,
                seq_len,
                decoder_inputs[:, :-1],
                mode='train',
                max_len=None,
                teacher_forcing_ratio=args.teacher_forcing_ratio)

            # NLL loss against the shifted targets, plus perplexity.
            nll_loss = compute_loss(logits, decoder_inputs[:, 1:], weights)
            ppl = perplexity(nll_loss)
            # Backprop with gradient clipping to mitigate exploding gradients.
            nll_loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(),
                                     args.max_gradient_norm)
            optimizer.step()

            recorder.batch_end(batch_id, batch_size, nll_loss, ppl)

        if scheduler is not None:
            scheduler.step()
        recorder.epoch_end()
        # Checkpoint every 5 epochs, tagging the file with time/loss/ppl.
        if epoch % 5 == 0:
            model.save(os.path.join(
                model_dir,
                f'{args.project}_{datetime.datetime.now().strftime("%y_%m_%d_%H:%M:%S")}_{nll_loss.item()}_{ppl.item()}'
            ),
                       epoch=epoch)
        # After each training epoch, compute loss/ppl on the validation set.
        model.eval()
        with t.no_grad():
            recorder.epoch_start(epoch, 'eval', len(valid_set))
            for batch_id, batch in enumerate(valid_loader):
                encoder_inputs, seq_len, decoder_inputs, weights = batch
                encoder_inputs = encoder_inputs.to(device)
                seq_len = seq_len.to(device)
                decoder_inputs = decoder_inputs.to(device)
                weights = weights.to(device)
                # Beam search only when beam_size > 1.
                logits, output_symbols = model(
                    encoder_inputs,
                    seq_len,
                    decoder_inputs[:, :-1],
                    mode='eval',
                    max_len=args.max_len,
                    beam_search=args.beam_size != 1,
                    beam_size=args.beam_size,
                    topk=args.topk)
                nll_loss = compute_loss(logits, decoder_inputs[:, 1:], weights)
                ppl = perplexity(nll_loss)
                recorder.batch_end(batch_id, batch_size, nll_loss, ppl)
                recorder.log_text(encoder_inputs.tolist(),
                                  decoder_inputs[:, 1:].tolist(),
                                  output_symbols.tolist())
            recorder.epoch_end()
Exemple #8
0
def train():
    """Train the seq2seq model with periodic logging, evaluation and saving.

    Runs ``args.iter_num`` passes over the training batches; prints running
    averages every ``args.print_freq`` iterations, evaluates on the dev set
    every ``args.eval_freq`` and writes a checkpoint every ``args.save_freq``.
    Optionally warm-starts model and optimizers from ``args.param_file``.
    """
    print('building vocabulary...')
    voc = Voc()
    print('done')

    print('loading data and building batches...')
    train_set = FruitSeqDataset(voc, dataset_file_path=args.train_file)
    dev_set = FruitSeqDataset(voc, dataset_file_path=args.dev_file)
    # test_set = FruitSeqDataset(voc, dataset_file_path=TEST_FILE_PATH)
    print('done')

    print('building model...')
    seq2seq = Seq2Seq(voc.num_words).to(args.device)
    param_optimizer = args.optimiser(seq2seq.parameters(),
                                     lr=args.learning_rate)
    # The decoder gets its own optimizer with a scaled learning rate.
    decoder_optimizer = args.optimiser(seq2seq.decoder.parameters(),
                                       lr=args.learning_rate *
                                       args.speaker_ratio)
    if args.param_file is not None:
        print('\tloading saved parameters from ' + args.param_file + '...')
        checkpoint = torch.load(args.param_file)
        seq2seq.load_state_dict(checkpoint['model'])
        param_optimizer.load_state_dict(checkpoint['opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
        voc = checkpoint['voc']
        print('\tdone')
    print('done')

    print('initialising...')
    start_iteration = 1
    print_loss = 0.
    print_seq_acc = 0.
    print_tok_acc = 0.
    max_dev_seq_acc = 0.
    # Initialise loss so the checkpoint save below is well-defined even if
    # train_set is empty (previously it could be referenced unbound).
    loss = 0.
    training_losses = []
    training_tok_acc = []
    training_seq_acc = []
    training_sim = []
    eval_tok_acc = []
    eval_seq_acc = []
    print('done')

    print('training...')
    # `iteration` (renamed from `iter`, which shadowed the builtin).
    for iteration in range(start_iteration, args.iter_num + 1):
        for idx, data_batch in enumerate(train_set):
            seq_acc, tok_acc, loss = train_epoch(seq2seq, data_batch,
                                                 param_optimizer,
                                                 decoder_optimizer)
            print_loss += loss
            print_seq_acc += seq_acc
            print_tok_acc += tok_acc

        # Periodically report averages and reset the running sums.
        if iteration % args.print_freq == 0:
            print_loss_avg = print_loss / (args.print_freq * len(train_set))
            print_seq_acc_avg = print_seq_acc / (args.print_freq *
                                                 len(train_set))
            print_tok_acc_avg = print_tok_acc / (args.print_freq *
                                                 len(train_set))
            print(
                "Iteration: {}; Percent complete: {:.1f}%; Avg loss: {:.4f}; Avg seq acc: {:.4f}; Avg tok acc: {:.4f}"
                .format(iteration, iteration / args.iter_num * 100,
                        print_loss_avg, print_seq_acc_avg, print_tok_acc_avg))
            training_seq_acc.append(print_seq_acc_avg)
            training_tok_acc.append(print_tok_acc_avg)
            training_losses.append(print_loss_avg)
            print_seq_acc = 0.
            print_tok_acc = 0.
            print_loss = 0.

        # Periodically evaluate on the dev set and track the best seq accuracy.
        if iteration % args.eval_freq == 0:
            dev_seq_acc, dev_tok_acc, dev_loss = eval_model(seq2seq, dev_set)
            if dev_seq_acc > max_dev_seq_acc:
                max_dev_seq_acc = dev_seq_acc
            eval_seq_acc.append(dev_seq_acc)
            eval_tok_acc.append(dev_tok_acc)

            print(
                "[EVAL]Iteration: {}; Loss: {:.4f}; Avg Seq Acc: {:.4f}; Avg Tok Acc: {:.4f}; Best Seq Acc: {:.4f}"
                .format(iteration, dev_loss, dev_seq_acc, dev_tok_acc,
                        max_dev_seq_acc))

        # Periodically checkpoint model, optimizers, vocab and history.
        if iteration % args.save_freq == 0:
            directory = os.path.join(args.save_dir, 'seq2seq')
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save(
                {
                    'iteration': iteration,
                    'model': seq2seq.state_dict(),
                    'opt': param_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': loss,
                    'voc': voc,
                    'args': args,
                    'records': {
                        'training_loss': training_losses,
                        'training_tok_acc': training_tok_acc,
                        'training_seq_acc': training_seq_acc,
                        'training_sim': training_sim,
                        'eval_tok_acc': eval_tok_acc,
                        'eval_seq_acc': eval_seq_acc
                    }
                },
                os.path.join(
                    directory, '{}_{}_{}.tar'.format(args.seed, iteration,
                                                     'checkpoint')))
Exemple #9
0
                         shuffle=False,
                         batch_size=args.batch_size,
                         device=args.device,
                         is_train=False)

###############################
# get models
###############################
# Build encoder/decoder from the vocabularies attached to train_loader and
# assemble the full seq2seq model on the configured device.
encoder = Encoder(train_loader.train_inputs_vocab.word_counts,
                  args.encoder_embedded_size,
                  args.encoder_hidden_size).to(args.device)
# The decoder also receives the SOS/EOS token indices and the teacher-forcing
# ratio used during decoding.
decoder = Decoder(train_loader.train_targets_vocab.word_counts,
                  args.decoder_embedded_size, args.decoder_hidden_size,
                  train_loader.SOS_IDX, train_loader.EOS_IDX,
                  args.teacher_forcing_ratio, args.device).to(args.device)
seq2seq = Seq2Seq(encoder, decoder, args.device)

###############################
# get optimizer
###############################
optimizer = torch.optim.Adam(seq2seq.parameters(), lr=args.learning_rate)

###############################
# check directories exist
###############################
# Create the save directory up front so later checkpoint writes cannot fail.
os.makedirs(args.save_dir_path, exist_ok=True)

def main():
    global seq2seq
    if args.load_model:
Exemple #10
0
                        default=True,
                        help='Do you want to save the model plot'
                        ' after the train')
    # NOTE(review): this fragment starts mid-function; the enclosing def and
    # the earlier parser.add_argument calls are not visible here.
    parser.add_argument('--batch_size',
                        type=int,
                        default=constants.DEFAULT_BATCH_SIZE)

    args = parser.parse_args()

    # Collect the CLI options that parameterise the model build.
    model_props = {
        'hidden_units': args.hidden_units,
        'embedding_size': args.embedding_size,
        'trainable_embedding': args.train_emb,
        'save_model': args.save_model,
        'save_model_plot': args.save_plot_model,
        'epochs': args.epochs,
    }

    inp, output = load_preprocessed_data()

    # Hold out 20% of the data for testing; fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(inp,
                                                        output,
                                                        test_size=0.2,
                                                        random_state=42)

    # Build and fit the seq2seq model.
    config = load_config()
    seq2seq = Seq2Seq(config, model_props)
    seq2seq.build_model()

    seq2seq.fit(X_train, y_train, X_test, y_test, args.batch_size)