Example #1
0
def default_params():
    """Fit an ``XGBoost`` model with near-default settings and report accuracy.

    Uses the module-level splits ``x_train``/``y_train``, ``x_eval``/``y_eval``
    and ``x_test``/``y_test``. Prints the accuracy on each split and finally
    asks the model to plot its learning curve.
    """
    # Build the model and fit it, monitoring the evaluation split.
    model = XGBoost(n_estimators=100, random_state=123)
    model.fit(x_train, y_train, eval_set=(x_eval, y_eval))

    # Predict on every split first, in train / eval / test order.
    train_pred, eval_pred, test_pred = [
        model.predict(features) for features in (x_train, x_eval, x_test)
    ]

    # Score and print one split at a time.
    reports = (
        ("train acc = {0}", y_train, train_pred),
        ("eval acc = {0}", y_eval, eval_pred),
        ("test acc = {0}", y_test, test_pred),
    )
    for template, truth, predicted in reports:
        print(template.format(acc(truth, predicted)))

    # The learning curve is the starting point for parameter tuning.
    model.plot_learning_curve()
def test(model, iterator, criterion):
    """Evaluate ``model`` over ``iterator`` and return mean loss and accuracy.

    Relies on the module-level names ``args``, ``TEXT``, ``LABEL``,
    ``styling`` and ``acc``.

    Args:
        model: Called as ``model(enc_input, dec_input)``; put into eval mode.
        iterator: Iterable of batches exposing ``text``, ``target_text`` and
            ``SA`` attributes.
        criterion: Loss callable invoked as ``criterion(predictions, targets)``.

    Returns:
        ``(mean_loss, mean_acc)``: the per-batch loss and accuracy summed over
        all batches and divided by the number of batches.
    """
    total_loss = 0
    iter_num = 0
    te_acc = 0
    model.eval()

    # One no_grad context covers the whole evaluation pass.
    with torch.no_grad():
        for batch in iterator:
            enc_input, dec_input, enc_label = batch.text, batch.target_text, batch.SA
            # Targets are the decoder inputs shifted left by one token.
            dec_output = dec_input[:, 1:]
            dec_outputs = torch.zeros(dec_output.size(0),
                                      args.max_len).type_as(dec_input.data)

            # Apply the emotion label and speech style to the batch.
            enc_input, dec_input, dec_outputs = \
                styling(enc_input, dec_input, dec_output, dec_outputs, enc_label, args, TEXT, LABEL)

            y_pred = model(enc_input, dec_input)

            # Flatten predictions to (N, C) and targets to (N,) for the loss.
            y_pred = y_pred.reshape(-1, y_pred.size(-1))
            target = dec_outputs.view(-1).long()

            # Boolean mask of non-padding positions (<pad> == 1). Index with
            # the mask itself: wrapping it in a Python list relies on the
            # deprecated "non-tuple sequence" indexing behaviour.
            non_pad = target != 1

            loss = criterion(y_pred[non_pad], target[non_pad])
            test_acc = acc(y_pred, target)

            total_loss += loss
            iter_num += 1
            te_acc += test_acc

    # NOTE(review): assumes `acc` returns a tensor, since `.data` is used here.
    mean_loss = total_loss.data.cpu().numpy() / iter_num
    mean_acc = te_acc.data.cpu().numpy() / iter_num
    return mean_loss, mean_acc
Example #3
0
def tuned_params():
    """Fit an ``XGBoost`` model with explicit hyper-parameters and report accuracy.

    Uses the module-level splits ``x_train``/``y_train``, ``x_eval``/``y_eval``
    and ``x_test``/``y_test``. Training uses early stopping against the
    evaluation split; the best iteration and the accuracy on each split are
    printed.
    """
    # Hyper-parameters gathered in one place; order matches the constructor call.
    hyper_params = {
        'n_estimators': 100,
        'max_depth': 6,
        'learning_rate': 0.1,
        'objective': 'binary:logistic',
        'gamma': 0,
        'reg_lambda': 3,
        'subsample': 1,
        'colsample': 1,
        'random_state': 123,
    }
    model = XGBoost(**hyper_params)
    model.fit(x_train,
              y_train,
              eval_set=(x_eval, y_eval),
              early_stopping_rounds=20)
    print('best iter: {}'.format(model.best_iter))

    # Predict on every split first, in train / eval / test order.
    train_pred, eval_pred, test_pred = [
        model.predict(features) for features in (x_train, x_eval, x_test)
    ]

    # Score and print one split at a time.
    reports = (
        ("train acc = {0}", y_train, train_pred),
        ("eval acc = {0}", y_eval, eval_pred),
        ("test acc = {0}", y_test, test_pred),
    )
    for template, truth, predicted in reports:
        print(template.format(acc(truth, predicted)))
def train(model, iterator, optimizer, criterion):
    """Run one training pass over ``iterator`` and return mean loss and accuracy.

    Relies on the module-level names ``args``, ``TEXT``, ``LABEL``,
    ``styling``, ``acc`` and ``train_test``.

    Args:
        model: Called as ``model(enc_input, dec_input)``; put into train mode.
        iterator: Iterable of batches exposing ``text``, ``target_text`` and
            ``SA`` attributes.
        optimizer: Optimizer whose gradients are zeroed and stepped per batch.
        criterion: Loss callable invoked as ``criterion(predictions, targets)``.

    Returns:
        ``(mean_loss, mean_acc)``: the per-batch loss and accuracy summed over
        all batches and divided by the number of batches.
    """
    total_loss = 0
    iter_num = 0
    tr_acc = 0
    model.train()

    for step, batch in enumerate(iterator):
        optimizer.zero_grad()

        enc_input, dec_input, enc_label = batch.text, batch.target_text, batch.SA

        # Targets are the decoder inputs shifted left by one token.
        dec_output = dec_input[:, 1:]
        dec_outputs = torch.zeros(dec_output.size(0),
                                  args.max_len).type_as(dec_input.data)

        # Apply the emotion label and speech style to the batch.
        enc_input, dec_input, dec_outputs = \
            styling(enc_input, dec_input, dec_output, dec_outputs, enc_label, args, TEXT, LABEL)

        y_pred = model(enc_input, dec_input)

        # Flatten predictions to (N, C) and targets to (N,) for the loss.
        y_pred = y_pred.reshape(-1, y_pred.size(-1))
        dec_output = dec_outputs.view(-1).long()

        # Boolean mask of non-padding positions (<pad> == 1).
        non_pad = dec_output != 1
        # `train_test` still receives the mask wrapped in a list, as before.
        real_value_index = [non_pad]

        # Padding is excluded from the loss. Index with the mask itself:
        # indexing with a list relies on deprecated sequence-index behaviour.
        loss = criterion(y_pred[non_pad], dec_output[non_pad])
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            train_acc = acc(y_pred, dec_output)

        # Accumulate a detached copy so each batch's autograd graph can be
        # freed instead of staying reachable through the running total.
        total_loss += loss.detach()
        iter_num += 1
        tr_acc += train_acc

        train_test(step, y_pred, dec_output, real_value_index, enc_input, args,
                   TEXT, LABEL)

    # NOTE(review): assumes `acc` returns a tensor, since `.data` is used here.
    mean_loss = total_loss.data.cpu().numpy() / iter_num
    mean_acc = tr_acc.data.cpu().numpy() / iter_num
    return mean_loss, mean_acc
def main(parser):
    """Train the Transformer model configured by the parsed command-line arguments.

    Reads ``config.json`` from both ``args.data_dir`` and ``args.model_dir``,
    builds the vocabulary, tokenizer, model, data loaders, loss, optimizer and
    warm-up scheduler, optionally restores ``best.tar`` from the model
    directory, and then runs the training loop. Every
    ``model_config.summary_step`` global steps the model is evaluated on the
    validation set. When the current mini-batch training accuracy beats the
    best seen so far, a checkpoint (``best.tar``) and ``summary.json`` are
    written.

    Args:
        parser: Object whose ``parse_args()`` result provides ``data_dir`` and
            ``model_dir``.
    """
    # Config
    args = parser.parse_args()
    data_dir = Path(args.data_dir)
    model_dir = Path(args.model_dir)
    data_config = Config(json_path=data_dir / 'config.json')
    model_config = Config(json_path=model_dir / 'config.json')

    # Vocab & Tokenizer
    with open(data_config.token2idx_vocab, mode='rb') as vocab_file:
        token2idx_vocab = json.load(vocab_file)
        print("token2idx_vocab: ", token2idx_vocab)
    vocab = Vocabulary(token2idx=token2idx_vocab)
    tokenizer = Tokenizer(vocab=vocab,
                          split_fn=mecab_token_pos_flat_fn,
                          pad_fn=keras_pad_fn,
                          maxlen=model_config.maxlen)
    model_config.vocab_size = len(vocab.token2idx)

    # Model & Model Params
    model = Transformer(config=model_config, vocab=vocab)

    # Train & Val Datasets
    tr_ds = ChatbotDataset(data_config.train,
                           tokenizer.list_of_string_to_arr_of_pad_token_ids)
    tr_dl = DataLoader(tr_ds,
                       batch_size=model_config.batch_size,
                       shuffle=True,
                       num_workers=4,
                       drop_last=False)

    val_ds = ChatbotDataset(data_config.validation,
                            tokenizer.list_of_string_to_arr_of_pad_token_ids)
    val_dl = DataLoader(val_ds,
                        batch_size=model_config.batch_size,
                        shuffle=True,
                        num_workers=4,
                        drop_last=False)

    # Loss: padding targets are ignored by the criterion itself.
    loss_fn = nn.CrossEntropyLoss(ignore_index=vocab.PAD_ID)

    # Optimizer and learning-rate warm-up schedule.
    opt = optim.Adam(params=model.parameters(), lr=model_config.learning_rate)
    scheduler = GradualWarmupScheduler(opt,
                                       multiplier=8,
                                       total_epoch=model_config.epochs)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    # Checkpoint / summary bookkeeping.
    checkpoint_manager = CheckpointManager(model_dir)
    summary_manager = SummaryManager(model_dir)
    best_val_loss = 1e+10
    best_train_acc = 0

    # Resume from an existing best checkpoint when one is present.
    if (model_dir / 'best.tar').exists():
        print("pretrained model exists")
        checkpoint = checkpoint_manager.load_checkpoint('best.tar')
        model.load_state_dict(checkpoint['model_state_dict'])

    # Train
    for epoch in tqdm(range(model_config.epochs),
                      desc='epoch',
                      total=model_config.epochs):
        scheduler.step(epoch)
        print("epoch : {}, lr: {}".format(epoch, opt.param_groups[0]['lr']))
        tr_loss = 0
        tr_acc = 0
        model.train()

        for step, mb in tqdm(enumerate(tr_dl), desc='steps', total=len(tr_dl)):
            opt.zero_grad()

            enc_input, dec_input, dec_output = map(lambda elm: elm.to(device),
                                                   mb)
            y_pred = model(enc_input, dec_input)
            # Un-flattened, graph-free copies kept for decoding sample output.
            y_pred_copy = y_pred.detach()
            dec_output_copy = dec_output.detach()

            # Reshape to (N, C) / (N,) for the loss computation.
            y_pred = y_pred.reshape(-1, y_pred.size(-1))
            dec_output = dec_output.view(-1).long()

            # Boolean mask of non-padding targets (index 0 is treated as
            # padding here). Index with the mask itself: wrapping it in a
            # list relies on deprecated sequence-index behaviour.
            non_pad = dec_output != 0

            # Padding is excluded from the loss. Input: (N, C) Target: (N)
            mb_loss = loss_fn(y_pred[non_pad], dec_output[non_pad])
            mb_loss.backward()
            opt.step()

            with torch.no_grad():
                mb_acc = acc(y_pred, dec_output)

            tr_loss += mb_loss.item()
            # Accuracy of the current mini-batch only (not a running mean).
            tr_acc = mb_acc.item()
            tr_loss_avg = tr_loss / (step + 1)
            tr_summary = {'loss': tr_loss_avg, 'acc': tr_acc}
            total_step = epoch * len(tr_dl) + step

            # Eval
            if total_step % model_config.summary_step == 0 and total_step != 0:
                print("train: ")
                decoding_from_result(enc_input, y_pred_copy, dec_output_copy,
                                     tokenizer)

                model.eval()
                print("eval: ")
                val_summary = evaluate(model, val_dl, {
                    'loss': loss_fn,
                    'acc': acc
                }, device, tokenizer)
                val_loss = val_summary['loss']

                tqdm.write(
                    'epoch : {}, step : {}, '
                    'tr_loss: {:.3f}, val_loss: {:.3f}, tr_acc: {:.2%}, val_acc: {:.2%}'
                    .format(epoch + 1, total_step, tr_summary['loss'],
                            val_summary['loss'], tr_summary['acc'],
                            val_summary['acc']))

                # Selection is by training accuracy. (Original author's note:
                # this should really use validation accuracy; a loss-based
                # criterion would be `val_loss < best_val_loss`.)
                is_best = tr_acc > best_train_acc

                # Save
                if is_best:
                    print(
                        "[Best model Save] train_acc: {}, train_loss: {}, val_loss: {}"
                        .format(tr_summary['acc'], tr_summary['loss'],
                                val_loss))
                    # Move the model to CPU before taking its state dict so the
                    # checkpoint also loads on CPU-only machines; it is moved
                    # back to `device` below.
                    state = {
                        'epoch':
                        epoch + 1,
                        'model_state_dict':
                        model.to(torch.device('cpu')).state_dict(),
                        'opt_state_dict':
                        opt.state_dict()
                    }
                    summary = {'train': tr_summary, 'validation': val_summary}

                    summary_manager.update(summary)
                    summary_manager.save('summary.json')
                    checkpoint_manager.save_checkpoint(state, 'best.tar')

                    # Record the new best; without this every later evaluation
                    # with non-zero accuracy would overwrite the checkpoint.
                    best_train_acc = tr_acc
                    best_val_loss = val_loss

                model.to(device)
                model.train()
            else:
                if step % 50 == 0:
                    print(
                        'epoch : {}, step : {}, tr_loss: {:.3f}, tr_acc: {:.2%}'
                        .format(epoch + 1, total_step, tr_summary['loss'],
                                tr_summary['acc']))
Example #6
0
def train_generator(generator, iterator, optimizer, discriminator, ignore_padid, tokenizer=None):
    """Train the generator's seq2seq model for one pass with reward-weighted loss.

    For every mini-batch the decoder input starts as a column of START tokens
    and is extended greedily, one token per step, for
    ``generator.config.maxlen`` steps. At each step, every row that has not
    yet produced an end token contributes
    ``-y_pred[row][step][chosen_token] * reward[row]`` to the mini-batch loss.
    The summed loss is back-propagated once per mini-batch.

    Relies on the module-level names ``device``, ``acc``, ``tqdm`` and
    ``decoding_to_pair``.

    Args:
        generator: Provides ``seq2seq``, ``vocab``, ``config.maxlen`` and
            ``is_end_token``.
        iterator: Sized iterable of 4-tuples; elements 0, 2 and 3 are used as
            encoder input, decoder target and reward.
        optimizer: Optimizer zeroed and stepped once per mini-batch.
        discriminator: Unused; kept for interface compatibility.
        ignore_padid: Unused; kept for interface compatibility.
        tokenizer: When given, ``decoding_to_pair`` is called on each step's
            predictions (its result is currently discarded).

    Returns:
        ``(mean_loss, mean_acc)`` averaged over ``len(iterator)``.
    """
    model = generator.seq2seq
    model.train()
    tr_loss = 0
    tr_acc = 0
    start_id = generator.vocab.token2idx[generator.vocab.START_TOKEN]

    for mb in tqdm(iterator, desc='steps', total=len(iterator)):
        optimizer.zero_grad()
        mb_loss = 0

        enc_input, _, dec_output, reward = map(lambda elm: elm.to(device), mb)
        # Flatten the targets once; they are only used for the accuracy below.
        dec_output = dec_output.view(-1).long()
        dec_input = torch.full((enc_input.shape[0], 1), start_id).long().to(device)
        # Rows that have already emitted an end token stop contributing loss.
        finished_rows = set()

        for i in range(generator.config.maxlen):
            y_pred = model(enc_input, dec_input)
            y_pred_copy = y_pred.detach()

            # Greedy choice for the newest position, shaped (batch, 1).
            y_pred_ids = y_pred_copy.max(dim=-1)[1][:, -1].view(-1, 1)
            dec_input = torch.cat([dec_input, y_pred_ids], dim=1)

            # Original intent: collect generated (negative) samples for
            # discriminator training. The decoded pair is not stored here.
            if tokenizer is not None:
                str_input, str_pred = decoding_to_pair(enc_input, y_pred_copy, tokenizer)

            # Loss follows log(P(y_t | Y_1:Y_{t-1})) * Q.
            # NOTE(review): whether y_pred holds log-probabilities depends on
            # the model's output layer - confirm.
            for idx in range(y_pred.shape[0]):
                if idx in finished_rows:
                    continue
                token_id = y_pred_ids[idx][0]
                # The end token itself still contributes at this step.
                if generator.is_end_token(token_id):
                    finished_rows.add(idx)
                pred_value = y_pred[idx][i][token_id]
                # Accumulate over rows and steps. Plain assignment would keep
                # only the last active row of the last step.
                mb_loss = mb_loss + (-pred_value * reward[idx])

        mb_loss.backward()
        optimizer.step()

        with torch.no_grad():
            # Accuracy uses the predictions of the final decoding step.
            y_pred = y_pred.reshape(-1, y_pred.size(-1))
            mb_acc = acc(y_pred, dec_output)

        tr_loss += mb_loss.item()
        tr_acc += mb_acc.item()

    return tr_loss/len(iterator), tr_acc/len(iterator)