Example #1
def chat(model_params, sent='0'):
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kobert_model(ctx=ctx)
    tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    kogptqa = KoGPT2Chat(model)
    kogptqa.load_parameters(model_params, ctx=ctx)
    sent_tokens = tok(sent)
    while True:
        q = input('user > ').strip()
        if q == 'quit':
            break
        q_tok = tok(q)
        a = ''
        a_tok = []
        # greedy decoding: append one generated token at a time until EOS
        while True:
            input_ids = mx.nd.array([vocab[U_TKN]] + vocab[q_tok] +
                                    vocab[EOS, SENT] + vocab[sent_tokens] +
                                    vocab[EOS, S_TKN] +
                                    vocab[a_tok]).expand_dims(axis=0)
            pred = kogptqa(input_ids.as_in_context(ctx))
            gen = vocab.to_tokens(
                mx.nd.argmax(
                    pred,
                    axis=-1).squeeze().astype('int').asnumpy().tolist())[-1]
            if gen == EOS:
                break
            a += gen.replace('▁', ' ')
            a_tok = tok(a)
        print("Simsimi > {}".format(a.strip()))
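

# A minimal invocation sketch for chat(). It assumes the surrounding script
# defines ctx and the special tokens (U_TKN, S_TKN, SENT, EOS) used above; the
# parameter file name below is only a placeholder for whatever file
# save_parameters() produced during training.
chat('kogpt2_chat.params')
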
Example #2
train = pd.read_csv(r'train.csv', encoding='utf-8')
train = train[['text', 'smishing']]

n_of_train = int(len(train) * 0.8)
n_of_val = int(len(train) - n_of_train)
print(n_of_train)
print(n_of_val)
train1 = np.asarray(train[:n_of_train])
valid1 = np.asarray(train[n_of_train:])
train2 = train[:n_of_train]
valid2 = train[n_of_train:]

print('start')
ctx = mx.gpu()
bert_base, vocab = get_mxnet_kobert_model(use_decoder=False,
                                          use_classifier=False,
                                          ctx=ctx)

#token----------------------------------------------------------------
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)
ds = gluon.data.SimpleDataset([['나는 이승준입니다.', '과제가 너무 많은걸요?']])
trans = nlp.data.BERTSentenceTransform(tok, max_seq_length=10)
list(ds.transform(trans))
print('token complete')
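
# The transform above maps each input to a (token_ids, valid_length, segment_ids)
# triple; a small sketch for inspecting one transformed sample (the exact ids
# depend on the KoBERT vocabulary, so only the shapes and the length matter here):
sample_ids, sample_valid_len, sample_segments = list(ds.transform(trans))[0]
print(sample_ids.shape, int(sample_valid_len), sample_segments.shape)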


#Dataset----------------------------------------------------------------
class BERTDataset(mx.gluon.data.Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        # Assumed implementation: tokenize each row up front with
        # BERTSentenceTransform; labels are kept on self.labels and each sample
        # yields the (token_ids, valid_length, segment_ids) triple.
        transform = nlp.data.BERTSentenceTransform(
            bert_tokenizer, max_seq_length=max_len, pad=pad, pair=pair)
        self.sentences = [transform([i[sent_idx]]) for i in dataset]
        self.labels = [np.int32(i[label_idx]) for i in dataset]

    def __getitem__(self, i):
        return self.sentences[i]

    def __len__(self):
        return len(self.labels)
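
# A hedged sketch of wiring the splits above into this dataset and gluon
# DataLoaders; max_len and batch_size are illustrative values, not taken from
# the original script.  Labels stay available on data_train.labels.
max_len = 128
data_train = BERTDataset(train1, 0, 1, tok, max_len, True, False)
data_valid = BERTDataset(valid1, 0, 1, tok, max_len, True, False)
train_dataloader = mx.gluon.data.DataLoader(data_train, batch_size=32,
                                            shuffle=True)
valid_dataloader = mx.gluon.data.DataLoader(data_valid, batch_size=32)
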
Example #3
def train():
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kobert_model(ctx=ctx)
    # tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)

    data = pd.read_csv('Chatbot_data/카페_qna.csv')

    # build the chat dataset and dataloader
    max_len = opt.max_seq_len
    train_set = ChatDataset(data, tok_path, vocab, max_len=max_len)
    batch_size = opt.batch_size

    train_dataloader = mx.gluon.data.DataLoader(train_set,
                                                batch_size=batch_size,
                                                num_workers=5,
                                                shuffle=True)
    kogptqa = KoGPT2Chat(model)
    kogptqa.hybridize()

    # softmax cross-entropy loss over the output vocabulary (token-level LM loss)
    loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
    loss_function.hybridize()

    num_epochs = opt.num_epoch
    lr = 5e-5
    trainer = gluon.Trainer(kogptqa.collect_params(), 'bertadam', {
        'learning_rate': lr,
        'epsilon': 1e-8,
        'wd': 0.01
    })
    # Do not apply weight decay to LayerNorm and bias parameters.
    for _, v in kogptqa.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    params = [
        p for p in kogptqa.collect_params().values() if p.grad_req != 'null'
    ]
    # learning rate warmup
    accumulate = opt.accumulate
    step_size = batch_size * accumulate if accumulate else batch_size
    num_train_examples = len(train_set)
    num_train_steps = int(num_train_examples / step_size * num_epochs)
    warmup_ratio = 0.1
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    step_num = 0
    all_model_params = kogptqa.collect_params()

    log_interval = 50
    neg = -1e18
    # Set grad_req if gradient accumulation is required
    if accumulate and accumulate > 1:
        for p in params:
            p.grad_req = 'add'

    for epoch_id in range(num_epochs):
        step_loss = 0
        for batch_id, (token_ids, mask, label) in enumerate(train_dataloader):
            if step_num < num_warmup_steps:
                new_lr = lr * step_num / num_warmup_steps
            else:
                non_warmup_steps = step_num - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps -
                                             num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)
            with mx.autograd.record():
                # load data to GPU or CPU
                token_ids = token_ids.as_in_context(ctx)
                mask = mask.as_in_context(ctx)
                label = label.as_in_context(ctx)
                # forward computation
                out = kogptqa(token_ids)
                masked_out = nd.where(
                    mask.expand_dims(axis=2).repeat(repeats=out.shape[2],
                                                    axis=2), out,
                    neg * nd.ones_like(out))
                # loss over response tokens, excluding MASK and PAD positions
                ls = loss_function(masked_out, label).sum() / mask.sum()
            # backward computation
            ls.backward()
            if not accumulate or (batch_id + 1) % accumulate == 0:
                trainer.allreduce_grads()
                nlp.utils.clip_grad_global_norm(params, 1)
                trainer.update(accumulate if accumulate else 1)
                step_num += 1
                if accumulate and accumulate > 1:
                    # set grad to zero for gradient accumulation
                    all_model_params.zero_grad()
            step_loss += ls.asscalar()
            if step_num % log_interval == 0 and step_num > 0:
                print(
                    '[Epoch {} Batch {}/{}] loss={:.4f}, lr={:.10f}, train ppl={:.3f}'
                    .format(epoch_id + 1, batch_id + 1, len(train_dataloader),
                            step_loss / log_interval, trainer.learning_rate,
                            math.exp(step_loss / log_interval)))
                step_loss = 0
    logging.info('saving model file to {}'.format(opt.model_params))
    kogptqa.save_parameters(opt.model_params)
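

# train() above reads its hyper-parameters from a module-level `opt` namespace
# and a global `ctx`.  A hedged sketch of what that namespace might look like;
# the attribute names match the code above, but the flag names and default
# values are illustrative assumptions, not taken from the original project.
import argparse

import mxnet as mx

parser = argparse.ArgumentParser(description='KoGPT2 chatbot fine-tuning (sketch)')
parser.add_argument('--max-seq-len', dest='max_seq_len', type=int, default=32)
parser.add_argument('--batch-size', dest='batch_size', type=int, default=96)
parser.add_argument('--num-epoch', dest='num_epoch', type=int, default=3)
parser.add_argument('--accumulate', type=int, default=1)
parser.add_argument('--model-params', dest='model_params', type=str,
                    default='kogpt2_chat.params')
opt = parser.parse_args([])  # parse defaults only; pass sys.argv[1:] in a real run
ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()
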
def run_model(review_list):
    ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()

    class BERTClassifier(nn.Block):
        def __init__(self,
                     bert,
                     num_classes=2,
                     dropout=None,
                     prefix=None,
                     params=None):
            super(BERTClassifier, self).__init__(prefix=prefix, params=params)
            self.bert = bert

            with self.name_scope():
                self.classifier = nn.SymbolBlock.imports(
                    "electric.ckp-symbol.json", ['data'],
                    "electric.ckp-0009.params",
                    ctx=ctx)

        def forward(self, inputs, token_types, valid_length=None):
            _, pooler = self.bert(inputs, token_types, valid_length)
            return self.classifier(pooler)

    bert_base, vocab = get_mxnet_kobert_model(use_decoder=False,
                                              use_classifier=False,
                                              ctx=ctx)
    tokenizer = get_tokenizer()
    tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

    ifptr = open("temp_file.txt", 'w', encoding='utf-8')
    for review in review_list:
        review_parse = review.split(' ')

        sentence_length = 0
        new_sentence = ""

        # walk backwards over (at most) the last 7 words of the review
        for i in range(
                len(review_parse) - 1, max(-1,
                                           len(review_parse) - 8), -1):
            new_sentence = review_parse[i] + " " + new_sentence
            sentence_length += len(review_parse[i])

            if sentence_length >= 30:
                break

        new_sentence = new_sentence[:-1]

        data = str(0) + "\t" + new_sentence + "\t" + str(0) + "\n"
        ifptr.write(data)

    ifptr.close()

    # the temp file has no header row, so no samples are discarded
    dataset_test = nlp.data.TSVDataset("temp_file.txt",
                                       field_indices=[1, 2],
                                       num_discard_samples=0)

    max_len = 128
    data_test = BERTDataset(dataset_test, 0, 1, tok, max_len, True, False)
    model = BERTClassifier(bert_base)

    model.hybridize()
    batch_size = 50
    test_dataloader = mx.gluon.data.DataLoader(data_test,
                                               batch_size=int(batch_size))

    prediction = np.zeros(len(review_list))
    for batch_id, (token_ids, valid_length,
                   segment_ids) in enumerate(test_dataloader):
        with mx.autograd.record():
            # load data to GPU
            token_ids = token_ids.as_in_context(ctx)
            valid_length = valid_length.as_in_context(ctx)
            segment_ids = segment_ids.as_in_context(ctx)

            # forward computation
            out = model(token_ids, segment_ids, valid_length.astype('float32'))
            # store the predicted class index for every review in this batch
            output = mx.nd.argmax(out, axis=1).asnumpy()
            prediction[batch_id * batch_size:min((batch_id + 1) * batch_size,
                                                 len(review_list))] = output

    return prediction
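

# A minimal usage sketch for run_model(); the review strings below are
# illustrative placeholders only.
sample_reviews = ['리뷰 예시 1', '리뷰 예시 2']
predictions = run_model(sample_reviews)
print(predictions)  # one predicted class index per review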