def chat(model_params, sent='0'):
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kobert_model(ctx=ctx)
    tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    kogptqa = KoGPT2Chat(model)
    kogptqa.load_parameters(model_params, ctx=ctx)
    sent_tokens = tok(sent)
    # Interactive loop: read a user question, then greedily decode the answer
    # one token at a time until the model emits EOS.
    while 1:
        q = input('user > ').strip()
        if q == 'quit':
            break
        q_tok = tok(q)
        a = ''
        a_tok = []
        while 1:
            input_ids = mx.nd.array([vocab[U_TKN]] + vocab[q_tok] +
                                    vocab[EOS, SENT] + vocab[sent_tokens] +
                                    vocab[EOS, S_TKN] +
                                    vocab[a_tok]).expand_dims(axis=0)
            pred = kogptqa(input_ids.as_in_context(ctx))
            # take the most probable token at the last position
            gen = vocab.to_tokens(
                mx.nd.argmax(pred, axis=-1).squeeze().astype(
                    'int').asnumpy().tolist())[-1]
            if gen == EOS:
                break
            a += gen.replace('▁', ' ')
            a_tok = tok(a)
        print("Simsimi > {}".format(a.strip()))
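# Usage sketch (assumption, not in the original source): start the interactive
# chat loop with a saved parameter file; the file name below is hypothetical.
if __name__ == '__main__':
    chat('kogpt2_chat.params')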
# Load the smishing data set and split it 80/20 into train/validation sets.
train = pd.read_csv(r'train.csv', encoding='utf-8')
train = train[['text', 'smishing']]
n_of_train = int(len(train) * 0.8)
n_of_val = int(len(train) - n_of_train)
print(n_of_train)
print(n_of_val)
train1 = np.asarray(train[:n_of_train])
valid1 = np.asarray(train[n_of_train:])
train2 = train[:n_of_train]
valid2 = train[n_of_train:]

print('start')
ctx = mx.gpu()
bert_base, vocab = get_mxnet_kobert_model(use_decoder=False,
                                          use_classifier=False,
                                          ctx=ctx)

# Tokenizer ----------------------------------------------------------------
tokenizer = get_tokenizer()
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)
# Quick sanity check of the SentencePiece tokenizer on a Korean sentence pair.
ds = gluon.data.SimpleDataset([['나는 이승준입니다.', '과제가 너무 많은걸요?']])
trans = nlp.data.BERTSentenceTransform(tok, max_seq_length=10)
list(ds.transform(trans))
print('token complete')

# Dataset ------------------------------------------------------------------
class BERTDataset(mx.gluon.data.Dataset):
    def __init__(self, dataset, sent_idx, label_idx, bert_tokenizer, max_len,
                 pad, pair):
        # NOTE: the body below is an assumption; the original was truncated
        # here. It follows the standard KoBERT single-sentence classification
        # example, which matches how BERTDataset is called later in this code.
        transform = nlp.data.BERTSentenceTransform(bert_tokenizer,
                                                   max_seq_length=max_len,
                                                   pad=pad,
                                                   pair=pair)
        sent_dataset = gluon.data.SimpleDataset(
            [[i[sent_idx]] for i in dataset])
        self.sentences = sent_dataset.transform(transform)
        self.labels = gluon.data.SimpleDataset(
            [np.array(np.int32(i[label_idx])) for i in dataset])

    def __getitem__(self, i):
        return self.sentences[i] + (self.labels[i], )

    def __len__(self):
        return len(self.labels)
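# Usage sketch (assumption, not in the original): build MXNet datasets and
# data loaders for the 80/20 split above. `train1` / `valid1` are numpy arrays
# of [text, smishing] rows, so sent_idx=0 and label_idx=1; the max_len and
# batch_size values here are illustrative.
max_len = 128
data_train = BERTDataset(train1, 0, 1, tok, max_len, True, False)
data_valid = BERTDataset(valid1, 0, 1, tok, max_len, True, False)
train_loader = mx.gluon.data.DataLoader(data_train, batch_size=32, shuffle=True)
valid_loader = mx.gluon.data.DataLoader(data_valid, batch_size=32)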
def train():
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kobert_model(ctx=ctx)
    # tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    data = pd.read_csv('Chatbot_data/카페_qna.csv')
    max_len = opt.max_seq_len
    train_set = ChatDataset(data, tok_path, vocab, max_len=max_len)
    batch_size = opt.batch_size
    train_dataloader = mx.gluon.data.DataLoader(train_set,
                                                batch_size=batch_size,
                                                num_workers=5,
                                                shuffle=True)
    kogptqa = KoGPT2Chat(model)
    kogptqa.hybridize()

    # softmax cross entropy loss for classification
    loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
    loss_function.hybridize()

    num_epochs = opt.num_epoch
    lr = 5e-5
    trainer = gluon.Trainer(kogptqa.collect_params(), 'bertadam', {
        'learning_rate': lr,
        'epsilon': 1e-8,
        'wd': 0.01
    })

    # Do not apply weight decay to LayerNorm and bias parameters.
    for _, v in kogptqa.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    params = [
        p for p in kogptqa.collect_params().values() if p.grad_req != 'null'
    ]

    # learning rate warmup
    accumulate = opt.accumulate
    step_size = batch_size * accumulate if accumulate else batch_size
    num_train_examples = len(train_set)
    num_train_steps = int(num_train_examples / step_size * num_epochs)
    warmup_ratio = 0.1
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    step_num = 0
    all_model_params = kogptqa.collect_params()

    log_interval = 50
    neg = -1e18
    # Set grad_req to 'add' if gradient accumulation is required
    if accumulate and accumulate > 1:
        for p in params:
            p.grad_req = 'add'

    for epoch_id in range(num_epochs):
        step_loss = 0
        for batch_id, (token_ids, mask, label) in enumerate(train_dataloader):
            # linear warmup followed by linear decay of the learning rate
            if step_num < num_warmup_steps:
                new_lr = lr * step_num / num_warmup_steps
            else:
                non_warmup_steps = step_num - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps -
                                             num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)
            with mx.autograd.record():
                # load data to GPU or CPU
                token_ids = token_ids.as_in_context(ctx)
                mask = mask.as_in_context(ctx)
                label = label.as_in_context(ctx)
                # forward computation
                out = kogptqa(token_ids)
                masked_out = nd.where(
                    mask.expand_dims(axis=2).repeat(repeats=out.shape[2],
                                                    axis=2), out,
                    neg * nd.ones_like(out))
                # loss on response tokens only, excluding MASK and PAD
                ls = loss_function(masked_out, label).sum() / mask.sum()
            # backward computation
            ls.backward()
            if not accumulate or (batch_id + 1) % accumulate == 0:
                trainer.allreduce_grads()
                nlp.utils.clip_grad_global_norm(params, 1)
                trainer.update(accumulate if accumulate else 1)
                step_num += 1
                if accumulate and accumulate > 1:
                    # set grad to zero for gradient accumulation
                    all_model_params.zero_grad()
            step_loss += ls.asscalar()
            if step_num % log_interval == 0 and step_num > 0:
                print(
                    '[Epoch {} Batch {}/{}] loss={:.4f}, lr={:.10f}, train ppl={:.3f}'
                    .format(epoch_id + 1, batch_id + 1, len(train_dataloader),
                            step_loss / log_interval, trainer.learning_rate,
                            math.exp(step_loss / log_interval)))
                step_loss = 0
    logging.info('saving model file to {}'.format(opt.model_params))
    kogptqa.save_parameters(opt.model_params)
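# Illustration (assumption, not in the original): the learning-rate schedule
# applied inline inside train() above, factored into a standalone helper. It
# ramps the rate linearly from 0 to `lr` over the warmup steps, then decays it
# linearly back toward 0 over the remaining training steps.
def warmup_linear_decay(step_num, lr, num_warmup_steps, num_train_steps):
    if step_num < num_warmup_steps:
        return lr * step_num / num_warmup_steps
    offset = (step_num - num_warmup_steps) / (num_train_steps -
                                              num_warmup_steps)
    return lr - offset * lr

# Example: with lr=5e-5, 1000 total steps and 100 warmup steps, step 50 gives
# 2.5e-5 on the way up and step 550 gives 2.5e-5 again on the way down.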
def run_model(review_list):
    ctx = mx.gpu() if mx.context.num_gpus() else mx.cpu()

    class BERTClassifier(nn.Block):
        def __init__(self,
                     bert,
                     num_classes=2,
                     dropout=None,
                     prefix=None,
                     params=None):
            super(BERTClassifier, self).__init__(prefix=prefix, params=params)
            self.bert = bert
            with self.name_scope():
                # Load the exported classifier head (symbol + trained weights).
                self.classifier = nn.SymbolBlock.imports(
                    "electric.ckp-symbol.json", ['data'],
                    "electric.ckp-0009.params",
                    ctx=ctx)

        def forward(self, inputs, token_types, valid_length=None):
            _, pooler = self.bert(inputs, token_types, valid_length)
            return self.classifier(pooler)

    bert_base, vocab = get_mxnet_kobert_model(use_decoder=False,
                                              use_classifier=False,
                                              ctx=ctx)
    tokenizer = get_tokenizer()
    tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower=False)

    # Keep only the tail of each review (at most 7 words or ~30 characters)
    # and write it to a temporary TSV file with a dummy id and label.
    ifptr = open("temp_file.txt", 'w', encoding='cp949')
    for review in review_list:
        review_parse = review.split(' ')
        sentence_length = 0
        new_sentence = ""
        for i in range(len(review_parse) - 1, max(-1, len(review_parse) - 8),
                       -1):
            new_sentence = review_parse[i] + " " + new_sentence
            sentence_length += len(review_parse[i])
            if sentence_length >= 30:
                break
        new_sentence = new_sentence[:-1]
        data = str(0) + "\t" + new_sentence + "\t" + str(0) + "\n"
        ifptr.write(data)
    ifptr.close()

    # No header row is written above, so do not discard any samples.
    dataset_test = nlp.data.TSVDataset("temp_file.txt",
                                       field_indices=[1, 2],
                                       num_discard_samples=0)
    max_len = 128
    data_test = BERTDataset(dataset_test, 0, 1, tok, max_len, True, False)

    model = BERTClassifier(bert_base)
    model.hybridize()

    batch_size = 50
    test_dataloader = mx.gluon.data.DataLoader(data_test,
                                               batch_size=int(batch_size))
    prediction = np.zeros(len(review_list))
    for batch_id, (token_ids, valid_length, segment_ids,
                   label) in enumerate(test_dataloader):
        # load data to GPU or CPU; no gradients are recorded for inference
        # (the dummy label from the TSV file is ignored)
        token_ids = token_ids.as_in_context(ctx)
        valid_length = valid_length.as_in_context(ctx)
        segment_ids = segment_ids.as_in_context(ctx)
        # forward computation
        out = model(token_ids, segment_ids, valid_length.astype('float32'))
        output = mx.nd.argmax(out, axis=1).asnumpy()
        prediction[batch_id * batch_size:min((batch_id + 1) * batch_size,
                                             len(review_list))] = output
    return prediction
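# Usage sketch (assumption, not in the original): score a couple of reviews
# with the exported classifier. The review strings are made-up examples, and
# the electric.ckp-* symbol/parameter files must already exist on disk.
sample_reviews = ['배송이 빠르고 제품도 좋아요', '하루 만에 고장났어요 환불해주세요']
preds = run_model(sample_reviews)
print(preds)  # one predicted class index per review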