def chat(model_params, sent='0'):
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
    tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    kogptqa = KoGPT2Chat(model)
    kogptqa.load_parameters(model_params, ctx=ctx)
    sent_tokens = tok(sent)
    while 1:
        q = input('user > ').strip()
        if q == 'quit':
            break
        q_tok = tok(q)
        a = ''
        a_tok = []
        while 1:
            input_ids = mx.nd.array([vocab[U_TKN]] + vocab[q_tok] +
                                    vocab[EOS, SENT] + vocab[sent_tokens] +
                                    vocab[EOS, S_TKN] +
                                    vocab[a_tok]).expand_dims(axis=0)
            pred = kogptqa(input_ids.as_in_context(ctx))
            # greedy decoding: take the most probable token at the last position
            gen = vocab.to_tokens(
                mx.nd.argmax(pred, axis=-1).squeeze().astype(
                    'int').asnumpy().tolist())[-1]
            if gen == EOS:
                break
            a += gen.replace('▁', ' ')
            a_tok = tok(a)
        print("Simsimi > {}".format(a.strip()))
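# Hedged usage sketch: chat() builds the tokenizer, model, and vocab from
# scratch, so it only needs the path of a trained checkpoint. The file name
# 'kogpt2_chat.params' below is an assumed example; pass whatever train()
# saved via opt.model_params. Typing 'quit' at the 'user >' prompt exits.
#
#     chat('kogpt2_chat.params')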
def Load_Model():
    global vocab_global
    global sent_tokens_global
    global kogptqa_global
    global tok_global
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
    tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    kogptqa = KoGPT2Chat(model)
    # use a forward slash so '\k' is not misread as an escape sequence
    kogptqa.load_parameters("KoGPT2-chatbot/kogpt2_chat.params", ctx=ctx)
    sent_tokens = tok("0")
    vocab_global = vocab
    sent_tokens_global = sent_tokens
    kogptqa_global = kogptqa
    tok_global = tok
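# Hedged sketch of why Load_Model() caches these globals: a request handler
# can then answer repeatedly without reloading the model on every call.
# get_answer() is a hypothetical helper, not part of the original code; it
# reuses the same greedy generation loop as chat() above.
def get_answer(text):
    q_tok = tok_global(text.strip())
    a, a_tok = '', []
    while 1:
        input_ids = mx.nd.array(
            [vocab_global[U_TKN]] + vocab_global[q_tok] +
            vocab_global[EOS, SENT] + vocab_global[sent_tokens_global] +
            vocab_global[EOS, S_TKN] +
            vocab_global[a_tok]).expand_dims(axis=0)
        pred = kogptqa_global(input_ids.as_in_context(ctx))
        gen = vocab_global.to_tokens(
            mx.nd.argmax(pred, axis=-1).squeeze().astype(
                'int').asnumpy().tolist())[-1]
        if gen == EOS:
            break
        a += gen.replace('▁', ' ')
        a_tok = tok_global(a)
    return a.strip()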
def train():
    tok_path = get_tokenizer()
    model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
    # tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
    data = pd.read_csv('Chatbot_data/ChatbotData.csv')
    max_len = opt.max_seq_len
    train_set = chat_data(data, tok_path, vocab, max_len=max_len)
    batch_size = opt.batch_size
    train_dataloader = mx.gluon.data.DataLoader(train_set,
                                                batch_size=batch_size,
                                                num_workers=5,
                                                shuffle=True)
    kogptqa = KoGPT2Chat(model)
    kogptqa.hybridize()
    # softmax cross entropy loss for classification
    loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
    loss_function.hybridize()
    num_epochs = opt.num_epoch
    lr = 5e-5
    trainer = gluon.Trainer(kogptqa.collect_params(), 'bertadam', {
        'learning_rate': lr,
        'epsilon': 1e-8,
        'wd': 0.01
    })
    # do not apply weight decay to LayerNorm and bias parameters
    for _, v in kogptqa.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    params = [
        p for p in kogptqa.collect_params().values() if p.grad_req != 'null'
    ]
    # learning rate warmup
    accumulate = opt.accumulate
    step_size = batch_size * accumulate if accumulate else batch_size
    num_train_examples = len(train_set)
    num_train_steps = int(num_train_examples / step_size * num_epochs)
    warmup_ratio = 0.1
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    step_num = 0
    all_model_params = kogptqa.collect_params()

    log_interval = 50
    neg = -1e18
    # set grad_req to 'add' if gradient accumulation is required
    if accumulate and accumulate > 1:
        for p in params:
            p.grad_req = 'add'

    for epoch_id in range(num_epochs):
        step_loss = 0
        for batch_id, (token_ids, mask, label) in enumerate(train_dataloader):
            # linear warmup followed by linear decay
            if step_num < num_warmup_steps:
                new_lr = lr * step_num / num_warmup_steps
            else:
                non_warmup_steps = step_num - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps -
                                             num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)
            with mx.autograd.record():
                # load data to GPU or CPU
                token_ids = token_ids.as_in_context(ctx)
                mask = mask.as_in_context(ctx)
                label = label.as_in_context(ctx)
                # forward computation
                out = kogptqa(token_ids)
                masked_out = nd.where(
                    mask.expand_dims(axis=2).repeat(repeats=out.shape[2],
                                                    axis=2), out,
                    neg * nd.ones_like(out))
                # loss for responses, excluding MASK and PAD
                ls = loss_function(masked_out, label).sum() / mask.sum()
            # backward computation
            ls.backward()
            if not accumulate or (batch_id + 1) % accumulate == 0:
                trainer.allreduce_grads()
                nlp.utils.clip_grad_global_norm(params, 1)
                trainer.update(accumulate if accumulate else 1)
                step_num += 1
                if accumulate and accumulate > 1:
                    # set grad to zero for gradient accumulation
                    all_model_params.zero_grad()
            step_loss += ls.asscalar()
            if step_num % log_interval == 0 and step_num > 0:
                print(
                    '[Epoch {} Batch {}/{}] loss={:.4f}, lr={:.10f}, train ppl={:.3f}'
                    .format(epoch_id + 1, batch_id + 1, len(train_dataloader),
                            step_loss / log_interval, trainer.learning_rate,
                            math.exp(step_loss / log_interval)))
                step_loss = 0
    logging.info('saving model file to {}'.format(opt.model_params))
    kogptqa.save_parameters(opt.model_params)
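# Hedged illustration (toy shapes, not part of the training loop) of the
# nd.where masking used above: at positions where mask == 0 (the question and
# padding part of the sequence), every vocabulary logit is replaced by the
# constant -1e18. Because nd.where routes gradients only to the branch it
# selected, those positions contribute no gradient to `out`, so the loss is
# driven by answer tokens alone. _mask_demo is a hypothetical helper that
# reuses this module's mx/nd imports.
def _mask_demo():
    toy_out = nd.random.uniform(shape=(1, 4, 8))   # (batch, seq_len, vocab)
    toy_mask = nd.array([[0, 0, 1, 1]])            # answer starts at position 2
    toy_masked = nd.where(
        toy_mask.expand_dims(axis=2).repeat(repeats=toy_out.shape[2], axis=2),
        toy_out, -1e18 * nd.ones_like(toy_out))
    print(toy_masked[0, 0, :3])                    # all -1e18: masked position
    print(toy_masked[0, 2, :3])                    # original logits: answer position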
        super(KoGPT2Chat, self).__init__(prefix=prefix, params=params)
        self.kogpt2 = kogpt2

    def hybrid_forward(self, F, inputs):
        # (batch, seq_len, hiddens)
        output, _ = self.kogpt2(inputs)
        return output


if mx.context.num_gpus() > 0:
    ctx = mx.gpu()
else:
    ctx = mx.cpu()

tok_path = get_tokenizer()
model, vocab = get_mxnet_kogpt2_model(ctx=ctx)
tok = SentencepieceTokenizer(tok_path, num_best=0, alpha=0)
kogptqa = KoGPT2Chat(model)
# kogptqa.load_parameters("Kogpt2_chat.params", ctx=ctx)


def chat(text="", sent='0'):
    sent_tokens = tok(sent)
    cnt = 0
    q = text.strip()
    q_tok = tok(q)
    a = ''
    a_tok = []
    while cnt < 25:  # cap generation at 25 tokens
        cnt += 1
        input_ids = mx.nd.array([vocab[U_TKN]] + vocab[q_tok] +
                                vocab[EOS, SENT] + vocab[sent_tokens] +
                                vocab[EOS, S_TKN] +
                                vocab[a_tok]).expand_dims(axis=0)
        pred = kogptqa(input_ids.as_in_context(ctx))
        gen = vocab.to_tokens(
            mx.nd.argmax(pred, axis=-1).squeeze().astype(
                'int').asnumpy().tolist())[-1]
        if gen == EOS:
            break
        a += gen.replace('▁', ' ')
        a_tok = tok(a)
    return a.strip()
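# Hedged usage sketch for the single-turn variant above: unlike the
# interactive chat() in the training script, this one takes the user
# utterance as an argument and returns the reply, so it can back a web or
# bot handler. Load a checkpoint first (the commented-out load_parameters
# call above), then:
#
#     answer = chat("안녕하세요")
#     print(answer)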