def top_k_top_p_decode(self, dec_hidden, enc_out):
    batch_size = enc_out.size(0)
    outputs = []
    # start decoding from token id 2 (presumably <sos>) for every sequence in the batch
    dec_input = torch.zeros(batch_size, 1).fill_(2).long().to(self.device)
    input_feed = torch.zeros(batch_size, 1, enc_out.size(2), device=self.device)
    for t in range(0, self.max_len_sentence):
        dec_input = self.embedding(dec_input)  # (batch size, 1, emb dim)
        dec_input = torch.cat((dec_input, input_feed), 2)
        dec_output, dec_hidden = self.decode_rnn(dec_input, dec_hidden, enc_out)
        out = self.gen(dec_output)
        out, probs = sample_sequence(out, self.top_k, self.top_p,
                                     self.temperature, False)
        # resample while the sequence is still too short to end on a special token
        if t < self.min_len_sentence and out.item() in self.special_tokens_ids:
            while out.item() in self.special_tokens_ids:
                out = torch.multinomial(probs, num_samples=1)
        if out.item() in self.special_tokens_ids:
            return outputs
        outputs.append(out.item())
        dec_input = out.long().unsqueeze(1)
        input_feed = dec_output
    return outputs
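# NOTE (sketch): the `sample_sequence` helper used above is not shown in this
# snippet. Given the call site, it presumably applies temperature scaling plus
# top-k/top-p (nucleus) filtering to one step of generator logits and returns
# both the sampled token and the filtered distribution (so the caller can
# resample away special tokens). A minimal sketch under those assumptions,
# with logits of shape (batch, 1, vocab) and the last flag selecting greedy:
import torch
import torch.nn.functional as F

def sample_sequence(logits, top_k, top_p, temperature, greedy):
    logits = logits.squeeze(1) / max(temperature, 1e-8)  # (batch, vocab)
    if greedy:
        probs = F.softmax(logits, dim=-1)
        return probs.argmax(dim=-1, keepdim=True), probs
    if top_k > 0:
        # drop everything below the k-th largest logit
        kth = torch.topk(logits, top_k)[0][..., -1, None]
        logits = logits.masked_fill(logits < kth, -float('inf'))
    if top_p > 0.0:
        # nucleus filtering: remove tokens outside the smallest set whose
        # cumulative probability exceeds top_p (always keep the best token)
        sorted_logits, sorted_idx = torch.sort(logits, descending=True, dim=-1)
        cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        to_remove = cum_probs > top_p
        to_remove[..., 1:] = to_remove[..., :-1].clone()
        to_remove[..., 0] = False
        logits = logits.masked_fill(
            to_remove.scatter(1, sorted_idx, to_remove), -float('inf'))
    probs = F.softmax(logits, dim=-1)
    return torch.multinomial(probs, num_samples=1), probs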
def get_bot_response():
    global history
    global personality
    userText = request.args.get('msg')
    history.append(tokenizer.encode(userText))
    with torch.no_grad():
        out_ids = sample_sequence(personality, history, tokenizer, model, args)
    history.append(out_ids)
    history = history[-(2 * args.max_history + 1):]
    out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
    return out_text
def evaluate_ppl_gpt(args):
    """ Evaluate on raw text, use this with GPT which has its own tokenizer """
    if args.expanded_dataset:
        path = ".data/stories/story_commonsense/torchtext_expanded"
    else:
        path = ".data/stories/story_commonsense/torchtext"

    # Data
    test_src = [line.rstrip('\n') for line in open(path + "/test.src")]
    test_trg = [line.rstrip('\n') for line in open(path + "/test.trg")]

    # Model
    enc = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    model.to(device)
    model.eval()

    loss = 0
    batch_size = 1
    print("Evaluating test set with GPT2")
    for i in trange(len(test_src)):
        src, trg = test_src[i], test_trg[i]
        context = enc.encode(src)
        target = enc.encode(trg)
        length = len(target)

        # Generate prediction
        out = utils.sample_sequence(model, length, batch_size=batch_size,
                                    context=context, top_k=10, device=device)
        out = out[:, len(context):]

        # Get model loss; with labels, the model returns the LM loss as the
        # first element of its output tuple
        target = torch.tensor([target]).to(device)
        with torch.no_grad():
            l = model(out, labels=target)[0]
        loss += float(l)

    av_loss = loss / len(test_src)  # average over examples
    print(f"ppl: {math.exp(av_loss):.04f}")
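# NOTE (sketch): `utils.sample_sequence` is not shown. The call matches the
# classic GPT-2 sampling loop (context in, `length` tokens out, top-k filter).
# A sketch under that assumption, using the older pytorch_transformers
# interface in which `model(prev, past=past)` returns `(logits, past)`:
import torch

def sample_sequence(model, length, batch_size=1, context=None, top_k=0, device='cuda'):
    context = torch.tensor(context, dtype=torch.long, device=device)
    context = context.unsqueeze(0).repeat(batch_size, 1)
    prev, output, past = context, context, None
    with torch.no_grad():
        for _ in range(length):
            logits, past = model(prev, past=past)  # cache past activations
            logits = logits[:, -1, :]
            if top_k > 0:
                # keep only the top-k logits before sampling
                kth = torch.topk(logits, top_k)[0][..., -1, None]
                logits = logits.masked_fill(logits < kth, -float('inf'))
            probs = torch.softmax(logits, dim=-1)
            prev = torch.multinomial(probs, num_samples=1)
            output = torch.cat((output, prev), dim=1)
    return output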
def api():
    """ Handle request and output model score in json format """
    if args is None:
        initialize()

    history_text = None
    # Handle GET requests:
    if request.method == "GET":
        if request.args:
            history_text = request.args.getlist("input")

    if history_text is not None:
        print(f"Received valid request through API - \"input\": {history_text}")
    else:
        return jsonify({
            "error": "Invalid JSON request. Provide GET request as {\"input\": \"<your dialogue history as list>\"}"
        })

    personality = []
    history = [tokenizer.encode(t) for t in history_text]
    with torch.no_grad():
        out_ids = sample_sequence(personality, history, tokenizer, model, args,
                                  current_output=None)
    out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
    # save_to_db(title, out_text)
    return jsonify({"history": history_text, "response": out_text})
def train(config):
    # determine the filename (to be used for saving results, checkpoints, models, etc.)
    filename = Path(config.txt_file).stem

    # Initialize the device on which to run the model
    # (fall back to CPU if CUDA was requested but is not available)
    if config.device == 'cuda' and not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(
        filename=config.txt_file,
        seq_length=config.seq_length
    )
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # get the vocabulary size and int2char and char2int dictionaries for use later
    VOCAB_SIZE = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=VOCAB_SIZE,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers,
        device=device,
        batch_first=config.batch_first,
        dropout=1.0 - config.dropout_keep_prob
    )

    # Setup the loss, optimizer and learning rate scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(),
        config.learning_rate
    )

    # Load the latest checkpoint, if any exist
    checkpoints = list(CHECKPOINTS_DIR.glob(
        f'{model.__class__.__name__}_{filename}_checkpoint_*.pt'))
    if len(checkpoints) > 0:
        # load the latest checkpoint
        checkpoints.sort(key=os.path.getctime)
        latest_checkpoint_path = checkpoints[-1]
        start_step, results, sequences = load_checkpoint(
            latest_checkpoint_path, model, optimizer)
    else:
        # initialize the start step, results and sequences
        start_step = 0
        results = {
            'step': [],
            'accuracy': [],
            'loss': [],
        }
        sequences = {
            'step': [],
            't': [],
            'temperature': [],
            'sequence': []
        }

    for step in range(start_step, int(config.train_steps)):

        # reinitialize the data_loader iterator if we have iterated over all available mini-batches
        if step % len(data_loader) == 0 or step == start_step:
            data_iter = iter(data_loader)
        # get the mini-batch
        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        # put the model in training mode
        model.train()

        # convert the data and send to device
        X = torch.stack(batch_inputs, dim=1)
        X = X.to(device)
        Y = torch.stack(batch_targets, dim=1)
        Y = Y.to(device)

        # forward pass the mini-batch
        Y_out, _ = model(X)
        Y_pred = Y_out.argmax(dim=-1)

        # (re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # compute the accuracy and the loss
        accuracy = get_accuracy(Y_pred, Y)
        loss = criterion(Y_out.transpose(2, 1), Y)

        # backward propagate the loss
        loss.backward()

        # clip the gradients (to prevent them from exploding)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

        # tune the model parameters
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], '
                  f'Train Step {step:04d}/{int(config.train_steps):04d}, '
                  f'Batch Size = {config.batch_size}, '
                  f'Examples/Sec = {examples_per_second:.2f}, '
                  f'Accuracy = {accuracy:.2f}, Loss = {loss:.3f}')

            # append the accuracy and loss to the results
            results['step'].append(step)
            results['accuracy'].append(accuracy.item())
            results['loss'].append(loss.item())

        if step % config.sample_every == 0:
            for T in [20, 30, 60, 120]:
                for temperature in [0.0, 0.5, 1.0, 2.0]:
                    # Generate some sentences by sampling from the model
                    sequence = sample_sequence(
                        model=model,
                        vocab_size=VOCAB_SIZE,
                        T=T,
                        char=None,
                        temperature=temperature,
                        device=device
                    )
                    sequence_str = dataset.convert_to_string(sequence)
                    print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}')

                    # append the generated sequence to the sequences
                    sequences['step'].append(step)
                    sequences['t'].append(T)
                    sequences['temperature'].append(temperature)
                    sequences['sequence'].append(sequence_str)

        if step % config.checkpoint_every == 0:
            # create a checkpoint
            create_checkpoint(CHECKPOINTS_DIR, filename, step, model, optimizer,
                              results, sequences)

            # save the results
            save_results(RESULTS_DIR, filename, results, sequences, model)

            # save the model
            save_model(MODELS_DIR, filename, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
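# NOTE (sketch): `sample_sequence` for this character-level LSTM is not shown.
# Given the call signature and the temperature grid that includes 0.0, it
# presumably decodes greedily at temperature 0 and samples from a
# temperature-scaled softmax otherwise. A sketch under those assumptions
# (the model is assumed to accept an optional hidden state and return
# `(logits, hidden)` as in the training forward pass):
def sample_sequence(model, vocab_size, T, char=None, temperature=0.0, device='cpu'):
    model.eval()
    with torch.no_grad():
        if char is None:
            # start from a random character when none is given
            char = torch.randint(vocab_size, (1,)).item()
        sequence = [char]
        x = torch.tensor([[char]], device=device)
        hidden = None
        for _ in range(T - 1):
            logits, hidden = model(x, hidden)
            logits = logits[:, -1, :]
            if temperature == 0.0:
                next_char = logits.argmax(dim=-1)  # greedy decoding
            else:
                probs = torch.softmax(logits / temperature, dim=-1)
                next_char = torch.multinomial(probs, 1).squeeze(1)
            sequence.append(next_char.item())
            x = next_char.view(1, 1)
    return sequence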
avg_loss += loss.item()
loss.backward()
optimizer.step()
global_step += 1

if idx % args.every == 0 and idx > 0:
    tb_writer.add_scalar("perplexity",
                         math.exp(avg_loss / args.every), global_step)

    fake_inputs = caption
    gt_inputs = trg_out.cpu().data.numpy()

    #samples = model.sample(fake_inputs, tabfeat, caption, highlight_idx, bert)
    samples = sample_sequence(model, 30, fake_inputs, [])
    samples = samples[:, caption.shape[1]:]
    samples = samples.cpu().data.numpy()

    for s, gt in zip(samples, gt_inputs):
        text = tokenizer.decode(s, clean_up_tokenization_spaces=True)
        text = text[:text.find(tokenizer.eos_token)]
        print("PREDICTION |||||| ", text)
        text = tokenizer.decode(gt, clean_up_tokenization_spaces=True)
        text = text[:text.find(tokenizer.eos_token)]
        print("GROUNDTRUTH |||||| ", text)
        break  # only show the first sample of the batch

    avg_loss = 0
    # print stats
    p_ = tf.unstack(output['present'], axis=1)
    for i in range(len(p_)):
        print('%% Output of stack {}: {}'.format(i, p_[i]))

    # make loss
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=context[:, 1:], logits=output['logits'][:, :-1]))
    tf.summary.scalar('loss', loss)
    print('** loss:', loss)

    # sample sequence
    tf_sample = sample_sequence(config=config,
                                length=config.num_context,
                                start_token=sp_model.bos_id(),
                                temprature=config.temprature,
                                top_k=config.top_k)
    print('** tf_sample:', tf_sample)

    # train vars and train steps
    train_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
    opt = tf.train.AdamOptimizer(config.learning_rate)
    train_step = opt.minimize(loss, var_list=train_vars)

    # saver and file writer
    saver_ = tf.train.Saver(max_to_keep=2)

    # encoder function
    def encode(string):
def run(self):
    self.model.eval()

    total_bleu = 0
    total_f1 = 0
    total_dist1 = 0
    total_dist2 = 0
    total_loss = 0
    print('Run eval...')
    with torch.no_grad():
        for batch_idx, feature in enumerate(self.test_iter):
            utils.feature_to_device(feature, self.device)

            out, out_lm = self.model(feature)
            print(self.vocab.itos(out[3, 0].argmax(dim=0).item()),
                  self.vocab.itos(out_lm[3, 0].argmax(dim=0).item()))
            loss, loss_lm = models.AR.loss(self.out_loss_fn, out, out_lm,
                                           feature.resp, feature.lm.y)
            print(loss, loss_lm)
            loss = loss + self.model_config.alpha * loss_lm
            total_loss += loss.item()

            # target includes w1, w2 ... [EOS], len: max_seq_length + 1
            target = copy.deepcopy(feature.resp[1:])  # feature will be changed
            pred, pred_padded = utils.sample_sequence(feature, self.vocab,
                                                      self.model, self.args)

            pred_tokens = [[self.vocab.itos(k) for k in ks] for ks in pred]
            target_tokens = [[[self.vocab.itos(k) for k in ks]]
                             for ks in target.T.tolist()]

            print('----------------------------------')
            print('Context: ', ''.join([self.vocab.itos(k)
                                        for k in feature.context.T.tolist()[0]]))
            print('LM x: ', ''.join([self.vocab.itos(k)
                                     for k in feature.lm.x.T.tolist()[0]]))
            print('LM y: ', ''.join([self.vocab.itos(k)
                                     for k in feature.lm.y.T.tolist()[0]]))
            print('Pred: ', ''.join([self.vocab.itos(k)
                                     for k in pred_padded.T.tolist()[0]]))
            print('Target: ', ''.join(target_tokens[0][0]))
            print('Pred: ', ''.join([self.vocab.itos(k)
                                     for k in pred_padded.T.tolist()[-1]]))
            print('Target: ', ''.join(target_tokens[-1][0]))
            print('----------------------------------')

            bleu = metrics.bleu_score(pred_tokens, target_tokens)
            f1 = metrics.f1_score(pred_padded.T.to('cpu'), target.T.to('cpu'))
            # dist1 = metrics.distinct_score([v[:-1] for v in pred])
            dist1 = metrics.distinct_score(pred_tokens)
            dist2 = metrics.distinct_score(pred_tokens, 2)

            total_bleu += bleu
            total_f1 += f1
            total_dist1 += dist1
            total_dist2 += dist2

    l = len(self.test_iter)
    bleu = total_bleu / l
    f1 = total_f1 / l
    dist1 = total_dist1 / l
    dist2 = total_dist2 / l
    # https://stackoverflow.com/questions/59209086/calculate-perplexity-in-pytorch
    # see per-word perplexity:
    # https://github.com/huggingface/transfer-learning-conv-ai/blob/master/convai_evaluation.py#L161
    # https://github.com/facebookresearch/ParlAI/blob/56d46551190a7ffaedccd13534412d43bc7076e5/parlai/scripts/eval_ppl.py
    ppl = math.exp(total_loss / l)

    print(f'\tBleu: {bleu:.8f} | F1: {f1:.8f} | '
          f'Dist1: {dist1:.3f} | Dist2: {dist2:.3f} | PPL: {ppl:7.3f}')
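# NOTE (sketch): `metrics.distinct_score` is not shown. Distinct-n is
# conventionally the number of unique n-grams divided by the total number of
# n-grams across all generated responses; a minimal sketch matching the calls
# `distinct_score(pred_tokens)` and `distinct_score(pred_tokens, 2)`:
def distinct_score(token_lists, n=1):
    ngrams = []
    for tokens in token_lists:
        ngrams += [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]
    if not ngrams:
        return 0.0
    return len(set(ngrams)) / len(ngrams)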
loss.backward()
optimizer.step()
global_step += 1

if args.local_rank in [-1, 0] and idx % args.every == 0 and idx > 0:
    tb_writer.add_scalar("perplexity",
                         math.exp(avg_loss / args.every), global_step)

    fake_inputs = caption
    gt_inputs = trg_out.cpu().data.numpy()

    #samples = model.sample(fake_inputs, tabfeat, caption, highlight_idx, bert)
    samples = sample_sequence(model, 30, fake_inputs, [])
    samples = samples[:, caption.shape[1]:]
    samples = samples.cpu().data.numpy()

    for s, gt in zip(samples, gt_inputs):
        text = tokenizer.decode(s, clean_up_tokenization_spaces=True)
        text = text[:text.find(tokenizer.eos_token)]
        print("PREDICTION |||||| ", text)
        text = tokenizer.decode(gt, clean_up_tokenization_spaces=True)
        text = text[:text.find(tokenizer.eos_token)]
        print("GROUNDTRUTH |||||| ", text)
        break  # only show the first sample of the batch

    avg_loss = 0
def test_one_to_one(task_load, task_eval, model, score_dict):

    logger.info("start to test { task: %s (load) %s (eval), seq train type: %s }" %
                (task_load, task_eval, args.seq_train_type))

    test_qadata = QADataset(TASK_DICT[task_eval]["test"], "test",
                            SPECIAL_TOKEN_IDS[task_load]).sort()
    max_a_len = test_qadata.max_a_len
    test_dataloader = create_dataloader(test_qadata, "test")
    n_examples = len(test_qadata)
    logger.info("len of test dataset: {}".format(n_examples))

    need_process = OrderedDict()
    qa_results = [0 for _ in range(n_examples)]
    all_pasts = [[0 for _ in range(n_examples)] for __ in range(MODEL_CONFIG.n_layer)]
    max_tot_lens = [0 for _ in range(n_examples)]

    cnt = 0
    for n_steps, (cqs, len_cqs, _, _, _, _, _) in enumerate(test_dataloader):
        # assume n_gpus == 1
        cqs = cqs[0]
        len_cqs = len_cqs[0]
        n_inputs = cqs.shape[0]
        all_outputs = model(input_ids=cqs.cuda())
        outputs = all_outputs[0]
        if args.model_name == "gpt2":
            pasts = all_outputs[1]
        next_logits = outputs[range(n_inputs), len_cqs - 1, :] / args.temperature_qa
        next_tokens = logits_to_tokens(next_logits).cpu()

        for i in range(n_inputs):
            max_tot_lens[cnt] = max_a_len + test_qadata[cnt][1]
            qa_results[cnt] = cqs[i][:len_cqs[i]]
            if next_tokens[i] != SPECIAL_TOKEN_IDS["eos_token"]:
                qa_results[cnt] = torch.cat((cqs[i][:len_cqs[i]], next_tokens[i]))
                if len(qa_results[cnt]) not in [max_tot_lens[cnt], args.max_len]:
                    need_process.update([[cnt, None]])
                    if args.model_name == "gpt2":
                        for layer_id in range(MODEL_CONFIG.n_layer):
                            all_pasts[layer_id][cnt] = pasts[layer_id][
                                :, i, ..., :len_cqs[i], :
                            ].type(torch.float32 if args.fp32 else torch.half)
            cnt += 1

        if len(need_process) > int(12 * args.memory_sizes[0] / cqs.shape[1]):
            # dynamic threshold to avoid out of memory
            sample_sequence(model, need_process, qa_results, all_pasts, max_tot_lens)
    sample_sequence(model, need_process, qa_results, all_pasts, max_tot_lens)

    if task_eval in ['wikisql', 'woz.en', 'multinli.in.out']:
        ids = test_qadata.get_indices()
        test_qadata.sort_by_index()
        qa_results = [x[1] for x in sorted([(i, g) for i, g in zip(ids, qa_results)])]

    for i in range(len(test_qadata)):
        _, len_cq, _, _, Y, _, _, _ = test_qadata[i]
        if task_eval in ['wikisql', 'woz.en']:
            Y = test_qadata.answers[i]
        else:
            Y = list(filter(lambda x: x != -1, Y))[:-1]  # remove eos
            Y = ' '.join([str(y) for y in Y]).split(str(SPECIAL_TOKEN_IDS["pad_token"]))
            Y = [TOKENIZER.decode(list(map(int, y.split()))) for y in Y]
        qa_results[i] = [TOKENIZER.decode(qa_results[i].tolist()[len_cq:]), Y]
    get_test_score(task_eval, qa_results, score_dict)

    model_dir = model.model_dir
    ep = model.ep
    results_path = os.path.join(model_dir, "qa_{}_{}.csv".format(task_eval, ep + 1))
    if not args.debug:
        with open(results_path, "w", encoding="utf-8") as f:
            qa_writer = csv.writer(f, delimiter=',')
            qa_writer.writerow(["y", "pred"])
            for pred, y in qa_results:
                if task_eval == 'wikisql':
                    y = y["answer"]
                elif task_eval == 'woz.en':
                    y = y[1]
                qa_writer.writerow([y, pred])

    return model, score_dict
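# NOTE (sketch): `logits_to_tokens` is not shown. Given the call site, it maps
# a (batch, vocab) slice of next-token logits to one sampled token per example,
# presumably after top-k/top-p filtering. A minimal top-k-only sketch (the
# default k here is made up for illustration):
import torch.nn.functional as F

def logits_to_tokens(next_logits, top_k=20):
    if top_k > 0:
        kth = torch.topk(next_logits, top_k)[0][..., -1, None]
        next_logits = next_logits.masked_fill(next_logits < kth, -float('inf'))
    probs = F.softmax(next_logits, dim=-1)
    return torch.multinomial(probs, num_samples=1)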
def run():
    parser = ArgumentParser()
    parser.add_argument("--dataset_path", type=str, default="",
                        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument("--dataset_cache", type=str,
                        default='./dataset_cache/dataset_cache_OpenAIGPTTokenizer',
                        help="Path or url of the dataset cache")
    parser.add_argument("--model_checkpoint", type=str, default="./Model",
                        help="Path, url or short name of the model")
    parser.add_argument("--max_history", type=int, default=2,
                        help="Number of previous utterances to keep in history")
    parser.add_argument("--device", type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--no_sample", action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length", type=int, default=20,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length", type=int, default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature", type=float, default=0.7,
                        help="Sampling softmax temperature")
    parser.add_argument("--top_k", type=int, default=0,
                        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument("--top_p", type=float, default=0.9,
                        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    if args.seed != 0:
        random.seed(args.seed)
        torch.random.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    # Loading model class and tokenizer
    logger.info("Get pretrained model and tokenizer")
    tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
    tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)
    model = model_class.from_pretrained(args.model_checkpoint)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)

    logger.info("Sample a personality")
    dataset = torch.load(args.dataset_cache)
    personalities = [dialog["personality"]
                     for dataset in dataset.values()
                     for dialog in dataset]
    personality = random.choice(personalities)
    logger.info("Selected personality: %s", tokenizer.decode(chain(*personality)))

    history = []
    while True:
        raw_text = input(">>> ")
        while not raw_text:
            print('Prompt should not be empty!')
            raw_text = input(">>> ")
        history.append(tokenizer.encode(raw_text))
        with torch.no_grad():
            out_ids = sample_sequence(personality, history, tokenizer, model, args)
        history.append(out_ids)
        history = history[-(2 * args.max_history + 1):]
        out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
        print(out_text)
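# NOTE (sketch): the `sample_sequence(personality, history, tokenizer, model,
# args)` used by this interactive loop (and by get_bot_response/api above) is
# not shown. A simplified, self-contained sketch of such a decoder: it flattens
# persona + history + the partial reply instead of using a segment-building
# helper, applies temperature and top-k filtering (top_p is omitted for
# brevity), and stops on special tokens after min_length. SPECIAL_TOKENS is an
# assumption, not taken from this snippet:
from itertools import chain
import torch
import torch.nn.functional as F

SPECIAL_TOKENS = ["<bos>", "<eos>", "<speaker1>", "<speaker2>", "<pad>"]

def sample_sequence(personality, history, tokenizer, model, args, current_output=None):
    special_tokens_ids = tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS)
    if current_output is None:
        current_output = []
    for i in range(args.max_length):
        # flatten persona + dialogue history + partial reply into one sequence
        input_ids = list(chain(*personality, *history, current_output))
        input_ids = torch.tensor(input_ids, device=args.device).unsqueeze(0)
        logits = model(input_ids)
        if isinstance(logits, tuple):  # older model classes return a tuple
            logits = logits[0]
        logits = logits[0, -1, :] / args.temperature
        if args.top_k > 0:
            # keep only the top-k logits before sampling
            kth = torch.topk(logits, args.top_k)[0][-1]
            logits = logits.masked_fill(logits < kth, -float('inf'))
        probs = F.softmax(logits, dim=-1)
        prev = probs.argmax().unsqueeze(0) if args.no_sample else torch.multinomial(probs, 1)
        # forbid ending on a special token before min_length
        if i < args.min_length and prev.item() in special_tokens_ids:
            while prev.item() in special_tokens_ids:
                prev = torch.multinomial(probs, num_samples=1)
        if prev.item() in special_tokens_ids:
            break
        current_output.append(prev.item())
    return current_output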
avg_loss += loss.item()
loss.backward()
optimizer.step()
global_step += 1

if idx % args.every == 0 and idx > 0:
    tb_writer.add_scalar("perplexity",
                         math.exp(avg_loss / args.every), global_step)

    fake_inputs = caption
    gt_inputs = trg_out.cpu().data.numpy()

    samples = sample_sequence(model, 50, fake_inputs, [])
    samples = samples[:, caption.shape[1]:]
    samples = samples.cpu().data.numpy()

    for s, gt in zip(samples, gt_inputs):
        print("EPOCH {}; FINISHED {}/{}".format(epoch_idx, idx,
                                                dataset.train_len()))
        text = tokenizer.decode(s, clean_up_tokenization_spaces=True)
        text = text[:text.find(tokenizer.eos_token)]
        print("PREDICTION |||||| ", text)
        text = tokenizer.decode(gt, clean_up_tokenization_spaces=True)
        text = text[:text.find(tokenizer.eos_token)]
        print("GROUNDTRUTH |||||| ", text)
        break  # only show the first sample of the batch