Example #1
    def top_k_top_p_decode(self, dec_hidden, enc_out):
        batch_size = enc_out.size(0)
        outputs = []

        # initial decoder input: a column of start-of-sequence token ids (2)
        dec_input = torch.full((batch_size, 1), 2, dtype=torch.long,
                               device=self.device)
        input_feed = torch.zeros(batch_size,
                                 1,
                                 enc_out.size(2),
                                 device=self.device)

        for t in range(0, self.max_len_sentence):
            dec_input = self.embedding(dec_input)  # (batch size, 1, emb dim)
            dec_input = torch.cat((dec_input, input_feed), 2)

            dec_output, dec_hidden = self.decode_rnn(dec_input, dec_hidden,
                                                     enc_out)

            out = self.gen(dec_output)

            out, probs = sample_sequence(out, self.top_k, self.top_p,
                                         self.temperature, False)
            # before min_len_sentence steps, resample whenever a special token comes up
            if t < self.min_len_sentence and out.item() in self.special_tokens_ids:
                while out.item() in self.special_tokens_ids:
                    out = torch.multinomial(probs, num_samples=1)

            if out.item() in self.special_tokens_ids:
                return outputs
            outputs.append(out.item())
            dec_input = out.long().unsqueeze(1)
            input_feed = dec_output

        return outputs
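The sample_sequence helper this method calls is not shown here; as a rough sketch, a top-k/top-p (nucleus) filtering step with a compatible call signature could look like the following (the signature, tensor shapes, and return values are assumptions inferred from the call above, not the original helper):

import torch
import torch.nn.functional as F


def sample_sequence(logits, top_k, top_p, temperature, greedy=False):
    # logits: (batch, vocab) or (batch, 1, vocab) scores for the next token
    if logits.dim() == 3:
        logits = logits.squeeze(1)
    logits = logits / max(temperature, 1e-8)

    if top_k > 0:
        # keep only the top-k logits, mask out the rest
        kth_best = torch.topk(logits, top_k)[0][..., -1, None]
        logits = logits.masked_fill(logits < kth_best, float('-inf'))

    if top_p > 0.0:
        # nucleus (top-p) filtering: drop tokens outside the top-p probability mass
        sorted_logits, sorted_idx = torch.sort(logits, descending=True)
        cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        to_remove = cum_probs > top_p
        to_remove[..., 1:] = to_remove[..., :-1].clone()
        to_remove[..., 0] = False
        mask = to_remove.scatter(1, sorted_idx, to_remove)
        logits = logits.masked_fill(mask, float('-inf'))

    probs = F.softmax(logits, dim=-1)
    if greedy:
        token = probs.argmax(dim=-1, keepdim=True)
    else:
        token = torch.multinomial(probs, num_samples=1)
    return token, probs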
Example #2
def get_bot_response():
    global history
    global personality
    userText = request.args.get('msg')
    history.append(tokenizer.encode(userText))
    with torch.no_grad():
        out_ids = sample_sequence(personality, history, tokenizer, model, args)
    history.append(out_ids)
    history = history[-(2 * args.max_history + 1):]
    out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
    return out_text
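For context, get_bot_response reads its input from a Flask request; a minimal sketch of how such a handler might be wired up (the route name, port, and module-level globals are assumptions for illustration, not taken from the original project):

from flask import Flask, request

app = Flask(__name__)
history = []      # running dialogue history, shared across requests
# personality, tokenizer, model and args are assumed to be initialized at startup

# the handler above would typically be registered on a route, e.g.:
app.add_url_rule("/get", "get_bot_response", get_bot_response)

if __name__ == "__main__":
    # a client then calls e.g. http://localhost:5000/get?msg=Hello
    app.run(port=5000)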
Example #3
def evaluate_ppl_gpt(args):
    """
    Evaluate on raw text, use this with GPT which has its own tokenizer
    """
    if args.expanded_dataset:
        path = ".data/stories/story_commonsense/torchtext_expanded"
    else:
        path = ".data/stories/story_commonsense/torchtext"
    # Data
    test_src = [line.rstrip('\n') for line in open(path + "/test.src")]
    test_trg = [line.rstrip('\n') for line in open(path + "/test.trg")]

    # Model
    enc = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    model.to(device)
    model.eval()
    loss = 0
    batch_size = 1

    print("Evaluating test set with GPT2")
    for i in trange(len(test_src)):
        src, trg = test_src[i], test_trg[i]
        context = enc.encode(src)
        target = enc.encode(trg)
        length = len(target)

        # Generate prediction
        out = utils.sample_sequence(model,
                                    length,
                                    batch_size=1,
                                    context=context,
                                    top_k=10,
                                    device=device)
        out = out[:, len(context):]

        # Get model loss
        target = torch.tensor([target]).to(device)
        with torch.no_grad():
            # with labels the model returns a tuple; the first element is the LM loss
            l = model(out, labels=target)[0]
            loss += float(l)
    av_loss = loss / len(test_src)
    print(f"ppl: {math.exp(av_loss):.04f}")
Example #4
def api():
    """ Handle request and output model score in json format"""
    if args is None:
        initialize()

    history_text = None

    # Handle GET requests:
    if request.method == "GET":
        if request.args:
            history_text = request.args.getlist("input")
            # print(history_text)

    if history_text is not None:
        print(
            f"Received valid request through API - \"input\": {history_text}")
    else:
        return jsonify({
            "error":
            "Invalid JSON request. Provide GET request as {\"input\": \"<your dialogue history as list>\"}"
        })

    personality = []
    history = [tokenizer.encode(t) for t in history_text]
    with torch.no_grad():
        out_ids = sample_sequence(personality,
                                  history,
                                  tokenizer,
                                  model,
                                  args,
                                  current_output=None)
    out_text = tokenizer.decode(out_ids, skip_special_tokens=True)

    # save_to_db(title, out_text)

    return jsonify({"history": history_text, "response": out_text})
Example #5
def train(config):
    # determine the filename (to be used for saving results, checkpoints, models, etc.)
    filename = Path(config.txt_file).stem

    # Initialize the device which to run the model on
    if config.device == 'cuda':
        if torch.cuda.is_available():
            device = torch.device(config.device)
        else:
            device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(
        filename=config.txt_file,
        seq_length=config.seq_length
    )
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # get the vocabulary size and int2char and char2int dictionaries for use later
    VOCAB_SIZE = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=VOCAB_SIZE,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers,
        device=device,
        batch_first=config.batch_first,
        dropout=1.0-config.dropout_keep_prob
    )

    # Setup the loss and optimizer and learning rate scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(),
        config.learning_rate
    )

    # Load the latest checkpoint, if any exist
    checkpoints = list(CHECKPOINTS_DIR.glob(f'{model.__class__.__name__}_{filename}_checkpoint_*.pt'))
    if len(checkpoints) > 0:
        # load the latest checkpoint
        checkpoints.sort(key=os.path.getctime)
        latest_checkpoint_path = checkpoints[-1]
        start_step, results, sequences = load_checkpoint(latest_checkpoint_path, model, optimizer)
    else:
        # initialize the start step, results and sequences
        start_step = 0
        results = {
            'step': [],
            'accuracy': [],
            'loss': [],
        }
        sequences = {
            'step': [],
            't': [],
            'temperature': [],
            'sequence': []
        }

    for step in range(start_step, int(config.train_steps)):
        # reinitialize the data_loader iterator if we have iterated over all available mini-batches
        if step % len(data_loader) == 0 or step == start_step:
            data_iter = iter(data_loader)
        
        # get the mini-batch
        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        # put the model in training mode
        model.train()

        # convert the data and send to device
        X = torch.stack(batch_inputs, dim=1)
        X = X.to(device)

        Y = torch.stack(batch_targets, dim=1)
        Y = Y.to(device)

        # forward pass the mini-batch
        Y_out, _ = model(X)
        Y_pred = Y_out.argmax(dim=-1)

        # (re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # compute the accuracy and the loss
        accuracy = get_accuracy(Y_pred, Y)
        loss = criterion(Y_out.transpose(2, 1), Y)

        # backpropagate the loss
        loss.backward()

        # clip the gradients (to prevent them from exploding)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

        # tune the model parameters
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % config.print_every == 0:
            print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], Train Step {step:04d}/{int(config.train_steps):04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}')

            # append the accuracy and loss to the results
            results['step'].append(step)
            results['accuracy'].append(accuracy.item())
            results['loss'].append(loss.item())

        if step % config.sample_every == 0:
            for T in [20, 30, 60, 120]:
                for temperature in [0.0, 0.5, 1.0, 2.0]:
                    # Generate some sentences by sampling from the model
                    sequence = sample_sequence(
                        model=model,
                        vocab_size=VOCAB_SIZE,
                        T=T,
                        char=None,
                        temperature=temperature,
                        device=device
                    )
                    sequence_str = dataset.convert_to_string(sequence)
                    print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}')

                    # append the generated sequence to the sequences
                    sequences['step'].append(step)
                    sequences['t'].append(T)
                    sequences['temperature'].append(temperature)
                    sequences['sequence'].append(sequence_str)

        if step % config.checkpoint_every == 0:
            # create a checkpoint
            create_checkpoint(CHECKPOINTS_DIR, filename, step, model, optimizer, results, sequences)

            # save the results
            save_results(RESULTS_DIR, filename, results, sequences, model)

            # save the model
            save_model(MODELS_DIR, filename, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
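The sample_sequence called in the sampling loop above generates characters one at a time from the LSTM. A rough sketch of temperature-controlled sampling follows, assuming the model can be called step by step with a hidden state and that temperature 0.0 means greedy decoding; the exact interface of TextGenerationModel is not shown, so this is only illustrative:

import torch


def sample_sequence(model, vocab_size, T, char=None, temperature=0.0, device='cpu'):
    # assumed interface: model(input, hidden) -> (logits of shape (1, 1, vocab), hidden)
    if char is None:
        current = torch.randint(vocab_size, (1, 1), device=device)  # random start character
    else:
        current = torch.tensor([[char]], device=device)
    sequence = [current.item()]
    hidden = None

    with torch.no_grad():
        for _ in range(T - 1):
            logits, hidden = model(current, hidden)
            logits = logits[:, -1, :]
            if temperature == 0.0:
                # greedy decoding
                current = logits.argmax(dim=-1, keepdim=True)
            else:
                # temperature-scaled sampling
                probs = torch.softmax(logits / temperature, dim=-1)
                current = torch.multinomial(probs, num_samples=1)
            sequence.append(current.item())
    return sequence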
Example #6
                avg_loss += loss.item()

                loss.backward()
                optimizer.step()
                global_step += 1

                if idx % args.every == 0 and idx > 0:
                    tb_writer.add_scalar("perplexity",
                                         math.exp(avg_loss / args.every),
                                         global_step)

                    fake_inputs = caption
                    gt_inputs = trg_out.cpu().data.numpy()

                    #samples = model.sample(fake_inputs, tabfeat, caption, highlight_idx, bert)
                    samples = sample_sequence(model, 30, fake_inputs, [])
                    samples = samples[:, caption.shape[1]:]
                    samples = samples.cpu().data.numpy()

                    for s, gt in zip(samples, gt_inputs):
                        text = tokenizer.decode(
                            s, clean_up_tokenization_spaces=True)
                        text = text[:text.find(tokenizer.eos_token)]
                        print("PREDICTION |||||| ", text)
                        text = tokenizer.decode(
                            gt, clean_up_tokenization_spaces=True)
                        text = text[:text.find(tokenizer.eos_token)]
                        print("GROUNDTRUH |||||| ", text)
                        break

                    avg_loss = 0
Example #7
# print stats
p_ = tf.unstack(output['present'], axis=1)
for i in range(len(p_)):
    print('%% Output of stack {}: {}'.format(i, p_[i]))

# make loss
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=context[:, 1:], logits=output['logits'][:, :-1]))
tf.summary.scalar('loss', loss)
print('** loss:', loss)

# sample sequence
tf_sample = sample_sequence(config=config,
                            length=config.num_context,
                            start_token=sp_model.bos_id(),
                            temprature=config.temprature,
                            top_k=config.top_k)
print('** tf_sample:', tf_sample)

# train vars and train steps
train_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
opt = tf.train.AdamOptimizer(config.learning_rate)
train_step = opt.minimize(loss, var_list=train_vars)

# saver and file writer
saver_ = tf.train.Saver(max_to_keep=2)


# encoder function
def encode(string):
Example #8
    def run(self):
        self.model.eval()

        total_bleu = 0
        total_f1 = 0
        total_dist1 = 0
        total_dist2 = 0
        total_loss = 0

        print('Run eval...')
        with torch.no_grad():
            for batch_idx, feature in enumerate(self.test_iter):
                utils.feature_to_device(feature, self.device)

                out, out_lm = self.model(feature)
                print(self.vocab.itos(out[3, 0].argmax(dim=0).item()),
                      self.vocab.itos(out_lm[3, 0].argmax(dim=0).item()))
                loss, loss_lm = models.AR.loss(self.out_loss_fn, out, out_lm,
                                               feature.resp, feature.lm.y)
                print(loss, loss_lm)
                loss = loss + self.model_config.alpha * loss_lm
                total_loss += loss.item()

                # target include w1, w2...[EOS], len: max_seq_length + 1
                target = copy.deepcopy(feature.resp[1:])
                # feature will be changed
                pred, pred_padded = utils.sample_sequence(
                    feature, self.vocab, self.model, self.args)

                pred_tokens = [[self.vocab.itos(k) for k in ks] for ks in pred]
                target_tokens = [[[self.vocab.itos(k) for k in ks]]
                                 for ks in target.T.tolist()]
                print('----------------------------------')
                print(
                    'Context: ', ''.join([
                        self.vocab.itos(k)
                        for k in feature.context.T.tolist()[0]
                    ]))
                print(
                    'LM x: ', ''.join([
                        self.vocab.itos(k) for k in feature.lm.x.T.tolist()[0]
                    ]))
                print(
                    'LM y: ', ''.join([
                        self.vocab.itos(k) for k in feature.lm.y.T.tolist()[0]
                    ]))
                print(
                    'Pred: ', ''.join([
                        self.vocab.itos(k) for k in pred_padded.T.tolist()[0]
                    ]))
                print('Target: ', ''.join(target_tokens[0][0]))
                print(
                    'Pred: ', ''.join([
                        self.vocab.itos(k) for k in pred_padded.T.tolist()[-1]
                    ]))
                print('Target: ', ''.join(target_tokens[-1][0]))
                print('----------------------------------')
                bleu = metrics.bleu_score(pred_tokens, target_tokens)
                f1 = metrics.f1_score(pred_padded.T.to('cpu'),
                                      target.T.to('cpu'))
                # dist1 = metrics.distinct_score([v[:-1] for v in pred])
                dist1 = metrics.distinct_score(pred_tokens)
                dist2 = metrics.distinct_score(pred_tokens, 2)

                total_bleu += bleu
                total_f1 += f1
                total_dist1 += dist1
                total_dist2 += dist2

        l = len(self.test_iter)
        bleu = total_bleu / l
        f1 = total_f1 / l
        dist1 = total_dist1 / l
        dist2 = total_dist2 / l
        # https://stackoverflow.com/questions/59209086/calculate-perplexity-in-pytorch
        # see per-word perplexity:
        # https://github.com/huggingface/transfer-learning-conv-ai/blob/master/convai_evaluation.py#L161
        # https://github.com/facebookresearch/ParlAI/blob/56d46551190a7ffaedccd13534412d43bc7076e5/parlai/scripts/eval_ppl.py
        ppl = math.exp(total_loss / l)

        print(f'\tBleu: {bleu:.8f} | F1: {f1:.8f} | '
              f'Dist1: {dist1:.3f} | Dist2: {dist2:.3f} | PPL: {ppl:7.3f}')
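The Dist1/Dist2 numbers above are standard diversity metrics (unique n-grams over total generated n-grams). A minimal stand-alone version, assumed to mirror what metrics.distinct_score computes:

def distinct_n(token_lists, n=1):
    """Fraction of unique n-grams over all generated n-grams."""
    ngrams = [tuple(tokens[i:i + n])
              for tokens in token_lists
              for i in range(len(tokens) - n + 1)]
    return len(set(ngrams)) / max(len(ngrams), 1)


# e.g. distinct_n(pred_tokens) for distinct-1, distinct_n(pred_tokens, 2) for distinct-2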
Example #9
                loss.backward()
                optimizer.step()
                global_step += 1

                if args.local_rank in [-1, 0] and idx % args.every == 0 and idx > 0:
                    tb_writer.add_scalar("perplexity",
                                         math.exp(avg_loss / args.every),
                                         global_step)

                    fake_inputs = caption
                    gt_inputs = trg_out.cpu().data.numpy()

                    #samples = model.sample(fake_inputs, tabfeat, caption, highlight_idx, bert)
                    samples = sample_sequence(model, 30, fake_inputs, [])
                    samples = samples[:, caption.shape[1]:]
                    samples = samples.cpu().data.numpy()

                    for s, gt in zip(samples, gt_inputs):
                        text = tokenizer.decode(
                            s, clean_up_tokenization_spaces=True)
                        text = text[:text.find(tokenizer.eos_token)]
                        print("PREDICTION |||||| ", text)
                        text = tokenizer.decode(
                            gt, clean_up_tokenization_spaces=True)
                        text = text[:text.find(tokenizer.eos_token)]
                        print("GROUNDTRUH |||||| ", text)
                        break

                    avg_loss = 0
Example #10
def test_one_to_one(task_load, task_eval, model, score_dict):

    logger.info("start to test { task: %s (load) %s (eval), seq train type: %s }" % (task_load, task_eval, args.seq_train_type))

    test_qadata = QADataset(TASK_DICT[task_eval]["test"] , "test", SPECIAL_TOKEN_IDS[task_load]).sort()
    max_a_len = test_qadata.max_a_len
    test_dataloader = create_dataloader(test_qadata, "test")
    n_examples = len(test_qadata)
    logger.info("len of test dataset: {}".format(n_examples))

    need_process = OrderedDict()
    qa_results = [0 for _ in range(n_examples)]
    all_pasts = [[0 for _ in range(n_examples)] for __ in range(MODEL_CONFIG.n_layer)]
    max_tot_lens = [0 for _ in range(n_examples)]

    cnt = 0
    for n_steps, (cqs, len_cqs, _, _, _, _, _) in enumerate(test_dataloader):
        # assume n_gpus == 1
        cqs = cqs[0]
        len_cqs = len_cqs[0]
        n_inputs = cqs.shape[0]
        all_outputs = model(input_ids=cqs.cuda())
        outputs = all_outputs[0]
        if args.model_name == "gpt2":
            pasts = all_outputs[1]
        next_logits = outputs[range(n_inputs), len_cqs-1, :] / args.temperature_qa
        next_tokens = logits_to_tokens(next_logits).cpu()

        for i in range(n_inputs):
            max_tot_lens[cnt] = max_a_len + test_qadata[cnt][1]
            qa_results[cnt] = cqs[i][:len_cqs[i]]
            if next_tokens[i] != SPECIAL_TOKEN_IDS["eos_token"]:
                qa_results[cnt] = torch.cat((cqs[i][:len_cqs[i]], next_tokens[i]))
                if len(qa_results[cnt]) not in [max_tot_lens[cnt], args.max_len]:
                    need_process.update([[cnt, None]])
                    if args.model_name == "gpt2":
                        for layer_id in range(MODEL_CONFIG.n_layer):
                            all_pasts[layer_id][cnt] = pasts[layer_id][:, i, ..., :len_cqs[i], :].type(torch.float32 if args.fp32 else torch.half)
            cnt += 1

        if len(need_process) > int(12 * args.memory_sizes[0] / cqs.shape[1]):  # dynamic threshold to avoid out of memory
            sample_sequence(model, need_process, qa_results, all_pasts, max_tot_lens)
    sample_sequence(model, need_process, qa_results, all_pasts, max_tot_lens)

    if task_eval in ['wikisql','woz.en','multinli.in.out']:
        ids = test_qadata.get_indices()
        test_qadata.sort_by_index()
        qa_results = [x[1] for x in sorted([(i, g) for i, g in zip(ids, qa_results)])]
    for i in range(len(test_qadata)):
        _, len_cq, _, _, Y, _, _, _ = test_qadata[i]
        if task_eval in ['wikisql','woz.en']:
            Y = test_qadata.answers[i]
        else:
            Y = list(filter(lambda x: x != -1, Y))[:-1]  # remove eos
            Y = ' '.join([str(y) for y in Y]).split(str(SPECIAL_TOKEN_IDS["pad_token"]))
            Y = [TOKENIZER.decode(list(map(int, y.split()))) for y in Y]
        qa_results[i] = [TOKENIZER.decode(qa_results[i].tolist()[len_cq:]), Y]
    get_test_score(task_eval, qa_results, score_dict)

    model_dir = model.model_dir
    ep = model.ep
    results_path = os.path.join(model_dir,"qa_{}_{}.csv".format(task_eval,ep+1))
    if not args.debug:
        with open(results_path, "w",encoding="utf-8") as f:
            qa_writer = csv.writer(f,delimiter=',')
            qa_writer.writerow(["y","pred"])
            for pred, y in qa_results:
                if task_eval == 'wikisql': 
                    y = y["answer"]
                elif task_eval == 'woz.en': 
                    y = y[1]
                qa_writer.writerow([y,pred])

    return model, score_dict
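The logits_to_tokens call above turns the temperature-scaled logits into sampled token ids; a plausible minimal version is sketched below (the name, default top_k, and behavior are assumptions, not taken from the original code):

import torch
import torch.nn.functional as F


def logits_to_tokens(next_logits, top_k=20):
    # keep the top-k logits per row and sample one token id from them
    values, indices = torch.topk(next_logits, top_k, dim=-1)
    probs = F.softmax(values, dim=-1)
    choice = torch.multinomial(probs, num_samples=1)
    return indices.gather(-1, choice)  # shape (batch, 1)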
Example #11
def run():
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument(
        "--dataset_cache",
        type=str,
        default='./dataset_cache/dataset_cache_OpenAIGPTTokenizer',
        help="Path or url of the dataset cache")
    parser.add_argument("--model_checkpoint",
                        type=str,
                        default="./Model",
                        help="Path, url or short name of the model")
    parser.add_argument(
        "--max_history",
        type=int,
        default=2,
        help="Number of previous utterances to keep in history")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")

    parser.add_argument("--no_sample",
                        action='store_true',
                        help="Set to use greedy decoding instead of sampling")
    parser.add_argument("--max_length",
                        type=int,
                        default=20,
                        help="Maximum length of the output utterances")
    parser.add_argument("--min_length",
                        type=int,
                        default=1,
                        help="Minimum length of the output utterances")
    parser.add_argument("--seed", type=int, default=0, help="Seed")
    parser.add_argument("--temperature",
                        type=int,
                        default=0.7,
                        help="Sampling softmax temperature")
    parser.add_argument(
        "--top_k",
        type=int,
        default=0,
        help="Filter top-k tokens before sampling (<=0: no filtering)")
    parser.add_argument(
        "--top_p",
        type=float,
        default=0.9,
        help="Nucleus filtering (top-p) before sampling (<=0.0: no filtering)")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__file__)
    logger.info(pformat(args))

    if args.seed != 0:
        random.seed(args.seed)
        torch.random.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)

    #Loading model class and tokenizer
    logger.info("Get pretrained model and tokenizer")
    tokenizer_class, model_class = OpenAIGPTTokenizer, OpenAIGPTLMHeadModel
    tokenizer = tokenizer_class.from_pretrained(args.model_checkpoint)
    model = model_class.from_pretrained(args.model_checkpoint)
    model.to(args.device)
    add_special_tokens_(model, tokenizer)

    logger.info("Sample a personality")
    datasets = torch.load(args.dataset_cache)
    personalities = [
        dialog["personality"] for dataset in datasets.values()
        for dialog in dataset
    ]
    personality = random.choice(personalities)
    logger.info("Selected personality: %s",
                tokenizer.decode(chain(*personality)))

    history = []
    while True:
        raw_text = input(">>> ")
        while not raw_text:
            print('Prompt should not be empty!')
            raw_text = input(">>> ")
        history.append(tokenizer.encode(raw_text))
        with torch.no_grad():
            out_ids = sample_sequence(personality, history, tokenizer, model,
                                      args)
        history.append(out_ids)
        history = history[-(2 * args.max_history + 1):]
        out_text = tokenizer.decode(out_ids, skip_special_tokens=True)
        print(out_text)
Example #12
                avg_loss += loss.item()

                loss.backward()
                optimizer.step()
                global_step += 1

                if idx % args.every == 0 and idx > 0:
                    tb_writer.add_scalar("perplexity",
                                         math.exp(avg_loss / args.every),
                                         global_step)

                    fake_inputs = caption
                    gt_inputs = trg_out.cpu().data.numpy()

                    samples = sample_sequence(model, 50, fake_inputs, [])
                    samples = samples[:, caption.shape[1]:]
                    samples = samples.cpu().data.numpy()

                    for s, gt in zip(samples, gt_inputs):
                        print("EPOCH {}; FINISHED {}/{}".format(
                            epoch_idx, idx, dataset.train_len()))
                        text = tokenizer.decode(
                            s, clean_up_tokenization_spaces=True)
                        text = text[:text.find(tokenizer.eos_token)]
                        print("PREDICTION |||||| ", text)
                        text = tokenizer.decode(
                            gt, clean_up_tokenization_spaces=True)
                        text = text[:text.find(tokenizer.eos_token)]
                        print("GROUNDTRUH |||||| ", text)
                        break