Beispiel #1
0
    def __init__(self, args):
        """Build the data pipeline, BiDAF model, optimizer and EMA shadows.

        Optionally wraps model/optimizer with NVIDIA apex ('O2') when
        mixed-precision support is available.
        """
        self.args = args
        device_name = "cuda:{}".format(args.GPU) if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)

        self.data = READ(args)
        word_vectors = self.data.WORD.vocab.vectors
        num_chars = len(self.data.CHAR.vocab)

        self.model = BiDAF(args, num_chars, word_vectors).to(self.device)
        self.optimizer = optim.Adadelta(self.model.parameters(), lr=args.Learning_Rate)
        self.ema = EMA(args.Exp_Decay_Rate)

        # Mixed precision: apex must wrap both the model and the optimizer.
        if APEX_AVAILABLE:
            self.model, self.optimizer = amp.initialize(
                self.model, self.optimizer, opt_level='O2')

        # Register an EMA shadow copy of every trainable parameter.
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.ema.register(name, param.data)

        self.parameters = filter(lambda p: p.requires_grad, self.model.parameters())
Beispiel #2
0
    def onSubmit():
        """Predict answer(s) for the text currently in the context and
        question fields and display the first candidate in the answer label.
        """
        # A fresh model is built and its weights re-loaded on every click;
        # the checkpoint is chosen via the `var` radio/option value.
        bidef = model.BiDAF(400, 250, 20)
        file_name = f'../model/bidaf250_3{str(var.get())}.h5'
        bidef.load_bidaf(file_name)
        # These module-level widgets are replaced/rebound below.
        global answer_level
        global button_forward
        global button_back
        answer_level.grid_forget()
        submit_str.set('Predicting....')
        context = context_area.get("1.0", END)
        question = question_area.get("1.0", END)
        # Tk text widgets return at least a trailing newline, so a length
        # <= 1 means the field is effectively empty.
        if len(context) <= 1:
            messagebox.showwarning('Question Answering App',
                                   'Context Field is Empty!')
            submit_str.set('Predict Answer')
            return
        if len(question) <= 1:
            messagebox.showwarning('Question Answering App',
                                   'Question Field is Empty!')
            submit_str.set('Predict Answer')
            return

        # Encode the inputs, run the model, and decode the start/end span
        # scores back into answer strings.
        process = Preprocess(context, question)
        c, q = process.processForModel()
        p1, p2 = bidef.predict(c, q)
        context = process.preprocess(context)
        answers = PostProcess(context, p1, p2).postProcess()
        answer_list[0] = answers
        answer_level = get_level(root, answers[0], 6, 0, 3, 14)
        print(answers)
        submit_str.set('Predict Answer')
        status = get_level(root, f'Answer 1 of {len(answers)}', 7, 0, 3, 14)
        # Navigation buttons for cycling through multiple candidate answers.
        button_forward = Button(root, text=">>", command=lambda: forward(2))
        button_back = Button(root, text="<<", command=back, state=DISABLED)
        button_back.grid(row=8, column=0, padx=5, pady=10)
        button_forward.grid(row=8, column=2, padx=5, pady=10)
Beispiel #3
0
def train_val_model(pipeline_cfg, model_cfg, train_cfg):
    """Train a BiDAF model and evaluate it on the validation split each epoch.

    Maintains an exponential moving average (EMA) of the trainable weights;
    the EMA copy is swapped in for validation and the raw training weights
    are restored afterwards.  The best checkpoint (by validation F1) is
    saved to ``train_cfg['ckpoint_file']``.
    """
    data_pipeline = DataPipeline(**pipeline_cfg)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if model_cfg['cxt_emb_pretrained'] is not None:
        model_cfg['cxt_emb_pretrained'] = torch.load(
            model_cfg['cxt_emb_pretrained'])
    bidaf = BiDAF(word_emb=data_pipeline.word_type.vocab.vectors, **model_cfg)
    ema = EMA(train_cfg['exp_decay_rate'])
    # Register an EMA shadow value for every trainable parameter.
    for name, param in bidaf.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, bidaf.parameters())
    optimizer = optim.Adadelta(parameters, lr=train_cfg['lr'])
    criterion = nn.CrossEntropyLoss()

    result = {'best_f1': 0.0, 'best_model': None}

    num_epochs = train_cfg['num_epochs']
    for epoch in range(1, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)
        for phase in ['train', 'val']:
            # NOTE(review): val_answers is never populated; the code that
            # wrote it out is commented near the bottom of this function.
            val_answers = dict()
            val_f1 = 0
            val_em = 0
            val_cnt = 0
            val_r = 0

            if phase == 'train':
                bidaf.train()
            else:
                bidaf.eval()
                # Swap the EMA weights in for evaluation, backing up the raw
                # training weights so they can be restored afterwards.
                backup_params = EMA(0)
                for name, param in bidaf.named_parameters():
                    if param.requires_grad:
                        backup_params.register(name, param.data)
                        param.data.copy_(ema.get(name))

            with torch.set_grad_enabled(phase == 'train'):
                for batch_num, batch in enumerate(
                        data_pipeline.data_iterators[phase]):
                    optimizer.zero_grad()
                    p1, p2 = bidaf(batch)
                    # Sum of start-position and end-position cross-entropy.
                    loss = criterion(p1, batch.s_idx) + criterion(
                        p2, batch.e_idx)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        # Fold the updated weights into the EMA shadows.
                        for name, param in bidaf.named_parameters():
                            if param.requires_grad:
                                ema.update(name, param.data)
                        if batch_num % train_cfg['batch_per_disp'] == 0:
                            batch_loss = loss.item()
                            print('batch %d: loss %.3f' %
                                  (batch_num, batch_loss))

                    if phase == 'val':
                        batch_size, c_len = p1.size()
                        val_cnt += batch_size
                        ls = nn.LogSoftmax(dim=1)
                        # -inf on the strictly-lower triangle forbids spans
                        # whose start index is after the end index.
                        mask = (torch.ones(c_len, c_len) * float('-inf')).to(device).tril(-1). \
                            unsqueeze(0).expand(batch_size, -1, -1)
                        # score[b, s, e] = log p(start=s) + log p(end=e).
                        score = (ls(p1).unsqueeze(2) +
                                 ls(p2).unsqueeze(1)) + mask
                        # Joint argmax over valid (start, end) pairs.
                        score, s_idx = score.max(dim=1)
                        score, e_idx = score.max(dim=1)
                        s_idx = torch.gather(s_idx, 1,
                                             e_idx.view(-1, 1)).squeeze()

                        for i in range(batch_size):
                            answer = (s_idx[i], e_idx[i])
                            gt = (batch.s_idx[i], batch.e_idx[i])
                            val_f1 += f1_score(answer, gt)
                            val_em += exact_match_score(answer, gt)
                            val_r += r_score(answer, gt)

            if phase == 'val':
                val_f1 = val_f1 * 100 / val_cnt
                val_em = val_em * 100 / val_cnt
                val_r = val_r * 100 / val_cnt
                print('Epoch %d: %s f1 %.3f | %s em %.3f |  %s rouge %.3f' %
                      (epoch, phase, val_f1, phase, val_em, phase, val_r))
                if val_f1 > result['best_f1']:
                    result['best_f1'] = val_f1
                    result['best_em'] = val_em
                    result['best_model'] = copy.deepcopy(bidaf.state_dict())
                    torch.save(result, train_cfg['ckpoint_file'])
                    # with open(train_cfg['val_answers'], 'w', encoding='utf-8') as f:
                    #     print(json.dumps(val_answers), file=f)
                # Restore the raw (non-EMA) weights before the next epoch.
                for name, param in bidaf.named_parameters():
                    if param.requires_grad:
                        param.data.copy_(backup_params.get(name))
Beispiel #4
0
class SOLVER():
    """Training/evaluation driver for a BiDAF question-answering model.

    Builds the data reader, model, optimizer and an exponential moving
    average (EMA) of the trainable weights; optionally wraps model and
    optimizer with NVIDIA apex for mixed-precision training.
    """

    def __init__(self, args):
        self.args = args
        self.device = torch.device("cuda:{}".format(self.args.GPU) if torch.
                                   cuda.is_available() else "cpu")
        self.data = READ(self.args)
        glove = self.data.WORD.vocab.vectors
        char_size = len(self.data.CHAR.vocab)

        self.model = BiDAF(self.args, char_size, glove).to(self.device)
        self.optimizer = optim.Adadelta(self.model.parameters(),
                                        lr=self.args.Learning_Rate)
        self.ema = EMA(self.args.Exp_Decay_Rate)

        if APEX_AVAILABLE:  # Mixed Precision
            self.model, self.optimizer = amp.initialize(self.model,
                                                        self.optimizer,
                                                        opt_level='O2')

        # Register an EMA shadow copy of every trainable parameter.
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.ema.register(name, param.data)

        self.parameters = filter(lambda p: p.requires_grad,
                                 self.model.parameters())

    def train(self):
        """Run the training loop; periodically evaluates on the dev set and
        logs the best EM/F1 seen."""

        criterion = nn.NLLLoss()
        criterion = criterion.to(self.device)

        self.model.train()

        max_dev_em, max_dev_f1 = -1, -1
        num_batches = len(self.data.train_iter)

        logging.info("Begin Training")

        self.model.zero_grad()

        loss = 0.0

        for epoch in range(self.args.Epoch):

            self.model.train()

            for i, batch in enumerate(self.data.train_iter):

                # Shift to 1-based step numbering for logging below.
                i += 1
                # Loss is the sum of start- and end-position NLL terms.
                p1, p2 = self.model(batch)
                batch_loss = criterion(
                    p1, batch.start_idx.to(self.device)) + criterion(
                        p2, batch.end_idx.to(self.device))

                if APEX_AVAILABLE:
                    # apex requires backward() through the scaled loss.
                    with amp.scale_loss(batch_loss,
                                        self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    batch_loss.backward()
                loss = batch_loss.item()

                self.optimizer.step()
                del p1, p2, batch_loss

                # Fold the updated weights into the EMA shadows.
                for name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.ema.update(name, param.data)

                self.model.zero_grad()

                logging.info("Epoch [{}/{}] Step [{}/{}] Train Loss {}".format(epoch+1, self.args.Epoch, \
                                                                               i, int(num_batches) +1, round(loss,3)))

                # After epoch 7, also evaluate every 100 steps.
                if epoch > 7:
                    if i % 100 == 0:
                        dev_em, dev_f1 = self.evaluate()
                        logging.info("Epoch [{}/{}] Dev EM {} Dev F1 {}".format(epoch + 1, self.args.Epoch, \
                                                                                        round(dev_em,3), round(dev_f1,3)))
                        self.model.train()

                        if dev_f1 > max_dev_f1:
                            max_dev_f1 = dev_f1
                            max_dev_em = dev_em

            dev_em, dev_f1 = self.evaluate()
            logging.info("Epoch [{}/{}] Dev EM {} Dev F1 {}".format(epoch + 1, self.args.Epoch, \
                                                                               round(dev_em,3), round(dev_f1,3)))
            self.model.train()

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_em = dev_em

        logging.info('Max Dev EM: {} Max Dev F1: {}'.format(
            round(max_dev_em, 3), round(max_dev_f1, 3)))

    def evaluate(self):
        """Evaluate on the dev set using the EMA weights.

        Returns (exact_match, f1).  The raw training weights are backed up,
        the EMA weights swapped in for inference, then restored.
        """

        logging.info("Evaluating on Dev Dataset")
        answers = dict()

        self.model.eval()

        # EMA(0) acts as a plain snapshot of the current raw weights.
        temp_ema = EMA(0)

        for name, param in self.model.named_parameters():
            if param.requires_grad:
                temp_ema.register(name, param.data)
                param.data.copy_(self.ema.get(name))

        with torch.no_grad():
            for _, batch in enumerate(self.data.dev_iter):

                p1, p2 = self.model(batch)
                batch_size, _ = p1.size()

                # Independent argmax of start and end positions.
                _, s_idx = p1.max(dim=1)
                _, e_idx = p2.max(dim=1)

                for i in range(batch_size):
                    qid = batch.qid[i]
                    # Slice the context word ids and map back to strings.
                    answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                    answer = ' '.join(
                        [self.data.WORD.vocab.itos[idx] for idx in answer])
                    answers[qid] = answer

            # Restore the raw training weights.
            for name, param in self.model.named_parameters():
                if param.requires_grad:
                    param.data.copy_(temp_ema.get(name))

        results = evaluate(self.args, answers)

        return results['exact_match'], results['f1']
Beispiel #5
0
def train_bidaf(config, data):
    """Train a BiDAF model and return the checkpoint with the best dev F1.

    Parameters
    ----------
    config : object exposing the training hyper-parameters used below
        (``exp_decay_rate``, ``learning_rate``, ``model_time``, ``epoch``,
        ``print_freq``).
    data : dataset wrapper exposing ``train_iter`` (a torchtext-style
        iterator with an ``epoch`` attribute).

    Returns
    -------
    The deep-copied model snapshot with the highest dev F1 observed; the
    current model if no evaluation checkpoint was reached.
    """
    # NOTE(review): the original snippet referenced undefined names
    # (``model``, ``args``, ``train_data``); they are normalized here to
    # this function's actual parameters and local ``model_bidaf``.
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    model_bidaf = BiDAF(config, data)
    ema = EMA(config.exp_decay_rate)
    # Register EMA shadows for all trainable parameters.
    for name, param in model_bidaf.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model_bidaf.parameters())
    optimizer = optim.Adadelta(parameters, lr=config.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + config.model_time)

    model_bidaf.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    # Fallback so the return value can never be an unbound name.
    best_model_bidaf = model_bidaf

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == config.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        p1, p2 = model_bidaf(batch)

        optimizer.zero_grad()
        # Start- plus end-position cross-entropy.
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        # Fold the updated weights into the EMA shadows.
        for name, param in model_bidaf.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        # Periodic dev evaluation and TensorBoard logging.
        if (i + 1) % config.print_freq == 0:
            dev_loss, dev_exact, dev_f1 = test(model_bidaf, ema, config, data)
            c = (i + 1) // config.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('exact_match/dev', dev_exact, c)
            writer.add_scalar('f1/dev', dev_f1, c)
            print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                  f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_exact = dev_exact
                best_model_bidaf = copy.deepcopy(model_bidaf)

            loss = 0
            model_bidaf.train()

    writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')

    return best_model_bidaf
# Build shuffled train/valid loaders with identical batch settings.
train_dataloader = DataLoader(train_dataset,
                              shuffle=True,
                              batch_size=hyper_params["batch_size"],
                              num_workers=4)

valid_dataloader = DataLoader(valid_dataset,
                              shuffle=True,
                              batch_size=hyper_params["batch_size"],
                              num_workers=4)

print("Length of training data loader is:", len(train_dataloader))
print("Length of valid data loader is:", len(valid_dataloader))

# load the model
model = BiDAF(word_vectors=word_embedding_matrix,
              char_vectors=char_embedding_matrix,
              hidden_size=hyper_params["hidden_size"],
              drop_prob=hyper_params["drop_prob"])
if hyper_params["pretrained"]:
    model.load_state_dict(torch.load(os.path.join(experiment_path, "model.pkl"))["state_dict"])
model.to(device)

# define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adadelta(model.parameters(), hyper_params["learning_rate"], weight_decay=1e-4)

# best loss so far
if hyper_params["pretrained"]:
    best_valid_loss = torch.load(os.path.join(experiment_path, "model.pkl"))["best_valid_loss"]
    epoch_checkpoint = torch.load(os.path.join(experiment_path, "model_last_checkpoint.pkl"))["epoch"]
    print("Best validation loss obtained after {} epochs is: {}".format(epoch_checkpoint, best_valid_loss))
# NOTE(review): the snippet is spliced/truncated here — the body under this
# `else:` belongs to a different fragment (an evaluation loop using
# undefined names a_end/p2_pred/i), and its dedent at the final prints is
# not syntactically valid.  The `.data[0]` pattern also suggests a very old
# PyTorch version — confirm before reuse.
else:
        p2_corr = a_end.cpu() == p2_pred
        
        p1_acc += torch.sum(p1_corr).data[0]
        p2_acc += torch.sum(p2_corr).data[0]
        
        exact_match += torch.sum(p1_corr * p2_corr).data[0]
        total += batch_size
        if i % 10 == 0:
            print('current acc: {:.3f}%'.format(100*exact_match/total))

    print('======== Test result ========')
    print('p1 acc: {:.3f}%, p2 acc: {:.3f}%, EM: {:.3f}'.format(100.*p1_acc/total, 100.*p2_acc/total, 100.*exact_match/total))
# }}}

#create model
model = BiDAF(args)

if torch.cuda.is_available():
    print('use cuda')
    model.cuda()


#resume
# Only parameters with requires_grad=True are handed to the optimizer.
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
# Restore model/optimizer state from a checkpoint file when one exists.
if os.path.isfile(args.resume):
    print("=> loading checkpoint '{}'".format(args.resume))
    checkpoint = torch.load(args.resume)
    args.start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
Beispiel #8
0
def get_textfield(root, height, r):
    """Create, place and focus a word-wrapped scrollable text widget.

    The widget is gridded at column 0 of row *r*, spanning three columns,
    and returned to the caller.
    """
    widget = scrolledtext.ScrolledText(
        root,
        wrap=WORD,
        width=50,
        height=height,
        font=("Times New Roman", 15),
        bd=5)
    widget.grid(column=0, row=r, pady=10, padx=20, columnspan=3)
    widget.focus()
    return widget


if __name__ == '__main__':
    bidef = model.BiDAF(400, 250, 20)
    root = Tk()
    root.title('QA App')
    root.iconbitmap('../images/2.ico')
    root.geometry('600x700+700+20')
    answer_list = ['abcdef']

    title = get_level(root, "Question Answer Model", 0, 0, 3, 16)
    context_title = get_level(root, 'Enter Context(within 250 words)', 2, 0, 3,
                              14)
    question_title = get_level(root, 'Enter Question(within 20 words)', 4, 0,
                               3, 14)
    answer_level = get_level(root, '', 6, 0, 3, 14)
    status = get_level(root, "Answer 0 of 0", 7, 0, 3,
                       14)  # bd=1, relief=SUNKEN, anchor=E
# load dataset
test_dataset = SquadDataset(d_w_context, d_c_context, d_w_question,
                            d_c_question, d_labels)

# load data generator
test_dataloader = DataLoader(test_dataset,
                             shuffle=True,
                             batch_size=hyper_params["batch_size"],
                             num_workers=4)

print("Length of test data loader is:", len(test_dataloader))

# load the model
model = BiDAF(word_vectors=word_embedding_matrix,
              char_vectors=char_embedding_matrix,
              hidden_size=hyper_params["hidden_size"],
              drop_prob=hyper_params["drop_prob"])
# Best-effort weight loading: fall back to random initialization when the
# checkpoint is missing or incompatible.
try:
    if config.cuda:
        model.load_state_dict(
            torch.load(os.path.join(config.squad_models,
                                    "model_final.pkl"))["state_dict"])
    else:
        # Map CUDA-saved tensors onto the CPU.
        model.load_state_dict(
            torch.load(
                os.path.join(config.squad_models, "model_final.pkl"),
                map_location=lambda storage, loc: storage)["state_dict"])
    print("Model weights successfully loaded.")
except Exception:
    # Was a bare `except:`, which would also swallow KeyboardInterrupt and
    # SystemExit; narrowed to Exception while keeping the best-effort intent.
    print("Model weights not found, initialized model with random weights.")
model.to(device)
Beispiel #10
0
from collections import Counter

# prepare data
print('prepare data')
# config = Config()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Pre-trained word embeddings serialized as JSON; only element 0 is used,
# and the raw structure is freed immediately after conversion.
pre_trained_ = load('pre_data/embed_pre.json')
pre_trained = torch.Tensor(pre_trained_[0])
del pre_trained_
print('loading train_dataset')
train_dataset = SQuADData('pre_data/input/train')
dev_dataset = SQuADData('pre_data/input/dev')

# define model
print('define model')
model = BiDAF(pre_trained)
# model = BiDAF(pre_trained, 128)
# model = torch.load('model/model.pt')
model = model.to(device)
# NOTE(review): `config` is used below but its construction is commented
# out above — it must be provided elsewhere in the full script.
lr = config.learning_rate
base_lr = 1.0
warm_up = config.lr_warm_up_num
cr = lr / log2(warm_up)
optimizer = torch.optim.Adam(lr=config.learning_rate,
                             betas=(config.beta1, config.beta2),
                             eps=config.eps,
                             weight_decay=3e-7,
                             params=model.parameters())
# Warm-up schedule: multiplier grows as cr*log2(step+1) until `warm_up`
# steps, then stays at the constant `lr` value.
scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                        lr_lambda=lambda ee: cr * log2(ee + 1)
                                        if ee < warm_up else lr)
Beispiel #11
0
def main(argv):
    """End-to-end SQuAD pipeline: read data, build a BiDAF model, train for
    ``max_epoch`` epochs, evaluate train/dev answers each epoch, and write a
    checkpoint per epoch."""
    config = Config()
    config.load_user_config()
    config.log.info("finish loading user config")

    train_file = config.args["train_file"]
    dev_file = config.args["dev_file"]
    old_glove_file = config.args["glove_file"]
    # A reduced GloVe file containing only the vocabulary actually used.
    new_glove_file = config.args["glove_file"] + ".subset"

    # TODO(demi): switch "overwrite" to False
    train_data_raw, dev_data_raw, i2w, w2i, i2c, c2i, new_glove_file, glove_dim, vocab_size, char_vocab_size\
         = squad_read_data(config, train_file, dev_file, old_glove_file, new_glove_file, overwrite=True)
    config.log.info("finish reading squad data in raw formats")

    # Propagate the vocabulary/embedding sizes discovered above.
    config.update_batch([("glove_file", new_glove_file),
                   ("glove_dim", glove_dim),
                   ("vocab_size", vocab_size),
                   ("char_vocab_size", char_vocab_size)])


    config.log.warning("reminder: now we only support train/fake mode")
    assert config.args["mode"] in ["train", "fake"], "mode (%s) not found" % config.args["mode"]

    train_id_conversion, train_data = make_dataset(config, train_data_raw, w2i, c2i)
    dev_id_conversion, dev_data = make_dataset(config, dev_data_raw, w2i, c2i)
    config.log.info("finish making datasets: reformatting raw data")

    train_data = QnADataset(train_data, config)
    dev_data = QnADataset(dev_data, config)
    config.log.info("finish generating datasets")

    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=True, **config.kwargs)
    dev_loader = torch.utils.data.DataLoader(dev_data, batch_size=1, **config.kwargs)
    config.log.info("finish generating data loader")


    model = BiDAF(config, i2w)
    config.log.info("finish creating model")
    if config.args["use_cuda"]:
        model.cuda()

    # log config and model
    config.log.info(config.format_string())
    config.log.info("model:{}".format(model))

    # Select the optimizer by name; falls through silently (optimizer left
    # undefined) for unrecognized names other than those listed.
    if config.args['optimizer'] == "Adam":
        optimizer = optim.Adam(model.get_train_parameters(), lr=config.args['lr'], weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "Adamax":
        optimizer = optim.Adamax(model.get_train_parameters(), lr=config.args['lr'], weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "SGD":
        optimizer = torch.optim.SGD(model.get_train_parameters(), lr=config.args['lr'], momentum=0.9, weight_decay=config.args['weight_decay'])
    if config.args['optimizer'] == "Adadelta":
        optimizer = torch.optim.Adadelta(model.get_train_parameters(), lr=config.args["lr"])
    #if config.args['optimizer'] == "Adagrad":



    config.log.info("model = %s" % model)
    config.log.info("config = %s" % config.format_string())

    trainer = Trainer(config)
    evaluator = Evaluator(config)

    """ save model checkpoint """
    def save_checkpoint(epoch):
        # Persist model weights + config; optimizer state is included except
        # for YF, which cannot serialize its state dict.
        checkpoint = {"model_state_dict": model.state_dict(),
                      "config_args" : config.args}
        if config.args["optimizer"] != "YF":  # YF can't save state dict right now
            checkpoint["optimizer_state_dict"] = optimizer.state_dict()
        checkpoint_file = config.args["model_dir"] + config.args["model_name"] + "-EPOCH%d" % epoch
        torch.save(checkpoint, checkpoint_file)
        config.log.info("saving checkpoint: {}".format(checkpoint_file))


    for epoch in range(1, config.args["max_epoch"] + 1):
        config.log.info("training: epoch %d" % epoch)
        # QS(demi): do i need to return model & optimizer?
        model, optimizer, train_avg_loss, train_answer_dict = trainer.run(model, train_id_conversion[0], train_loader, optimizer, mode="train")
        model, optimizer, dev_avg_loss, dev_answer_dict = trainer.run(model, dev_id_conversion[0], dev_loader, optimizer, mode="dev")

        # loss is a float tensor with size 1
        config.log.info("[EPOCH %d] LOSS = (train)%.5lf | (dev)%.5lf" % (epoch, train_avg_loss[0], dev_avg_loss[0]))

        # NOTE(review): both format strings below have one placeholder but
        # two arguments — answer_filename is silently ignored, so answers
        # are written to the same file every epoch.  Verify intent.
        answer_filename = "{}/{}-EPOCH{}".format(config.args["model_dir"], config.args["model_name"], epoch)
        config.log.info("[EVAUATION] TRAIN EVAL")
        evaluator.eval("official", train_file, train_answer_dict, "{}/answer.train".format(config.args["model_dir"], answer_filename))
        config.log.info("[EVAUATION] DEV EVAL")
        evaluator.eval("official", dev_file, dev_answer_dict, "{}/answer.dev".format(config.args["model_dir"], answer_filename))

        save_checkpoint(epoch)
def eval(context, question):
    """Answer *question* from *context* using a pretrained BiDAF model.

    Loads vocabularies and embedding matrices from the train directory,
    encodes the inputs as word/char id arrays, runs the model, and returns
    the predicted answer span as a string.

    Note: this function shadows the builtin ``eval``; the name is kept for
    interface compatibility with existing callers.
    """
    with open(os.path.join(config.data_dir, "train", "word2idx.pkl"), "rb") as wi, \
         open(os.path.join(config.data_dir, "train", "char2idx.pkl"), "rb") as ci, \
         open(os.path.join(config.data_dir, "train", "word_embeddings.pkl"), "rb") as wb, \
         open(os.path.join(config.data_dir, "train", "char_embeddings.pkl"), "rb") as cb:
        word2idx = pickle.load(wi)
        char2idx = pickle.load(ci)
        word_embedding_matrix = pickle.load(wb)
        char_embedding_matrix = pickle.load(cb)

    # transform them into Tensors
    word_embedding_matrix = torch.from_numpy(
        np.array(word_embedding_matrix)).type(torch.float32)
    char_embedding_matrix = torch.from_numpy(
        np.array(char_embedding_matrix)).type(torch.float32)
    idx2word = dict([(y, x) for x, y in word2idx.items()])

    context = clean_text(context)
    context = [w for w in word_tokenize(context) if w]

    question = clean_text(question)
    question = [w for w in word_tokenize(question) if w]

    # NOTE(review): these checks only warn and do not return; over-long
    # inputs will overflow the fixed-size index arrays below — confirm
    # callers pre-truncate.
    if len(context) > config.max_len_context:
        print("The context is too long. Maximum accepted length is",
              config.max_len_context, "words.")
    if max([len(w) for w in context]) > config.max_len_word:
        print("Some words in the context are longer than", config.max_len_word,
              "characters.")
    if len(question) > config.max_len_question:
        print("The question is too long. Maximum accepted length is",
              config.max_len_question, "words.")
    if max([len(w) for w in question]) > config.max_len_word:
        print("Some words in the question are longer than",
              config.max_len_word, "characters.")
    if len(question) < 3:
        print(
            "The question is too short. It needs to be at least a three words question."
        )

    # Fixed-size id buffers, zero-padded to the configured maxima.
    context_idx = np.zeros([config.max_len_context], dtype=np.int32)
    question_idx = np.zeros([config.max_len_question], dtype=np.int32)
    context_char_idx = np.zeros([config.max_len_context, config.max_len_word],
                                dtype=np.int32)
    question_char_idx = np.zeros(
        [config.max_len_question, config.max_len_word], dtype=np.int32)

    # replace 0 values with word and char IDs
    # (id 1 is used for out-of-vocabulary words/chars — presumably <unk>.)
    for j, word in enumerate(context):
        if word in word2idx:
            context_idx[j] = word2idx[word]
        else:
            context_idx[j] = 1
        for k, char in enumerate(word):
            if char in char2idx:
                context_char_idx[j, k] = char2idx[char]
            else:
                context_char_idx[j, k] = 1

    for j, word in enumerate(question):
        if word in word2idx:
            question_idx[j] = word2idx[word]
        else:
            question_idx[j] = 1
        for k, char in enumerate(word):
            if char in char2idx:
                question_char_idx[j, k] = char2idx[char]
            else:
                question_char_idx[j, k] = 1

    model = BiDAF(word_vectors=word_embedding_matrix,
                  char_vectors=char_embedding_matrix,
                  hidden_size=config.hidden_size,
                  drop_prob=config.drop_prob)
    # Best-effort weight loading: fall back to random initialization when
    # the checkpoint is missing or incompatible.
    try:
        if config.cuda:
            model.load_state_dict(
                torch.load(os.path.join(config.squad_models,
                                        "model_final.pkl"))["state_dict"])
        else:
            # Map CUDA-saved tensors onto the CPU.
            model.load_state_dict(
                torch.load(
                    os.path.join(config.squad_models, "model_final.pkl"),
                    map_location=lambda storage, loc: storage)["state_dict"])
        print("Model weights successfully loaded.")
    except Exception:
        # Was a bare `except:` with a dead `pass`; narrowed to Exception so
        # KeyboardInterrupt/SystemExit are not swallowed.
        print(
            "Model weights not found, initialized model with random weights.")
    model.to(device)
    model.eval()
    with torch.no_grad():
        # Add a batch dimension and move everything to the target device.
        context_idx, context_char_idx, question_idx, question_char_idx = torch.tensor(context_idx, dtype=torch.int64).unsqueeze(0).to(device),\
                                                                         torch.tensor(context_char_idx, dtype=torch.int64).unsqueeze(0).to(device),\
                                                                         torch.tensor(question_idx, dtype=torch.int64).unsqueeze(0).to(device),\
                                                                         torch.tensor(question_char_idx, dtype=torch.int64).unsqueeze(0).to(device)

        pred1, pred2 = model(context_idx, context_char_idx, question_idx,
                             question_char_idx)
        # Decode the most probable (start, end) span, then join the tokens.
        starts, ends = discretize(pred1.exp(), pred2.exp(), 15, False)
        prediction = " ".join(context[starts.item():ends.item() + 1])

    return prediction
Beispiel #13
0
def main(NMT_config):
    """RL fine-tuning loop for an NMT question rewriter.

    A pretrained BiDAF QA model is loaded and frozen (eval mode); the
    translator rewrites each batch of questions, the QA model answers them,
    and the resulting F1 scores are used as the reward signal for policy
    updates on the translator's parameters.
    """

    ### Load RL (global) configurations ###
    config = parse_args()

    ### Load trained QA model ###
    QA_checkpoint = torch.load(config.data_dir + config.QA_best_model)
    QA_config = QA_checkpoint['config']

    QA_mod = BiDAF(QA_config)
    if QA_config.use_gpu:
        QA_mod.cuda()
    QA_mod.load_state_dict(QA_checkpoint['state_dict'])

    ### Load SQuAD dataset ###
    data_filter = get_squad_data_filter(QA_config)

    train_data = read_data(QA_config,
                           'train',
                           QA_config.load,
                           data_filter=data_filter)
    dev_data = read_data(QA_config, 'dev', True, data_filter=data_filter)

    update_config(QA_config, [train_data, dev_data])

    print("Total vocabulary for training is %s" % QA_config.word_vocab_size)

    # from all
    word2vec_dict = train_data.shared[
        'lower_word2vec'] if QA_config.lower_word else train_data.shared[
            'word2vec']
    # from filter-out set
    word2idx_dict = train_data.shared['word2idx']

    # filter-out set idx-vector
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    print("{}/{} unique words have corresponding glove vectors.".format(
        len(idx2vec_dict), len(word2idx_dict)))

    # <null> and <unk> do not have corresponding vector so random.
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(QA_config.word_emb_size), np.eye(QA_config.word_emb_size))
        for idx in range(QA_config.word_vocab_size)
    ])

    config.emb_mat = emb_mat
    config.new_emb_mat = train_data.shared['new_emb_mat']

    # Total optimization steps across all epochs of the QA data.
    num_steps = int(
        math.ceil(train_data.num_examples /
                  (QA_config.batch_size *
                   QA_config.num_gpus))) * QA_config.num_epochs

    # offset for question mark
    NMT_config.max_length = QA_config.ques_size_th - 1
    NMT_config.batch_size = QA_config.batch_size

    ### Construct translator ###
    translator = make_translator(NMT_config, report_score=True)

    ### Construct optimizer ###
    # Only the translator is optimized; the QA model stays frozen below.
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 translator.model.parameters()),
                          lr=config.lr)

    ### Start RL training ###
    count = 0
    QA_mod.eval()
    F1_eval = F1Evaluator(QA_config, QA_mod)
    #eval_model(QA_mod, train_data, dev_data, QA_config, NMT_config, config, translator)

    for i in range(config.n_episodes):
        for batches in tqdm(train_data.get_multi_batches(
                QA_config.batch_size,
                QA_config.num_gpus,
                num_steps=num_steps,
                shuffle=True,
                cluster=QA_config.cluster),
                            total=num_steps):

            #for n, p in translator.model.named_parameters():
            #    print(n)
            #    print(p)
            #print(p.requires_grad)

            start = datetime.now()
            # Dump the batch's questions to the file the translator reads.
            to_input(batches[0][1].data['q'], config.RL_path + config.RL_file)

            # obtain rewrite and log_prob
            q, scores, log_prob = translator.translate(NMT_config.src_dir,
                                                       NMT_config.src,
                                                       NMT_config.tgt,
                                                       NMT_config.batch_size,
                                                       NMT_config.attn_debug)

            # Substitute the rewritten questions (word and char forms) back
            # into the batch before running the QA model.
            q, cq = ref_query(q)
            batches[0][1].data['q'] = q
            batches[0][1].data['cq'] = cq

            log_prob = torch.stack(log_prob).squeeze(-1)
            #print(log_prob)

            translator.model.zero_grad()

            QA_mod(batches)

            # Reward = QA F1 on the rewritten questions (no grad through it).
            e = F1_eval.get_evaluation(batches, False, NMT_config, config,
                                       translator)
            reward = Variable(torch.FloatTensor(e.f1s), requires_grad=False)
            #print(reward)

            ## Initial loss
            loss = create_loss(log_prob, reward)

            loss.backward()
            optimizer.step()