Example #1
def test(model, ema, args, data):
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()
    loss = 0
    answers = dict()
    model.eval()

    backup_params = EMA(0)
    for name, param in model.named_parameters():
        if param.requires_grad:
            backup_params.register(name, param.data)
            param.data.copy_(ema.get(name))


    for batch in iter(data.dev_iter):
        with torch.no_grad():
            p1, p2 = model(batch.c_char, batch.q_char, batch.c_word[0],
                           batch.q_word[0], batch.c_word[1], batch.q_word[1])
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()

        # (batch, c_len, c_len)
        batch_size, c_len = p1.size()
        ls = nn.LogSoftmax(dim=1)
        mask = (torch.ones(c_len, c_len) * float('-inf')).to(device).tril(-1).unsqueeze(0).expand(batch_size, -1, -1)
        score = (ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1)) + mask
        score, s_idx = score.max(dim=1)
        score, e_idx = score.max(dim=1)
        s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze()

        for i in range(batch_size):
            qid = batch.id[i]
            answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
            answer = ' '.join([data.CONTEXT_WORD.vocab.itos[idx] for idx in answer])
            if answer == "<eos>":
                answer = ""
            answers[qid] = answer

    for name, param in model.named_parameters():
        if param.requires_grad:
            param.data.copy_(backup_params.get(name))

    with open(args.prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)

    opts = evaluate.parse_args(args=[f"{args.dataset_file}", f"{args.prediction_file}"])

    results = evaluate.main(opts)
    return loss, results['exact'], results['f1'], results['HasAns_exact'], results['HasAns_f1'], results['NoAns_exact'], results['NoAns_f1']
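
A note on the span decoding used in these test() functions: the outer sum of start and end log-probabilities gives score[b, i, j] for a span starting at i and ending at j, the lower-triangular -inf mask removes spans with start > end, and two successive max() calls recover the jointly best valid span. Below is a self-contained sketch of just that step (toy shapes and random logits, not code from any of these repos), with a brute-force check:

import torch
import torch.nn as nn

torch.manual_seed(0)
batch_size, c_len = 2, 5
p1 = torch.randn(batch_size, c_len)  # start logits
p2 = torch.randn(batch_size, c_len)  # end logits

ls = nn.LogSoftmax(dim=1)
# -inf strictly below the diagonal forbids spans with start > end
mask = (torch.ones(c_len, c_len) * float('-inf')).tril(-1)
# score[b, i, j] = log p_start(i) + log p_end(j), valid only for i <= j
score = ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1) + mask

best_start_score, s_idx = score.max(dim=1)  # best start for every end j
_, e_idx = best_start_score.max(dim=1)      # best end overall
s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze(1)

# brute-force check that (s_idx, e_idx) really is the best valid span
for b in range(batch_size):
    best = max(((i, j) for i in range(c_len) for j in range(i, c_len)),
               key=lambda ij: score[b, ij[0], ij[1]].item())
    assert best == (s_idx[b].item(), e_idx[b].item())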
Example #2
def test(model, ema, args, data):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()
    loss = 0
    answers = dict()
    model.eval()

    backup_params = EMA(0)
    for name, param in model.named_parameters():
        if param.requires_grad:
            backup_params.register(name, param.data)
            param.data.copy_(ema.get(name))

    with torch.set_grad_enabled(False):
        for batch in iter(data.dev_iter):
            p1, p2 = model(batch)
            batch_loss = criterion(p1, batch.s_idx) + criterion(
                p2, batch.e_idx)
            loss += batch_loss.item()

            # (batch, c_len, c_len)
            batch_size, c_len = p1.size()
            ls = nn.LogSoftmax(dim=1)
            mask = (torch.ones(c_len, c_len) *
                    float('-inf')).to(device).tril(-1).unsqueeze(0).expand(
                        batch_size, -1, -1)
            score = (ls(p1).unsqueeze(2) + ls(p2).unsqueeze(1)) + mask
            score, s_idx = score.max(dim=1)
            score, e_idx = score.max(dim=1)
            s_idx = torch.gather(s_idx, 1, e_idx.view(-1, 1)).squeeze()

            for i in range(batch_size):
                qid = batch.id[i]
                answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                answer = ' '.join(
                    [data.WORD.vocab.itos[idx] for idx in answer])
                answers[qid] = answer

        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data.copy_(backup_params.get(name))


    with open(args.prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers, indent=4), file=f)

    results = evaluate.main(args, answers, data)
    return loss / len(data.dev_iter), results['exact_match'], results['f1']
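
All of these examples assume an EMA helper exposing register/get/update, and (in Examples #6 and #8) a __call__ that updates and returns the shadow value. Here is a minimal sketch consistent with that usage; the actual class in each repo may differ:

class EMA:
    """Exponential moving average (shadow copy) of named tensors.

    With decay=0, register()/get() double as a plain parameter backup,
    which is how the test() functions above use EMA(0).
    """

    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}

    def register(self, name, val):
        self.shadow[name] = val.clone()

    def get(self, name):
        return self.shadow[name]

    def update(self, name, x):
        assert name in self.shadow
        # shadow <- decay * shadow + (1 - decay) * x
        new_avg = self.decay * self.shadow[name] + (1.0 - self.decay) * x
        self.shadow[name] = new_avg.clone()

    def __call__(self, name, x):
        self.update(name, x)
        return self.shadow[name]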
Example #3
def cw_tree_attack_targeted():
    cw = CarliniL2_qa(debug=args.debugging)
    criterion = nn.CrossEntropyLoss()
    loss = 0
    tot = 0
    adv_loss = 0
    targeted_success = 0
    untargeted_success = 0
    adv_text = []
    answers = dict()
    adv_answers = dict()
    # model.eval()

    embed = torch.load(args.word_vector)
    device = torch.device("cuda:0" if args.cuda else "cpu")
    vocab = Vocab(filename=args.dictionary,
                  data=[PAD_WORD, UNK_WORD, EOS_WORD, SOS_WORD])
    generator = Generator(args.test_data, vocab=vocab, embed=embed)
    transfered_embedding = torch.load('bidaf_transfered_embedding.pth')
    transfer_emb = torch.nn.Embedding.from_pretrained(transfered_embedding).to(
        device)
    seqback = WrappedSeqback(embed,
                             device,
                             attack=True,
                             seqback_model=generator.seqback_model,
                             vocab=vocab,
                             transfer_emb=transfer_emb)
    treelstm = generator.tree_model
    generator.load_state_dict(torch.load(args.load_ae))

    backup_params = EMA(0)
    for name, param in model.named_parameters():
        if param.requires_grad:
            backup_params.register(name, param.data)
            param.data.copy_(ema.get(name))

    class TreeModel(nn.Module):
        def __init__(self):
            super(TreeModel, self).__init__()
            self.inputs = None

        def forward(self, hidden):
            self.embedding = seqback(hidden)
            return model(batch, perturbed=self.embedding)

        def set_temp(self, temp):
            seqback.temp = temp

        def get_embedding(self):
            return self.embedding

        def get_seqback(self):
            return seqback

    tree_model = TreeModel()
    for batch in tqdm(iter(data.dev_iter), total=1000):
        p1, p2 = model(batch)
        orig_answer, orig_s_idx, orig_e_idx = write_to_ans(
            p1, p2, batch, answers)
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()

        append_info = append_input(batch, vocab)
        batch_add_start = append_info['add_start']
        batch_add_end = append_info['add_end']
        batch_start_target = torch.LongTensor(
            append_info['target_start']).to(device)
        batch_end_target = torch.LongTensor(
            append_info['target_end']).to(device)
        add_sents = append_info['append_sent']

        input_embedding = model.word_emb(batch.c_word[0])
        append_info['tree'] = [generator.get_tree(append_info['tree'])]
        seqback.sentences = input_embedding.clone().detach()
        seqback.batch_trees = append_info['tree']
        seqback.batch_add_sent = append_info['ae_sent']
        seqback.start = append_info['add_start']
        seqback.end = append_info['add_end']
        seqback.adv_sent = []

        batch_tree_embedding = []
        for bi, append_sent in enumerate(append_info['ae_sent']):
            seqback.target_start = (append_info['target_start'][0] -
                                    append_info['add_start'][0])
            seqback.target_end = (append_info['target_end'][0] -
                                  append_info['add_start'][0])
            sentences = [torch.tensor(append_sent, dtype=torch.long, device=device)]
            seqback.target = sentences[0][seqback.target_start:seqback.target_end + 1]
            trees = [append_info['tree'][bi]]
            tree_embedding = treelstm(sentences, trees)[0][0].detach()
            batch_tree_embedding.append(tree_embedding)
        hidden = torch.cat(batch_tree_embedding, dim=0)
        cw.batch_info = append_info
        cw.num_classes = append_info['tot_length']

        adv_hidden = cw.run(tree_model,
                            hidden, (batch_start_target, batch_end_target),
                            input_token=input_embedding)
        seqback.adv_sent = []

        # re-test
        for bi, (add_start,
                 add_end) in enumerate(zip(batch_add_start, batch_add_end)):
            if bi in cw.o_best_sent:
                ae_words = cw.o_best_sent[bi]
                bidaf_tokens = bidaf_convert_to_idx(ae_words)
                batch.c_word[0].data[bi, add_start:add_end] = torch.LongTensor(
                    bidaf_tokens)
        p1, p2 = model(batch)
        adv_answer, adv_s_idx, adv_e_idx = write_to_ans(
            p1, p2, batch, adv_answers)
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        adv_loss += batch_loss.item()

        for bi, (start_target, end_target) in enumerate(
                zip(batch_start_target, batch_end_target)):
            start_output = adv_s_idx
            end_output = adv_e_idx
            targeted_success += int(
                compare(start_output, start_target.item(), end_output,
                        end_target.item()))
            untargeted_success += int(
                compare_untargeted(start_output, start_target.item(),
                                   end_output, end_target.item()))

        for i in range(len(add_sents)):
            logger.info(("orig:", transform(add_sents[i])))
            try:
                adv_sent = cw.o_best_sent[i]
                logger.info(("adv:", adv_sent))
            except KeyError:
                # attack produced no sentence for this example; fall back
                # to the unperturbed appended sentence
                adv_sent = transform(add_sents[i])
            adv_text.append({
                'adv_text': adv_sent,
                'qas_id': batch.id[i],
                'orig_predict': (orig_s_idx, orig_e_idx),
                'adv_predict': (adv_s_idx, adv_e_idx),
                'Orig answer:': orig_answer,
                'Adv answer:': adv_answer
            })
            joblib.dump(adv_text, root_dir + '/adv_text.pkl')
        # for batch size = 1
        tot += 1
        logger.info(("orig predict", (orig_s_idx, orig_e_idx)))
        logger.info(("adv append predict", (adv_s_idx, adv_e_idx)))
        logger.info(("targeted successful rate:", targeted_success))
        logger.info(("untargetd successful rate:", untargeted_success))
        logger.info(("Orig answer:", orig_answer))
        logger.info(("Adv answer:", adv_answer))
        logger.info(("tot:", tot))

    for name, param in model.named_parameters():
        if param.requires_grad:
            param.data.copy_(backup_params.get(name))

    with open(options.prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)
    with open(options.prediction_file + '_adv.json', 'w',
              encoding='utf-8') as f:
        print(json.dumps(adv_answers), file=f)
    results = evaluate.main(options)
    logger.info(tot)
    logger.info(("adv loss, results['exact_match'], results['f1']", loss,
                 results['exact_match'], results['f1']))
    return loss, results['exact_match'], results['f1']
Example #4
def cw_random_word_attack():
    cw = CarliniL2_untargeted_qa(debug=args.debugging)
    criterion = nn.CrossEntropyLoss()
    loss = 0
    adv_loss = 0
    targeted_success = 0
    untargeted_success = 0
    adv_text = []
    answers = dict()
    adv_answers = dict()

    backup_params = EMA(0)
    for name, param in model.named_parameters():
        if param.requires_grad:
            backup_params.register(name, param.data)
            param.data.copy_(ema.get(name))
    tot = 0
    for batch in tqdm(iter(data.dev_iter), total=1000):
        p1, p2 = model(batch)
        orig_answer, orig_s_idx, orig_e_idx = write_to_ans(
            p1, p2, batch, answers)
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()

        append_info = append_random_input(batch)
        allow_idxs = append_info['allow_idx']
        batch_start_target = torch.LongTensor([0]).to(device)
        batch_end_target = torch.LongTensor([0]).to(device)

        input_embedding = model.word_emb(batch.c_word[0])
        cw_mask = np.zeros(input_embedding.shape).astype(np.float32)
        cw_mask = torch.from_numpy(cw_mask).float().to(device)

        for bi, allow_idx in enumerate(allow_idxs):
            cw_mask[bi, np.array(allow_idx)] = 1
        cw.wv = model.word_emb.weight
        cw.inputs = batch
        cw.mask = cw_mask
        cw.batch_info = append_info
        cw.num_classes = append_info['tot_length']
        # print(transform(to_list(batch.c_word[0][0])))
        cw.run(model, input_embedding, (batch_start_target, batch_end_target))

        # re-test
        for bi, allow_idx in enumerate(allow_idxs):
            if bi in cw.o_best_sent:
                for i, idx in enumerate(allow_idx):
                    batch.c_word[0].data[bi, idx] = cw.o_best_sent[bi][i]
        p1, p2 = model(batch)
        adv_answer, adv_s_idx, adv_e_idx = write_to_ans(
            p1, p2, batch, adv_answers)
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        adv_loss += batch_loss.item()

        for bi, (start_target, end_target) in enumerate(
                zip(batch_start_target, batch_end_target)):
            start_output = adv_s_idx
            end_output = adv_e_idx
            targeted_success += int(
                compare(start_output, start_target.item(), end_output,
                        end_target.item()))
            untargeted_success += int(
                compare_untargeted(start_output, start_target.item(),
                                   end_output, end_target.item()))
        for i in range(len(allow_idxs)):
            try:
                added_text = transform(cw.o_best_sent[i])
                logger.info(("adv:", added_text))
            except KeyError:
                # attack produced no sentence for this example
                added_text = None
            entry = {
                'adv_text': transform(to_list(batch.c_word[0][0])),
                'qas_id': batch.id[i],
                'orig_predict': (orig_s_idx, orig_e_idx),
                'adv_predict': (adv_s_idx, adv_e_idx),
                'Orig answer:': orig_answer,
                'Adv answer:': adv_answer
            }
            if added_text is not None:
                entry['added_text'] = added_text
            adv_text.append(entry)
            joblib.dump(adv_text, root_dir + '/adv_text.pkl')
        # for batch size = 1
        tot += 1
        logger.info(("orig predict", (orig_s_idx, orig_e_idx)))
        logger.info(("adv append predict", (adv_s_idx, adv_e_idx)))
        logger.info(("targeted successful rate:", targeted_success))
        logger.info(("untargetd successful rate:", untargeted_success))
        logger.info(("Orig answer:", orig_answer))
        logger.info(("Adv answer:", adv_answer))
        logger.info(("tot:", tot))

    for name, param in model.named_parameters():
        if param.requires_grad:
            param.data.copy_(backup_params.get(name))

    with open(options.prediction_file, 'w', encoding='utf-8') as f:
        print(json.dumps(answers), file=f)
    with open(options.prediction_file + '_adv.json', 'w',
              encoding='utf-8') as f:
        print(json.dumps(adv_answers), file=f)
    results = evaluate.main(options)
    logger.info(tot)
    logger.info(("adv loss, results['exact_match'], results['f1']", loss,
                 results['exact_match'], results['f1']))
    return loss, results['exact_match'], results['f1']
Example #5
    question_append_sentences = joblib.load(
        'sampled_perturb_question_sentences.pkl')

    model = BiDAF(options, data.WORD.vocab.vectors).to(device)
    if options.old_model is not None:
        model.load_state_dict(
            torch.load(options.old_model,
                       map_location="cuda:{}".format(options.gpu)))
    if options.old_ema is not None:
        # ema = pickle.load(open(options.old_ema, "rb"))
        ema = torch.load(options.old_ema, map_location=device)
    else:
        ema = EMA(options.exp_decay_rate)
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param.data)

    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print(
                "WARNING: You have a CUDA device, so you should probably run with --cuda"
            )
        else:
            torch.cuda.manual_seed(args.seed)
    random.seed(args.seed)

    if args.model == 'word_attack':
        # dev_loss, dev_exact, dev_f1 = cw_word_attack()
        # dev_loss, dev_exact, dev_f1 = cw_word_attack_target()
        dev_loss, dev_exact, dev_f1 = cw_random_word_attack()
Example #6
def train(args):
    db = Data(args)
    # db.build_vocab()  # rebuilding the vocab may assign different ids to equal-frequency tokens
    db.load_vocab()
    db.build_dataset()  # builds train_loader

    model = BiDAF(args)
    if args.cuda:
        model = model.cuda()
    if args.ema:
        ema = EMA(0.999)
        print("Register EMA ...")
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param.data)
    init_lr = args.init_lr
    optimizer = torch.optim.Adam(params=model.parameters(), lr=init_lr)
    lr = init_lr

    batch_step = args.batch_step
    loss_fn = nn.CrossEntropyLoss()
    logger = Logger('./logs')
    step = 0

    valid_raw_article_list = db.valid_raw_article_list
    valid_answer_list = db.valid_answer_list

    print('========== Train ==============')

    for epoch in range(args.epoch_num):
        print('---Epoch', epoch, "lr:", lr)
        running_loss = 0.0
        count = 0
        print("len(db.train_loader):", len(db.train_loader))
        for article, question, answer_span, _ in db.train_loader:
            if args.cuda:
                article, question, answer_span = (article.cuda(),
                                                  question.cuda(),
                                                  answer_span.cuda())
            p1, p2 = model(article, question)
            loss_p1 = loss_fn(p1, answer_span.transpose(0, 1)[0])
            loss_p2 = loss_fn(p2, answer_span.transpose(0, 1)[1])
            running_loss += loss_p1.item()
            running_loss += loss_p2.item()

            optimizer.zero_grad()
            (loss_p1 + loss_p2).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
            optimizer.step()
            if args.ema:
                for name, param in model.named_parameters():
                    if param.requires_grad:
                        param.data = ema(name, param.data)

            count += 1
            if count % batch_step == 0:
                rep_str = '[{}] Epoch {}, loss: {:.3f}'
                print(
                    rep_str.format(
                        datetime.datetime.now().strftime('%Y%m%d-%H%M%S'),
                        epoch, running_loss / batch_step))

                info = {'loss': running_loss / batch_step}
                running_loss = 0.0
                count = 0

                # 1. Log scalar values (scalar summary)
                for tag, value in info.items():
                    logger.scalar_summary(tag, value, step + 1)

                # 2. Log values and gradients of the parameters (histogram summary)
                for tag, value in model.named_parameters():
                    tag = tag.replace('.', '/')
                    logger.histo_summary(tag,
                                         value.data.cpu().numpy(), step + 1)
                    logger.histo_summary(tag + '/grad',
                                         value.grad.data.cpu().numpy(),
                                         step + 1)
                step += 1

        # Validation
        if args.with_valid:
            print('======== Epoch {} result ========'.format(epoch))
            print("len(db.valid_loader):", len(db.valid_loader))
            valid_result = []
            idx = 0
            for article, question, _ in db.valid_loader:
                if args.cuda:
                    article, question = article.cuda(), question.cuda()
                p1, p2 = model(article, question, is_trainning=False)

                _, p1_predicted = torch.max(p1.cpu().data, 1)
                _, p2_predicted = torch.max(p2.cpu().data, 1)
                p1_predicted = p1_predicted.numpy().tolist()
                p2_predicted = p2_predicted.numpy().tolist()
                for _p1, _p2, _raw_article, _answer in zip(
                        p1_predicted, p2_predicted,
                        valid_raw_article_list[idx:idx + len(p1_predicted)],
                        valid_answer_list[idx:idx + len(p1_predicted)]):
                    valid_result.append({
                        "ref_answer": _answer,
                        "cand_answer": "".join(_raw_article[_p1:_p2 + 1])
                    })
                idx = idx + len(p1_predicted)
            rouge_score = test_score(valid_result)
            info = {'rouge_score': rouge_score}

            for tag, value in info.items():
                logger.scalar_summary(tag, value, epoch + 1)

        lr = max(0.00001, init_lr * 0.9**(epoch + 1))
        print("lr:", lr)
        parameters = filter(lambda param: param.requires_grad,
                            model.parameters())
        optimizer = torch.optim.Adam(params=parameters,
                                     lr=lr,
                                     weight_decay=1e-7)

        if epoch >= 1 and args.saved_model_file:
            torch.save(model.state_dict(),
                       args.saved_model_file + "_epoch_" + str(epoch))
            print("saved model")
Example #7
def train(args, data):
    device = torch.device(
        "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    best_model = None  # set on the first dev evaluation

    iterator = data.train_iter
    for i, batch in enumerate(iterator):
        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        p1, p2 = model(batch)

        optimizer.zero_grad()
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        if (i + 1) % args.print_freq == 0:
            dev_loss, dev_exact, dev_f1 = test(model, ema, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('exact_match/dev', dev_exact, c)
            writer.add_scalar('f1/dev', dev_f1, c)
            print('train loss: {} / dev loss: {}'.format(loss, dev_loss) +
                  ' / dev EM: {} / dev F1: {}'.format(dev_exact, dev_f1))

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_exact = dev_exact
                best_model = copy.deepcopy(model)

            loss = 0
            model.train()

    writer.close()
    print('max dev EM: {} / max dev F1: {}'.format(max_dev_exact, max_dev_f1))

    return best_model
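
The backup-and-restore dance at the top and bottom of the test() functions above (stash the live weights in an EMA(0), copy in the shadow weights, evaluate, copy the originals back) can also be packaged as a context manager. A sketch of that pattern, not taken from any of the repos above:

import contextlib

@contextlib.contextmanager
def ema_weights(model, ema):
    """Temporarily load EMA shadow weights into model, restoring on exit."""
    backup = {}
    for name, param in model.named_parameters():
        if param.requires_grad:
            backup[name] = param.data.clone()
            param.data.copy_(ema.get(name))
    try:
        yield model
    finally:
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data.copy_(backup[name])

# usage (hypothetical eval fn; the test() functions above do their own swap):
#     with ema_weights(model, ema):
#         evaluate(model, ...)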
Example #8
def train(args):
    db = Data(args)
    # db.build_vocab()  # rebuilding the vocab may assign different ids to equal-frequency tokens
    db.load_vocab()
    db.build_dataset()  # builds train_loader

    # model = BiDAF(args)
    model = SLQA(args)
    first_model = "./checkpoints/SLQA_elmo_epoch_0"
    model.load_state_dict(torch.load(first_model))
    if args.cuda:
        model = model.cuda()
    if args.ema:
        ema = EMA(0.999)
        print("Register EMA ...")
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param.data)
    init_lr = args.init_lr
    parameters = filter(lambda param: param.requires_grad, model.parameters())
    weight_decay = 0  # was 1e-6 in earlier runs
    optimizer = torch.optim.Adam(params=parameters,
                                 lr=init_lr,
                                 weight_decay=weight_decay)
    batch_step = args.batch_step
    loss_fn = nn.CrossEntropyLoss()
    logger = Logger('./logs')
    step = 0

    train_raw_article_list = db.train_raw_article_list
    train_raw_question_list = db.train_raw_question_list

    valid_raw_article_list = db.valid_raw_article_list
    valid_answer_list = db.valid_answer_list
    valid_raw_question_list = db.valid_raw_question_list

    # needed by gen_elmo_by_text() in the validation loop below
    question_hdf5_f = h5py.File(args.question_hdf5_path, "r")
    article_hdf5_f = h5py.File(args.article_hdf5_path, "r")
    print('========== Train ==============')
    for epoch in range(args.epoch_num):
        print('---Epoch', epoch)
        running_loss = 0.0
        count = 0
        print("len(db.train_loader):", len(db.train_loader))
        train_idx = 0
        for batch_id, (article, question, answer_span,
                       _) in enumerate(db.train_loader):
            if args.cuda:
                article, question, answer_span = (article.cuda(),
                                                  question.cuda(),
                                                  answer_span.cuda())
            # tmp_train_raw_article_list = train_raw_article_list[train_idx:train_idx + question.size()[0]]
            # tmp_train_raw_question_list = train_raw_question_list[train_idx:train_idx + question.size()[0]]
            # question_elmo = gen_elmo_by_text(question_hdf5_f, tmp_train_raw_question_list, args.max_question_len)
            # article_elmo = gen_elmo_by_text(article_hdf5_f, tmp_train_raw_article_list, args.max_article_len)
            # pickle.dump((article_elmo, question_elmo), open(elmo_save_path, "wb"))
            elmo_save_path = ("/backup231/lhliu/jszn/elmo/" +
                              str(batch_id) + ".pkl")
            article_elmo, question_elmo = pickle.load(
                open(elmo_save_path, "rb"))
            article_elmo = torch.tensor(article_elmo, dtype=torch.float)
            question_elmo = torch.tensor(question_elmo, dtype=torch.float)
            # train_idx += question.size()[0]
            # continue
            if args.cuda:
                question_elmo = question_elmo.cuda()
                article_elmo = article_elmo.cuda()

            p1, p2 = model(article,
                           question,
                           article_elmo=article_elmo,
                           question_elmo=question_elmo)
            loss_p1 = loss_fn(p1, answer_span.transpose(0, 1)[0])
            loss_p2 = loss_fn(p2, answer_span.transpose(0, 1)[1])
            running_loss += loss_p1.item()
            running_loss += loss_p2.item()

            optimizer.zero_grad()
            (loss_p1 + loss_p2).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
            optimizer.step()
            if args.ema:
                for name, param in model.named_parameters():
                    if param.requires_grad:
                        param.data = ema(name, param.data)

            count += 1
            if count % batch_step == 0:
                rep_str = '[{}] Epoch {}, loss: {:.3f}'
                print(
                    rep_str.format(
                        datetime.datetime.now().strftime('%Y%m%d-%H%M%S'),
                        epoch, running_loss / batch_step))

                # info = {'loss': running_loss / batch_step}
                running_loss = 0.0
                count = 0

                # # 1. Log scalar values (scalar summary)
                # for tag, value in info.items():
                #     logger.scalar_summary(tag, value, step + 1)

                # # 2. Log values and gradients of the parameters (histogram summary)
                # for tag, value in model.named_parameters():
                #     tag = tag.replace('.', '/')
                #     logger.histo_summary(tag, value.data.cpu().numpy(), step + 1)
                #     logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), step + 1)
                step += 1
        # Validation
        if args.with_valid:
            print('======== Epoch {} result ========'.format(epoch))
            print("len(db.valid_loader):", len(db.valid_loader))
            valid_result = []
            idx = 0
            for article, question, _ in db.valid_loader:
                if args.cuda:
                    article, question = article.cuda(), question.cuda()

                tmp_valid_raw_article_list = valid_raw_article_list[
                    idx:idx + question.size()[0]]
                tmp_valid_raw_question_list = valid_raw_question_list[
                    idx:idx + question.size()[0]]
                question_elmo = gen_elmo_by_text(question_hdf5_f,
                                                 tmp_valid_raw_question_list,
                                                 args.max_question_len)
                article_elmo = gen_elmo_by_text(article_hdf5_f,
                                                tmp_valid_raw_article_list,
                                                args.max_article_len)
                if args.cuda:
                    question_elmo = question_elmo.cuda()
                    article_elmo = article_elmo.cuda()
                p1, p2 = model(article,
                               question,
                               article_elmo,
                               question_elmo,
                               is_training=False)

                _, p1_predicted = torch.max(p1.cpu().data, 1)
                _, p2_predicted = torch.max(p2.cpu().data, 1)
                p1_predicted = p1_predicted.numpy().tolist()
                p2_predicted = p2_predicted.numpy().tolist()
                assert question.size()[0] == len(p1_predicted)
                for _p1, _p2, _raw_article, _answer in zip(
                        p1_predicted, p2_predicted,
                        valid_raw_article_list[idx:idx + len(p1_predicted)],
                        valid_answer_list[idx:idx + len(p1_predicted)]):
                    valid_result.append({
                        "ref_answer": _answer,
                        "cand_answer": "".join(_raw_article[_p1:_p2 + 1])
                    })
                idx = idx + len(p1_predicted)
            rouge_score = test_score(valid_result)
            info = {'rouge_score': rouge_score}

            for tag, value in info.items():
                logger.scalar_summary(tag, value, epoch + 1)
        # lr = init_lr
        lr = max(0.00001, init_lr * 0.9**(epoch + 1))  # optional decay; decide whether to keep it
        print("lr:", lr)
        parameters = filter(lambda param: param.requires_grad,
                            model.parameters())
        optimizer = torch.optim.Adam(params=parameters,
                                     lr=lr,
                                     weight_decay=weight_decay)

        if epoch >= 0 and args.saved_model_file:
            torch.save(model.state_dict(),
                       args.saved_model_file + "_epoch_" + str(epoch))
            print("saved model")
Example #9
def train(args, data):
    model = BiDAF(args, data.WORD.vocab.vectors)
    if args.load_model != "":
        model.load_state_dict(torch.load(args.load_model))
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    for name, param in model.named_parameters():
        if not param.is_leaf:
            print(name, param)

    writer = SummaryWriter(log_dir='runs/' + args.model_name)
    best_model = None

    for iterator, dev_iter, dev_file_name, index, print_freq, lr in zip(
            data.train_iter, data.dev_iter, args.dev_files,
            range(len(data.train)), args.print_freq, args.learning_rate):
        optimizer = optim.Adadelta(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        model.train()
        loss, last_epoch = 0, 0
        max_dev_exact, max_dev_f1 = -1, -1
        print(f"Training with {dev_file_name}")
        print()
        for i, batch in tqdm(enumerate(iterator), total=len(iterator) * args.epoch[index], ncols=100):
            present_epoch = int(iterator.epoch)
            eva = False
            if present_epoch == args.epoch[index]:
                break
            if present_epoch > last_epoch:
                print('epoch:', present_epoch + 1)
                eva = True
            last_epoch = present_epoch

            p1, p2 = model(batch)

            optimizer.zero_grad()
            batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            for name, param in model.named_parameters():
                if param.requires_grad:
                    ema.update(name, param.data)

            torch.cuda.empty_cache()
            if (i + 1) % print_freq == 0 or eva:
                dev_loss, dev_exact, dev_f1 = test(model, ema, args, data, dev_iter, dev_file_name)
                c = (i + 1) // print_freq

                writer.add_scalar('loss/train', loss, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('exact_match/dev', dev_exact, c)
                writer.add_scalar('f1/dev', dev_f1, c)
                print()
                print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                      f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

                if dev_f1 > max_dev_f1:
                    max_dev_f1 = dev_f1
                    max_dev_exact = dev_exact
                    best_model = copy.deepcopy(model)

                loss = 0
                model.train()

    writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')
    print("testing with test batch on best model")
    test_loss, test_exact, test_f1 = test(best_model, ema, args, data, list(data.test_iter)[-1], args.test_files[-1])

    print(f'test loss: {test_loss:.3f}'
          f' / test EM: {test_exact:.3f} / test F1: {test_f1:.3f}')
    return best_model
Example #10
def train(args, data):
    device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.CONTEXT_WORD.vocab.vectors).to(device)
    
    num = count_parameters(model)
    print(f'parameter count: {num}')

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    print('training for {} epochs in total'.format(args.epoch))
    
    sys.stdout.flush()
    iterator = data.train_iter
    iterator.repeat = True
    for i, batch in enumerate(iterator):

        present_epoch = int(iterator.epoch)
        if present_epoch == args.epoch:
            print('present_epoch value:', present_epoch)
            break
        if present_epoch > last_epoch:
            print('epoch:', present_epoch + 1)
        last_epoch = present_epoch

        p1, p2 = model(batch.c_char, batch.q_char, batch.c_word[0],
                       batch.q_word[0], batch.c_word[1], batch.q_word[1])
        optimizer.zero_grad()
        batch_loss = criterion(p1, batch.s_idx) + criterion(p2, batch.e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        if (i + 1) % args.print_freq == 0:
            (dev_loss, dev_exact, dev_f1, dev_hasans_exact, dev_hasans_f1,
             dev_noans_exact, dev_noans_f1) = test(model, ema, args, data)
            c = (i + 1) // args.print_freq

            writer.add_scalar('loss/train', loss, c)
            writer.add_scalar('loss/dev', dev_loss, c)
            writer.add_scalar('exact_match/dev', dev_exact, c)
            writer.add_scalar('f1/dev', dev_f1, c)
            print(f'train loss: {loss:.3f} / dev loss: {dev_loss:.3f}'
                  f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}'
                  f' / dev hasans EM: {dev_hasans_exact} / dev hasans F1: {dev_hasans_f1}'
                  f' / dev noans EM: {dev_noans_exact} / dev noans F1: {dev_noans_f1}')

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_exact = dev_exact
                best_model = copy.deepcopy(model)

            loss = 0
            model.train() 
        sys.stdout.flush()
    writer.close()
    args.max_f1 = max_dev_f1
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')

    return best_model
Example #11
def train(args, data):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args, data.WORD.vocab.vectors).to(device)

    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    best_model = copy.deepcopy(model)  # fallback in case dev F1 never improves

    iterator = data.train_iter
    num_batch = len(iterator)
    for present_epoch in range(args.epoch):
        print('epoch', present_epoch + 1)
        for i, batch in enumerate(iterator):

            p1, p2 = model(batch)

            optimizer.zero_grad()

            # with batch size 1 the model returns 1-D logits; restore the
            # (batch, c_len) shape expected by CrossEntropyLoss
            if len(p1.size()) == 1:
                p1 = p1.reshape(1, -1)
            if len(p2.size()) == 1:
                p2 = p2.reshape(1, -1)
            batch_loss = criterion(p1, batch.s_idx) + criterion(
                p2, batch.e_idx)
            loss += batch_loss.item()
            batch_loss.backward()
            optimizer.step()

            for name, param in model.named_parameters():
                if param.requires_grad:
                    ema.update(name, param.data)

            if i + 1 == num_batch:
                dev_loss, dev_exact, dev_f1 = test(model, ema, args, data)
                c = (i + 1) // args.print_freq

                writer.add_scalar('loss/train', loss / num_batch, c)
                writer.add_scalar('loss/dev', dev_loss, c)
                writer.add_scalar('exact_match/dev', dev_exact, c)
                writer.add_scalar('f1/dev', dev_f1, c)
                print(
                    f'train loss: {loss/num_batch:.3f} / dev loss: {dev_loss:.3f}'
                    f' / dev EM: {dev_exact:.3f} / dev F1: {dev_f1:.3f}')

                if dev_f1 > max_dev_f1:
                    max_dev_f1 = dev_f1
                    max_dev_exact = dev_exact
                    best_model = copy.deepcopy(model)

                loss = 0
                model.train()

    writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')

    return best_model
Example #12
def train(args, data):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    model = BiDAF(args).to(device)

    D_batch = args.train_batch_size
    ema = EMA(args.exp_decay_rate)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adadelta(parameters, lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    # writer = SummaryWriter(log_dir='runs/' + args.model_time)

    model.train()
    loss, last_epoch = 0, -1
    max_dev_exact, max_dev_f1 = -1, -1
    i = 0
    # iterator = data.train_iter
    while i + D_batch < len(data.data):
        b_id = i
        e_id = i + D_batch

        p1, p2 = model(data, b_id, e_id)

        optimizer.zero_grad()
        s_idx, e_idx = data.get_targ(b_id, e_id)
        batch_loss = criterion(p1, s_idx) + criterion(p2, e_idx)
        loss += batch_loss.item()
        batch_loss.backward()
        optimizer.step()

        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.update(name, param.data)

        # periodic dev evaluation (cf. Example #7) is disabled in this variant

        i += D_batch

    # writer.close()
    print(f'max dev EM: {max_dev_exact:.3f} / max dev F1: {max_dev_f1:.3f}')

    # dev evaluation is disabled above, so best_model is never assigned;
    # return the trained model instead
    return model