Example #1
import copy
import logging

import torch
import torch.nn as nn
from nltk.tokenize import word_tokenize
from tqdm import tqdm
# `amp` comes from NVIDIA apex and is only needed when fp16 training is enabled (see Example #2).


def train(model, optimizer, scheduler, train_data, dev_data, batch_size, fp16,
          checkpoint, gpu, max_grad_norm, best_acc):
    loss_fn = nn.CrossEntropyLoss()

    step_cnt = 0
    best_model_weights = None

    for pointer in tqdm(range(0, len(train_data), batch_size),
                        desc='training'):
        model.train()  # model was in eval mode in evaluate(); switch back to train mode
        optimizer.zero_grad()  # clear gradients first
        torch.cuda.empty_cache()  # releases all unoccupied cached memory

        step_cnt += 1
        sent_pairs = []
        labels = []
        for i in range(pointer, pointer + batch_size):
            if i >= len(train_data):
                break
            sents = train_data[i].get_texts()
            # skip sentence pairs whose combined length exceeds 300 tokens
            if len(word_tokenize(' '.join(sents))) > 300:
                continue
            sent_pairs.append(sents)
            labels.append(train_data[i].get_label())
        logits, _ = model.ff(sent_pairs, checkpoint)
        if logits is None: continue
        true_labels = torch.LongTensor(labels)
        if gpu:
            true_labels = true_labels.to('cuda')
        loss = loss_fn(logits, true_labels)

        # back propagate
        if fp16:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                           max_grad_norm)
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

        # update weights
        optimizer.step()

        # update training rate
        scheduler.step()

        if step_cnt % 2000 == 0:
            acc = evaluate(model, dev_data, checkpoint, mute=True)
            logging.info('==> step {} dev acc: {}'.format(step_cnt, acc))
            if acc > best_acc:
                best_acc = acc
                # keep the best weights on CPU so the checkpoint is device-agnostic
                best_model_weights = copy.deepcopy(model.cpu().state_dict())
                if gpu:
                    model.to('cuda')

    return best_model_weights
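
The loop above relies on an evaluate(model, data, checkpoint, mute=True) helper that is not part of the excerpt. Below is a minimal sketch of such a helper, assuming the same model.ff / get_texts() / get_label() interface used in train(); the function body, the default batch size of 32, and the argmax-based prediction are assumptions, not taken from the original.

import logging

import torch


def evaluate(model, data, checkpoint, mute=False, batch_size=32):
    """Hypothetical accuracy computation mirroring the batching in train()."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for pointer in range(0, len(data), batch_size):
            batch = data[pointer:pointer + batch_size]
            sent_pairs = [ex.get_texts() for ex in batch]
            labels = [ex.get_label() for ex in batch]
            logits, _ = model.ff(sent_pairs, checkpoint)
            if logits is None:
                continue
            preds = logits.argmax(dim=-1).cpu().tolist()
            correct += sum(int(p == l) for p, l in zip(preds, labels))
            total += len(labels)
    acc = correct / max(total, 1)
    if not mute:
        logging.info('dev accuracy: {}'.format(acc))
    return acc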
Example #2
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    best_acc = -1.
    best_model_dic = None
    for ep in range(epoch_num):
        logging.info('\n=====epoch {}/{}====='.format(ep, epoch_num))
        model_dic = train(model, optimizer, scheduler, train_data, dev_data,
                          batch_size, fp16, checkpoint, gpu, max_grad_norm,
                          best_acc)
        if model_dic is not None:
            best_model_dic = model_dic
    assert best_model_dic is not None

    # for testing load the best model
    model.load_model(best_model_dic)
    logging.info('\n=====Training finished. Now start test=====')
    test_data = nli_reader.get_examples('dev.gz')  #,max_examples=50)
    logging.info('test data size: {}'.format(len(test_data)))
    test_acc = evaluate(model, test_data, batch_size)
    logging.info('accuracy on test set: {}'.format(test_acc))

    if model_save_path is not None:
        os.makedirs(model_save_path, exist_ok=True)
        if os.listdir(model_save_path):
            raise ValueError(
                "Output directory ({}) already exists and is not empty.".
                format(model_save_path))
    model.save(model_save_path, best_model_dic, test_acc)
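
The optimizer and scheduler passed into train() are created before this epoch loop. Below is a minimal sketch of a typical setup, assuming AdamW plus a linear warmup schedule from the transformers library; the learning rate, warmup ratio, and helper names are assumptions, not taken from the original.

from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

# assumed hyperparameters, for illustration only
steps_per_epoch = (len(train_data) + batch_size - 1) // batch_size
total_steps = steps_per_epoch * epoch_num
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * total_steps),
    num_training_steps=total_steps)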
Example #3
        if model_dic is not None:
            best_model_dic = model_dic
    assert best_model_dic is not None

    # for testing load the best model
    model.load_model(best_model_dic)
    logging.info('\n=====Training finished. Now start test=====')

    if hans:
        nli_reader = NLIDataReader('datasets/Hans')
        hans_test_data = nli_reader.get_hans_examples('heuristics_evaluation_set.txt')
    else:
        hans_test_data = []

    nli_reader = NLIDataReader('datasets/SUFE')
    msnli_test_data = nli_reader.get_examples('dev.gz') #,max_examples=50)

    test_data = msnli_test_data + hans_test_data

    logging.info('test data size: {}'.format(len(test_data)))
    test_acc = evaluate(model, test_data, checkpoint, mute=True)
    # test_acc = evaluate(model,test_data,batch_size)
    logging.info('accuracy on test set: {}'.format(test_acc))

    if model_save_path is not None:
        os.makedirs(model_save_path, exist_ok=True)
        if os.listdir(model_save_path):
            raise ValueError("Output directory ({}) already exists and is not empty.".format(
                model_save_path))
    model.save(model_save_path, best_model_dic, test_acc)
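
Concatenating msnli_test_data and hans_test_data into one list only works if both readers return objects with the same interface that train() and evaluate() rely on. Below is a minimal sketch of such an example object; the class name and attribute names are hypothetical, not taken from the original.

class SentencePairExample:
    """Hypothetical example object exposing the interface used above."""

    def __init__(self, text_a, text_b, label):
        self.texts = (text_a, text_b)
        self.label = label

    def get_texts(self):
        # the returned pair is joined and tokenized in train()
        return self.texts

    def get_label(self):
        # integer class id, fed to torch.LongTensor in train()
        return self.label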