Code example #1
# Assumed imports for this excerpt; `args`, `logger`, `TrainData`,
# `accuracy` and `f1_score` are defined elsewhere in the original module.
import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm


def test(model, tokenizer, test_file, checkpoint, output_dir=None):
    test_data = TrainData(data_file=test_file,
                          max_length=args.max_length,
                          tokenizer=tokenizer,
                          model_type=args.model_type)

    test_dataLoader = DataLoader(dataset=test_data,
                                 batch_size=args.batch_size,
                                 shuffle=False)

    logger.debug("***** Running test {} *****".format(checkpoint))
    logger.debug("  Num examples = %d", len(test_dataLoader))
    logger.debug("  Batch size = %d", args.batch_size)

    loss = []

    all_labels = None
    all_logits = None

    # Switch to evaluation mode so dropout / batch-norm layers use inference behaviour.
    model.eval()

    for batch in tqdm(test_dataLoader, desc="Evaluating", ncols=50):
        with torch.no_grad():
            if 'roberta' in args.model_type:
                # RoBERTa-style models take no token_type_ids, so only three
                # tensors are unpacked; the last two batch entries are dropped.
                batch = [t.to(args.device) for t in batch[:-2]]
                input_ids, attention_mask, labels = batch
                outputs = model(input_ids=input_ids.long(),
                                attention_mask=attention_mask.long(),
                                labels=labels)

            else:
                batch = [t.to(args.device) for t in batch[:-2]]
                input_ids, token_type_ids, attention_mask, labels = batch
                outputs = model(input_ids=input_ids.long(),
                                token_type_ids=token_type_ids.long(),
                                attention_mask=attention_mask.long(),
                                labels=labels)

            eval_loss, logits = outputs[:2]

            loss.append(eval_loss.item())

            # Accumulate labels and logits across batches so the metrics are
            # computed over the whole test set.
            if all_labels is None:
                all_labels = labels.detach().cpu().numpy()
                all_logits = logits.detach().cpu().numpy()
            else:
                all_labels = np.concatenate((all_labels, labels.detach().cpu().numpy()), axis=0)
                all_logits = np.concatenate((all_logits, logits.detach().cpu().numpy()), axis=0)

    acc = accuracy(all_logits, all_labels)
    f1 = f1_score(all_logits, all_labels)

    return np.array(loss).mean(), acc, f1
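
The `accuracy` and `f1_score` helpers called above are not shown in either
excerpt. A minimal sketch of what they are assumed to do, given that they
receive raw logits and integer labels (the argmax step and the use of
scikit-learn are assumptions, not taken from the source):

import numpy as np
from sklearn.metrics import f1_score as sk_f1_score

def accuracy(logits, labels):
    # Assumed behaviour: take the highest-scoring class per example and
    # compare it against the gold label.
    preds = np.argmax(logits, axis=1)
    return float((preds == labels).mean())

def f1_score(logits, labels):
    # Assumed behaviour: macro-averaged F1 over the argmax predictions,
    # delegating to scikit-learn.
    preds = np.argmax(logits, axis=1)
    return sk_f1_score(labels, preds, average='macro')
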
Code example #2
# Same assumed imports as code example #1; `args2`, `Multi_task_dataset` and
# the metric helpers are defined elsewhere in the original module.
def test(model, tokenizer, test_file, model_type):

    test_data = None
    test_dataLoader = None

    if model_type == 'baseline':
        test_data = TrainData(data_file=test_file,
                              max_length=args2.max_length,
                              tokenizer=tokenizer)
        test_dataLoader = DataLoader(test_data,
                                     batch_size=args2.batch_size,
                                     shuffle=False)

    elif model_type == 'vae2task':
        test_data = Multi_task_dataset(data_file=test_file,
                                       max_length=args2.max_length,
                                       tokenizer=tokenizer)
        test_dataLoader = DataLoader(dataset=test_data,
                                     batch_size=args2.batch_size,
                                     shuffle=False)

    elif model_type == 'cvae':
        test_data = TrainData(data_file=test_file,
                              max_length=args2.max_length,
                              tokenizer=tokenizer)
        test_dataLoader = DataLoader(test_data,
                                     batch_size=args2.batch_size,
                                     shuffle=False)

    loss = []

    all_labels = None
    all_logits = None

    # Switch to evaluation mode and move the model to the target device.
    model.eval()
    model = model.to(args2.device)

    if model_type == 'vae2task':
        for batch in tqdm(test_dataLoader, desc="Evaluating", ncols=50):
            with torch.no_grad():
                batch = [t.to(args2.device) for t in batch]
                input_ids, token_type_ids, attention_mask, labels_main, labels_vice1, labels_vice2 = batch
                outputs = model(input_ids=input_ids.long(),
                                token_type_ids=token_type_ids.long(),
                                attention_mask=attention_mask.long(),
                                labels_main=labels_main,
                                labels_vice1=labels_vice1,
                                labels_vice2=labels_vice2)

                eval_loss, logits = outputs[:2]

                loss.append(eval_loss.item())

                if all_labels is None:
                    all_labels = labels_main.detach().cpu().numpy()
                    all_logits = logits.detach().cpu().numpy()
                else:
                    all_labels = np.concatenate(
                        (all_labels, labels_main.detach().cpu().numpy()),
                        axis=0)
                    all_logits = np.concatenate(
                        (all_logits, logits.detach().cpu().numpy()), axis=0)

    elif model_type == 'baseline':
        for batch in tqdm(test_dataLoader, desc="Evaluating", ncols=50):
            with torch.no_grad():
                batch = [t.to(args2.device) for t in batch[:-2]]
                input_ids, token_type_ids, attention_mask, labels = batch
                outputs = model(input_ids=input_ids.long(),
                                token_type_ids=token_type_ids.long(),
                                attention_mask=attention_mask.long(),
                                labels=labels)

                eval_loss, logits = outputs[:2]

                loss.append(eval_loss.item())

                if all_labels is None:
                    all_labels = labels.detach().cpu().numpy()
                    all_logits = logits.detach().cpu().numpy()
                else:
                    all_labels = np.concatenate(
                        (all_labels, labels.detach().cpu().numpy()), axis=0)
                    all_logits = np.concatenate(
                        (all_logits, logits.detach().cpu().numpy()), axis=0)

    elif model_type == 'cvae':
        for batch in tqdm(test_dataLoader, desc="Evaluating", ncols=50):
            with torch.no_grad():
                # The last two batch entries (query1, query2) are handed to the
                # model as-is, without being moved to the device.
                query1, query2 = batch[-2:]
                batch = [t.to(args2.device) for t in batch[:-2]]
                input_ids, token_type_ids, attention_mask, labels = batch
                outputs = model(input_ids=input_ids.long(),
                                token_type_ids=token_type_ids.long(),
                                attention_mask=attention_mask.long(),
                                labels=labels,
                                query1=query1,
                                query2=query2)

                eval_loss, logits = outputs[:2]

                loss.append(eval_loss.item())

                if all_labels is None:
                    all_labels = labels.detach().cpu().numpy()
                    all_logits = logits.detach().cpu().numpy()
                else:
                    all_labels = np.concatenate(
                        (all_labels, labels.detach().cpu().numpy()), axis=0)
                    all_logits = np.concatenate(
                        (all_logits, logits.detach().cpu().numpy()), axis=0)

    acc = accuracy(all_logits, all_labels)
    f1 = f1_score(all_logits, all_labels)
    return np.array(loss).mean(), acc, f1
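
A hypothetical call site for the second variant; the test-file path and the
surrounding setup of `model`, `tokenizer` and `args2` are placeholders, not
taken from the source:

# Hypothetical usage: model and tokenizer are assumed to be loaded already,
# and args2 configured, as in the surrounding training script.
mean_loss, acc, f1 = test(model, tokenizer,
                          test_file='data/test.json',
                          model_type='vae2task')
print("test loss={:.4f}  acc={:.4f}  f1={:.4f}".format(mean_loss, acc, f1))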