コード例 #1
0
    def evaluate_nsp_intersentence(self):
        """Score every intersentence example with the NSP-style model.

        Instantiates the model class named by ``self.INTERSENTENCE_MODEL``
        (looked up on the ``models`` module) with ``nsp_dim=300``, optionally
        loads weights from ``self.INTERSENTENCE_LOAD_PATH``, and runs it in
        eval mode over ``IntersentenceDataset``.

        Returns:
            A list with one dict per example: ``{'id': <sentence id>,
            'score': <float>}``. The score is the softmax probability taken
            from column 0 when ``"bert"`` occurs anywhere in
            ``self.PRETRAINED_CLASS``, otherwise from column 1.
        """
        print()
        print(
            f"{Fore.LIGHTBLUE_EX}Evaluating bias on intersentence tasks...{Style.RESET_ALL}"
        )
        nsp_dim = 300
        model = getattr(models, self.INTERSENTENCE_MODEL)(
            self.PRETRAINED_CLASS, nsp_dim=nsp_dim).to(self.device)

        if "gpt2" in args.tokenizer.lower():
            # The tokenizer is given an explicit pad token here, so the
            # embedding matrix must grow to cover the new vocabulary entry.
            print("Adding <PAD> token to tokenizer...")
            self.tokenizer.add_special_tokens({"pad_token": "<PAD>"})
            model.core_model.resize_token_embeddings(len(self.tokenizer))

        print(f"Number of parameters: {self.count_parameters(model):,}")
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # Wrap before loading: the checkpoint is loaded into the wrapped
        # model, exactly as in the original statement order.
        model = torch.nn.DataParallel(model)

        if self.INTERSENTENCE_LOAD_PATH:
            # map_location lets a checkpoint saved on GPU be restored on a
            # CPU-only host instead of failing during deserialization.
            state_dict = torch.load(self.INTERSENTENCE_LOAD_PATH,
                                    map_location=self.device)
            model.load_state_dict(state_dict)
            print('model loaded')

        model.eval()
        dataset = IntersentenceDataset(self.tokenizer, args)
        dataloader = DataLoader(dataset,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=0)

        # The column holding the reported probability depends only on the
        # pretrained class name, so decide it once rather than per example.
        score_column = 0 if "bert" in self.PRETRAINED_CLASS else 1
        predictions = []

        # Pure inference: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            for batch in tqdm(dataloader, total=len(dataloader)):
                input_ids, token_type_ids, attention_mask, sentence_id = batch
                input_ids = input_ids.to(self.device)
                token_type_ids = token_type_ids.to(self.device)
                # Keep the mask on the same device as the other inputs.
                attention_mask = attention_mask.to(self.device)
                outputs = model(input_ids,
                                token_type_ids=token_type_ids,
                                attention_mask=attention_mask)

                # Some models return a tuple; its first element is used.
                if isinstance(outputs, tuple):
                    outputs = outputs[0]
                outputs = torch.softmax(outputs, dim=1)

                for idx in range(input_ids.shape[0]):
                    predictions.append({
                        'id': sentence_id[idx],
                        'score': outputs[idx, score_column].item(),
                    })

        return predictions
    def evaluate_intersentence(self):
        """Score every intersentence example with the intersentence model.

        Instantiates the model class named by ``self.INTERSENTENCE_MODEL``
        (looked up on the ``models`` module), optionally loads weights from
        ``self.INTERSENTENCE_LOAD_PATH``, and evaluates it over
        ``IntersentenceDataset`` one example per batch. When ``args.no_cuda``
        is set the batches are fanned out to ``process_job`` across all CPUs
        via joblib; otherwise they are processed sequentially in this process.

        Returns:
            On the sequential path, a list with one dict per example:
            ``{'id': <sentence id>, 'score': <float>}``. The score is the
            softmax probability from column 0 when ``self.PRETRAINED_CLASS``
            starts with ``"bert"`` or equals ``"roberta-base"``, otherwise
            from column 1. On the ``args.no_cuda`` path, the list of whatever
            ``process_job`` returns for each batch.

        Raises:
            AssertionError: if ``args.batch_size`` is not 1.
        """
        print()
        print(
            f"{Fore.LIGHTBLUE_EX}Evaluating bias on intersentence tasks...{Style.RESET_ALL}"
        )
        model = getattr(models, self.INTERSENTENCE_MODEL)(
            self.PRETRAINED_CLASS).to(self.device)

        print(f"Number of parameters: {self.count_parameters(model):,}")
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # Wrap before loading: the checkpoint is loaded into the wrapped
        # model, exactly as in the original statement order.
        model = torch.nn.DataParallel(model)

        if self.INTERSENTENCE_LOAD_PATH:
            # map_location lets a checkpoint saved on GPU be restored when
            # evaluating on CPU (the args.no_cuda path below).
            state_dict = torch.load(self.INTERSENTENCE_LOAD_PATH,
                                    map_location=self.device)
            model.load_state_dict(state_dict)

        model.eval()
        dataset = IntersentenceDataset(self.tokenizer, args)
        # TODO: test this on larger batch sizes.
        assert args.batch_size == 1
        dataloader = DataLoader(dataset, shuffle=True, num_workers=0)

        if args.no_cuda:
            n_cpus = cpu_count()
            print(f"Using {n_cpus} cpus!")
            return Parallel(n_jobs=n_cpus, backend="multiprocessing")(
                delayed(process_job)(batch, model, self.PRETRAINED_CLASS)
                for batch in tqdm(dataloader, total=len(dataloader)))

        # The column holding the reported probability depends only on the
        # pretrained class name, so decide it once rather than per example.
        reads_first_column = (self.PRETRAINED_CLASS.startswith("bert")
                              or self.PRETRAINED_CLASS == "roberta-base")
        score_column = 0 if reads_first_column else 1
        predictions = []

        # Pure inference: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            for batch in tqdm(dataloader, total=len(dataloader)):
                # NOTE(review): the attention mask is unpacked but, as in the
                # original code, NOT passed to the model. Confirm whether this
                # is intentional before changing it, since it affects scores.
                input_ids, token_type_ids, attention_mask, sentence_id = batch
                input_ids = input_ids.to(self.device)
                token_type_ids = token_type_ids.to(self.device)
                outputs = model(input_ids, token_type_ids=token_type_ids)

                # Some models return a tuple; its first element is used.
                if isinstance(outputs, tuple):
                    outputs = outputs[0]
                outputs = torch.softmax(outputs, dim=1)

                for idx in range(input_ids.shape[0]):
                    predictions.append({
                        'id': sentence_id[idx],
                        'score': outputs[idx, score_column].item(),
                    })

        return predictions