def evaluate_nsp_intersentence(self):
    print()
    print(
        f"{Fore.LIGHTBLUE_EX}Evaluating bias on intersentence tasks...{Style.RESET_ALL}"
    )

    # Build the next-sentence-prediction wrapper around the pretrained core model.
    nsp_dim = 300
    model = getattr(models, self.INTERSENTENCE_MODEL)(
        self.PRETRAINED_CLASS, nsp_dim=nsp_dim).to(self.device)

    # GPT-2 has no padding token by default, so add one and resize the embeddings.
    if "gpt2" in args.tokenizer.lower():
        print("Adding <PAD> token to tokenizer...")
        self.tokenizer.add_special_tokens({"pad_token": "<PAD>"})
        model.core_model.resize_token_embeddings(len(self.tokenizer))

    print(f"Number of parameters: {self.count_parameters(model):,}")
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)

    if self.INTERSENTENCE_LOAD_PATH:
        model.load_state_dict(torch.load(self.INTERSENTENCE_LOAD_PATH))
        print("model loaded")
    model.eval()

    dataset = IntersentenceDataset(self.tokenizer, args)
    dataloader = DataLoader(dataset, batch_size=args.batch_size,
                            shuffle=True, num_workers=0)

    predictions = []
    for batch_num, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
        input_ids, token_type_ids, attention_mask, sentence_id = batch
        input_ids = input_ids.to(self.device)
        token_type_ids = token_type_ids.to(self.device)
        attention_mask = attention_mask.to(self.device)

        outputs = model(input_ids,
                        token_type_ids=token_type_ids,
                        attention_mask=attention_mask)
        if isinstance(outputs, tuple):
            outputs = outputs[0]
        outputs = torch.softmax(outputs, dim=1)

        for idx in range(input_ids.shape[0]):
            probabilities = {}
            probabilities['id'] = sentence_id[idx]
            # BERT-style NSP heads put the "is next sentence" class at index 0;
            # other heads use index 1.
            if "bert" in self.PRETRAINED_CLASS:
                probabilities['score'] = outputs[idx, 0].item()
            else:
                probabilities['score'] = outputs[idx, 1].item()
            predictions.append(probabilities)

    return predictions
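# Illustrative sketch only: the wrapper loaded via getattr(models, self.INTERSENTENCE_MODEL)
# above is defined elsewhere in the `models` module. The class below is an assumption of its
# rough shape (the name ModelNSPSketch, the `nsp_head` attribute, and the pooling choice are
# hypothetical), shown to clarify what `nsp_dim` and `core_model` refer to.
from transformers import AutoModel


class ModelNSPSketch(torch.nn.Module):
    def __init__(self, pretrained_class, nsp_dim=300):
        super().__init__()
        # `core_model` matches the attribute resized for GPT-2 above.
        self.core_model = AutoModel.from_pretrained(pretrained_class)
        hidden_size = self.core_model.config.hidden_size
        # Small next-sentence-prediction head producing two logits.
        self.nsp_head = torch.nn.Sequential(
            torch.nn.Linear(hidden_size, nsp_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(nsp_dim, 2),
        )

    def forward(self, input_ids, token_type_ids=None, attention_mask=None):
        output = self.core_model(input_ids,
                                 token_type_ids=token_type_ids,
                                 attention_mask=attention_mask)
        # Summarize the sentence pair with the first token's hidden state.
        pooled = output.last_hidden_state[:, 0]
        return self.nsp_head(pooled)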
def evaluate_intersentence(self):
    print()
    print(
        f"{Fore.LIGHTBLUE_EX}Evaluating bias on intersentence tasks...{Style.RESET_ALL}"
    )

    model = getattr(models, self.INTERSENTENCE_MODEL)(
        self.PRETRAINED_CLASS).to(self.device)
    print(f"Number of parameters: {self.count_parameters(model):,}")
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = torch.nn.DataParallel(model)

    if self.INTERSENTENCE_LOAD_PATH:
        model.load_state_dict(torch.load(self.INTERSENTENCE_LOAD_PATH))
    model.eval()

    dataset = IntersentenceDataset(self.tokenizer, args)
    # TODO: test this on larger batch sizes.
    assert args.batch_size == 1
    dataloader = DataLoader(dataset, shuffle=True, num_workers=0)

    if args.no_cuda:
        # CPU path: fan the batches out over all cores with joblib.
        n_cpus = cpu_count()
        print(f"Using {n_cpus} cpus!")
        predictions = Parallel(n_jobs=n_cpus, backend="multiprocessing")(
            delayed(process_job)(batch, model, self.PRETRAINED_CLASS)
            for batch in tqdm(dataloader, total=len(dataloader)))
    else:
        predictions = []
        for batch_num, batch in tqdm(enumerate(dataloader), total=len(dataloader)):
            input_ids, token_type_ids, attention_mask, sentence_id = batch
            input_ids = input_ids.to(self.device)
            token_type_ids = token_type_ids.to(self.device)
            attention_mask = attention_mask.to(self.device)

            outputs = model(input_ids,
                            token_type_ids=token_type_ids,
                            attention_mask=attention_mask)
            if isinstance(outputs, tuple):
                outputs = outputs[0]
            outputs = torch.softmax(outputs, dim=1)

            for idx in range(input_ids.shape[0]):
                probabilities = {}
                probabilities['id'] = sentence_id[idx]
                # BERT and RoBERTa-base score the first class; other models the second.
                if (self.PRETRAINED_CLASS[:4] == "bert"
                        or self.PRETRAINED_CLASS == "roberta-base"):
                    probabilities['score'] = outputs[idx, 0].item()
                else:
                    probabilities['score'] = outputs[idx, 1].item()
                predictions.append(probabilities)

    return predictions
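# Illustrative sketch only: `process_job`, the worker handed to joblib in the CPU path
# above, is not defined in this file. The version below merely assumes it mirrors the
# single-batch logic of the GPU loop; everything beyond the argument order
# (batch, model, pretrained_class) and the score-index convention copied from that loop
# is a guess.
def process_job(batch, model, pretrained_class):
    input_ids, token_type_ids, attention_mask, sentence_id = batch
    with torch.no_grad():
        outputs = model(input_ids,
                        token_type_ids=token_type_ids,
                        attention_mask=attention_mask)
    if isinstance(outputs, tuple):
        outputs = outputs[0]
    outputs = torch.softmax(outputs, dim=1)
    # evaluate_intersentence asserts args.batch_size == 1, so index row 0.
    if pretrained_class[:4] == "bert" or pretrained_class == "roberta-base":
        score = outputs[0, 0].item()
    else:
        score = outputs[0, 1].item()
    return {'id': sentence_id[0], 'score': score}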