def create_submition_file(outfolder, mymodel, loader, device):
    """Run *mymodel* over *loader* and write predictions to <outfolder>/output.csv.

    Each CSV row is ``id,predicted_class``. *outfolder* is created if missing.
    NOTE(review): batches are assumed to be (ids, input_ids, attention_mask)
    tuples — confirm against the dataset/collate function.
    """
    if not os.path.exists(outfolder):
        os.makedirs(outfolder)
    outfile = os.path.join(outfolder, 'output.csv')
    mymodel.eval()
    all_ids = []
    all_outputs = []
    # Fix: wrap inference in no_grad() for consistency with get_features /
    # calc_val_loss; without it autograd graphs accumulate for every batch.
    with torch.no_grad():
        for batch in tqdm(loader):
            myid = batch[0]
            inputs = to_device(batch[1], device=device)
            inputs_att = to_device(batch[2], device=device)
            outputs = mymodel(input_ids=inputs, attention_mask=inputs_att)
            if not mymodel.act:
                # Raw logits path: softmax is monotonic so argmax is unchanged,
                # kept for parity with the activated path.
                outputs = torch.softmax(outputs, dim=1)
            outputs = torch.argmax(outputs, dim=1)
            all_ids.append(myid)
            all_outputs.append(outputs)
    ids_list = [item for sublist in all_ids for item in sublist]
    outs_list = [item for sublist in all_outputs for item in sublist]
    # Fix: newline='' prevents blank rows on Windows (csv module requirement).
    with open(outfile, 'w', newline='') as output:
        csv_writer = csv.writer(output, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        # Renamed loop variable to avoid shadowing the builtin `id`.
        for sample_id, out in zip(ids_list, outs_list):
            csv_writer.writerow([sample_id, int(out)])
def train_step(self, batch):
    """One supervised training step: forward pass and loss.

    Returns the loss tensor; backward/step are the caller's responsibility
    (only ``zero_grad`` is done here).
    """
    self.model.train()
    self.optimizer.zero_grad()
    token_ids = to_device(batch[0], device=self.device)
    attn_mask = to_device(batch[1], device=self.device)
    labels = to_device(batch[3], device=self.device)
    logits = self.model(input_ids=token_ids, attention_mask=attn_mask)
    return self.criterion(logits, labels)
def calc_val_loss(self, val_loader):
    """Evaluate on *val_loader*; return ``(avg_loss, metrics_dict)``.

    Metrics are selected by name via ``self.metrics`` and use sklearn's
    default (binary) averaging, computed per batch and then averaged over
    batches. NOTE(review): per-batch metric averaging approximates the
    dataset-level score — confirm this is acceptable.
    """
    # Metric name -> sklearn scorer. Replaces four copy-pasted if-blocks
    # that each recomputed argmax and deep-copied the targets.
    scorers = {
        'f1-score': sklearn.metrics.f1_score,
        'accuracy': sklearn.metrics.accuracy_score,
        'precision': sklearn.metrics.precision_score,
        'recall': sklearn.metrics.recall_score,
    }
    wanted = [name for name in scorers if name in self.metrics]
    per_batch = {name: [] for name in wanted}
    self.model.eval()
    with torch.no_grad():
        avg_val_loss = 0.0
        for batch in tqdm(val_loader):
            inputs = to_device(batch[0], device=self.device)
            inputs_att = to_device(batch[1], device=self.device)
            targets = to_device(batch[3], device=self.device)
            outputs = self.model(input_ids=inputs, attention_mask=inputs_att)
            avg_val_loss += self.criterion(outputs, targets).item()
            if wanted:
                # Compute preds/labels once per batch instead of once per
                # metric; copy.deepcopy was unnecessary — .cpu().numpy()
                # never mutates the source tensor.
                preds = torch.argmax(outputs, dim=1).cpu().numpy()
                true = targets.cpu().numpy()
                for name in wanted:
                    per_batch[name].append(scorers[name](true, preds))
        avg_val_loss = avg_val_loss / len(val_loader)
    metrics_dict = {name: np.mean(vals) for name, vals in per_batch.items()}
    return avg_val_loss, metrics_dict
def train_step(self, batch):
    """One regression training step: forward pass and loss on humor ratings.

    Returns the loss tensor; backward/step are left to the caller.
    """
    self.model.train()
    self.optimizer.zero_grad()
    token_ids = to_device(batch[0], device=self.device)
    attn_mask = to_device(batch[1], device=self.device)
    ratings = to_device(batch[2], device=self.device)
    # Model emits shape (batch, 1); squeeze to (batch,) to match targets.
    preds = self.model(input_ids=token_ids, attention_mask=attn_mask).squeeze(1)
    return self.criterion(preds, ratings)
def get_features(data_loader, mymodel, device):
    """Return ``{example_id: encoder feature vector (numpy)}`` for the loader.

    Uses ``encoder`` output index 1 (presumably the pooled representation —
    verify against the encoder's return contract).
    """
    mymodel.eval()
    ids = []
    features = []
    with torch.no_grad():
        for batch in tqdm(data_loader):
            batch_ids = batch[0]
            token_ids = to_device(batch[1], device=device)
            attn_mask = to_device(batch[2], device=device)
            enc_out = mymodel.encoder(input_ids=token_ids,
                                      attention_mask=attn_mask)
            features.append(enc_out[1].cpu().numpy())
            ids.append(batch_ids)
    flat_ids = [i for chunk in ids for i in chunk]
    flat_feats = [f for chunk in features for f in chunk]
    return dict(zip(flat_ids, flat_feats))
def calc_val_loss(self, val_loader):
    """Average validation loss for the regression head.

    Returns ``(avg_loss, metrics_dict)`` where the metrics dict is always
    empty, keeping the interface parallel with the classification variant.
    """
    self.model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(val_loader):
            token_ids = to_device(batch[0], device=self.device)
            attn_mask = to_device(batch[1], device=self.device)
            ratings = to_device(batch[2], device=self.device)
            preds = self.model(input_ids=token_ids,
                               attention_mask=attn_mask).squeeze(1)
            total_loss += self.criterion(preds, ratings).item()
    return total_loss / len(val_loader), {}