Esempio n. 1
0
def create_submition_file(outfolder, mymodel, loader, device):
    """Run inference over *loader* and write ``<outfolder>/output.csv``.

    Each CSV row is ``sample_id,predicted_class``.

    Args:
        outfolder: Directory for the output file (created if missing).
        mymodel: Model called as ``mymodel(input_ids=..., attention_mask=...)``;
            ``mymodel.act`` is truthy when the model already applies a final
            activation, in which case the softmax here is skipped.
        loader: Iterable of batches ``(ids, input_ids, attention_mask, ...)``.
        device: Device the input tensors are moved to via ``to_device``.
    """
    if not os.path.exists(outfolder):
        os.makedirs(outfolder)
    outfile = os.path.join(outfolder, 'output.csv')
    mymodel.eval()
    all_ids = []
    all_outputs = []
    # Fix: inference must run under no_grad -- the original built autograd
    # graphs for every batch, wasting memory during prediction.
    with torch.no_grad():
        for batch in tqdm(loader):
            batch_ids = batch[0]
            inputs = to_device(batch[1], device=device)
            inputs_att = to_device(batch[2], device=device)

            outputs = mymodel(input_ids=inputs, attention_mask=inputs_att)
            if not mymodel.act:
                # Model emitted raw logits; normalize before taking argmax.
                outputs = torch.softmax(outputs, dim=1)
            outputs = torch.argmax(outputs, dim=1)
            all_ids.append(batch_ids)
            all_outputs.append(outputs)

    ids_list = [item for sublist in all_ids for item in sublist]
    outs_list = [item for sublist in all_outputs for item in sublist]

    # Fix: newline='' as the csv module docs require -- prevents blank
    # rows on Windows.  'sample_id' avoids shadowing the builtin 'id'.
    with open(outfile, 'w', newline='') as output:
        csv_writer = csv.writer(output,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        for sample_id, out in zip(ids_list, outs_list):
            csv_writer.writerow([sample_id, int(out)])
Esempio n. 2
0
    def train_step(self, batch):
        """Run one forward pass on a training batch and return the loss.

        Batch layout is ``(input_ids, attention_mask, _, targets)``.
        Gradients are zeroed here, but ``backward()`` and the optimizer
        step are left to the caller.
        """
        self.model.train()
        self.optimizer.zero_grad()

        dev = self.device
        token_ids = to_device(batch[0], device=dev)
        attention = to_device(batch[1], device=dev)
        labels = to_device(batch[3], device=dev)

        predictions = self.model(input_ids=token_ids,
                                 attention_mask=attention)
        return self.criterion(predictions, labels)
Esempio n. 3
0
    def calc_val_loss(self, val_loader):
        """Compute the average validation loss and requested metrics.

        Iterates *val_loader* without gradients, averaging the criterion
        loss over batches.  For every metric name present in
        ``self.metrics`` ('f1-score', 'accuracy', 'precision', 'recall')
        the corresponding sklearn score is computed per batch and averaged.

        Returns:
            ``(avg_val_loss, metrics_dict)`` where ``metrics_dict`` maps
            each requested metric name to its mean per-batch value.
        """
        # Metric name -> sklearn scoring function.  Replaces four
        # copy-pasted branches that each recomputed argmax and deep-copied
        # the targets tensor (unnecessary: targets are only read).
        metric_fns = {
            'f1-score': sklearn.metrics.f1_score,
            'accuracy': sklearn.metrics.accuracy_score,
            'precision': sklearn.metrics.precision_score,
            'recall': sklearn.metrics.recall_score,
        }
        self.model.eval()
        with torch.no_grad():
            avg_val_loss = 0
            scores = {name: [] for name in metric_fns if name in self.metrics}
            for batch in tqdm(val_loader):
                inputs = to_device(batch[0], device=self.device)
                inputs_att = to_device(batch[1], device=self.device)
                targets = to_device(batch[3], device=self.device)
                outputs = self.model(input_ids=inputs,
                                     attention_mask=inputs_att)
                loss = self.criterion(outputs, targets)
                avg_val_loss += loss.item()
                if scores:
                    # Predicted class per sample, computed once per batch.
                    preds = torch.argmax(outputs, dim=1).cpu().numpy()
                    true = targets.cpu().numpy()
                    for name, batch_scores in scores.items():
                        batch_scores.append(metric_fns[name](true, preds))
            avg_val_loss = avg_val_loss / len(val_loader)
            metrics_dict = {name: np.mean(vals)
                            for name, vals in scores.items()}
            return avg_val_loss, metrics_dict
Esempio n. 4
0
    def train_step(self, batch):
        """One training forward pass for humor-rating regression.

        Batch layout: ``(input_ids, attention_mask, humor_rating, ...)``.
        Returns the criterion loss; backpropagation and the optimizer
        step are the caller's responsibility.
        """
        self.model.train()
        self.optimizer.zero_grad()

        dev = self.device
        token_ids = to_device(batch[0], device=dev)
        attention = to_device(batch[1], device=dev)
        ratings = to_device(batch[2], device=dev)

        predictions = self.model(
            input_ids=token_ids, attention_mask=attention).squeeze(1)

        return self.criterion(predictions, ratings)
Esempio n. 5
0
def get_features(data_loader, mymodel, device):
    """Extract encoder features for every sample in *data_loader*.

    Args:
        data_loader: Iterable of batches ``(ids, input_ids, attention_mask)``.
        mymodel: Model whose ``.encoder`` is called with
            ``input_ids``/``attention_mask``; element 1 of its output
            (presumably the pooled representation -- confirm against the
            encoder) is kept as the feature.
        device: Device the input tensors are moved to via ``to_device``.

    Returns:
        dict mapping each sample id to its feature (numpy array).
    """
    features = []
    ids = []
    mymodel.eval()
    with torch.no_grad():
        for batch in tqdm(data_loader):
            ids.append(batch[0])
            inputs = to_device(batch[1], device=device)
            inputs_att = to_device(batch[2], device=device)
            outputs = mymodel.encoder(input_ids=inputs,
                                      attention_mask=inputs_att)
            features.append(outputs[1].cpu().numpy())
    ids = [item for sublist in ids for item in sublist]
    features = [item for sublist in features for item in sublist]
    # Fix: dict(zip(...)) replaces the range(len(...)) index comprehension.
    # As before, a later duplicate id overwrites an earlier one.
    return dict(zip(ids, features))
Esempio n. 6
0
    def calc_val_loss(self, val_loader):
        """Average the validation loss over *val_loader*.

        This (regression) trainer computes no metrics, so the second
        return value is always an empty dict, kept for interface parity
        with the classification trainer's ``calc_val_loss``.
        """
        self.model.eval()
        with torch.no_grad():
            total_loss = 0.0
            for batch in tqdm(val_loader):
                token_ids = to_device(batch[0], device=self.device)
                attention = to_device(batch[1], device=self.device)
                ratings = to_device(batch[2], device=self.device)

                preds = self.model(input_ids=token_ids,
                                   attention_mask=attention).squeeze(1)
                total_loss += self.criterion(preds, ratings).item()

            return total_loss / len(val_loader), {}