Example #1
    def __init__(self, dataset, param_dict):
        """ Initializes a Trainer object which will be used to optimize
        the neural network.
        Arguments:
            dataset {AspectDataset} -- the part of the data usable for training and validation
            param_dict {dict} -- parameter dictionary, parameters can be seen above
        """
        self.dataset = dataset
        self.param = param_dict
        if param_dict["use_linmodel"]:
            self.model = LinModel(self.param["embedding_dim"],
                                  self.param["output_dim"])
        else:
            self.model = Model(self.param["embedding_dim"],
                               self.param["output_dim"])
        self.use_train_iterator = True
        # Used for the classification training afterwards
        self.only_supervised = False

        self.filename = 'training_{}'.format(
            datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        self.training_records = [self.param]
        self.model_name = 'binary-abae'
        if self.param["save_model_path"] is not None:
            self.model_path = os.path.join(os.getcwd(),
                                           self.param["save_model_path"],
                                           self.filename)
        self.current_epoch = 0
        self.best_train_f1 = 0.0
        self.best_eval_f1 = 0.0
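
The constructor above only wires up the model choice and some bookkeeping; everything else is driven by param_dict. Below is a minimal sketch of such a dictionary, assuming the surrounding class is instantiated like RandomSamplingTrainer in Example #6 (which has this exact two-argument constructor); the keys are the ones the trainer reads, the values are purely illustrative placeholders.

# Hypothetical parameter dictionary; keys mirror the lookups in the trainer code,
# the concrete values are placeholders and not taken from the original project.
param_dict = {
    "use_linmodel": True,            # LinModel instead of Model
    "embedding_dim": 300,
    "output_dim": 12,
    "save_model_path": "models",     # None disables checkpointing
    "validation_percentage": 0.1,
    "batch_size": 32,
    "lr": 1e-3,
    "use_kcl": False,                # False -> MCL clustering loss
    "cuda": True,
    "patience_early_stopping": 10,
    "epochs": 50,
    "train_entities": True,
    "classification_dim": 1,
    "save_training_records": True,
    "records_data_path": "records",
}

trainer = RandomSamplingTrainer(dataset, param_dict)  # dataset: an existing AspectDataset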
Example #2
    def train_classifier(self, freeze=True, new_param=None):
        """ Trains a classification layer on top of the previously trained model.
        The parameters of the previously trained model may be frozen.
        Arguments:
            freeze {bool} -- whether to freeze the previous parameters
            new_param {dict} -- optional parameter dictionary replacing self.param
        Returns:
            {Model} -- the final model
        """
        self.model_name = 'classifier'
        if new_param is not None:
            self.param = new_param
        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False
        self.only_supervised = True
        self.model = Classification(self.model, self.param["output_dim"],
                                    self.param["classification_dim"])
        model = self.train()
        return model
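
In a typical two-stage run, the unsupervised clustering model is trained first and the classification head is then fitted on top of it. A hedged sketch of that call sequence, assuming a trainer object constructed as shown above (variable names are illustrative):

# Stage 1: clustering / similarity pre-training (train() is defined in the full
# trainer classes below and returns the trained encoder).
encoder = trainer.train()

# Stage 2: put a Classification layer on top and train only that layer.
# freeze=True keeps the pre-trained parameters fixed; new_param can optionally
# swap in a different parameter dictionary, e.g. with another learning rate.
classifier = trainer.train_classifier(freeze=True)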
Example #3
    def __init__(self, dataset, param_dict, other_dataset):
        """ Initializes a Trainer object which will be used to optimize
        the neural network.
        Arguments:
            dataset {AspectDataset} -- the part of the data usable for training and validation
            param_dict {dict} -- parameter dictionary, parameters can be seen above
            other_dataset {AspectDataset} -- dataset created by dfToBinarySamplingDatasets. Used
            if we want to vary the number of similar (in dataset) and dissimilar samples (in 
            other_dataset). We can only use this in the binary classification setting.
        """
        self.dataset = dataset
        self.other_dataset = other_dataset
        self.param = param_dict
        if param_dict["use_linmodel"]:
            self.model = LinModel(self.param["embedding_dim"],
                                  self.param["output_dim"])
        else:
            self.model = Model(self.param["embedding_dim"],
                               self.param["output_dim"])
        # Next value is used for iterating through the other_dataset in binary_sampling,
        # see getOtherBatch()
        self.use_train_iterator = True
        self.other_iterator = None
        # Used for the classification training afterwards
        self.only_supervised = False

        self.filename = 'training_{}'.format(
            datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        self.training_records = [self.param]
        self.model_name = 'binary-abae'
        if self.param["save_model_path"] is not None:
            self.model_path = os.path.join(os.getcwd(),
                                           self.param["save_model_path"],
                                           self.filename)
        self.current_epoch = 0
        self.best_train_f1 = 0.0
        self.best_eval_f1 = 0.0
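
This variant additionally expects the dataset of dissimilar samples produced by dfToBinarySamplingDatasets, as described in the docstring. A rough usage sketch follows; the exact signature of dfToBinarySamplingDatasets is an assumption made for illustration only:

# Hypothetical setup for the binary-sampling case: dfToBinarySamplingDatasets is
# assumed to return the target-class dataset and the "other classes" dataset.
dataset, other_dataset = dfToBinarySamplingDatasets(df)
trainer = Trainer(dataset, param_dict, other_dataset)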
        print("Skipping:", file)
        continue
    binary_target_class = param["binary_target_class"]
    use_attributes = param["label"] == "attribute"

    middel_dim = 6 if use_attributes else 7

    model_folder_string = "models/restaurants/" + (
        "attribute/" if use_attributes else "entity/")
    model = LinModel(embedding_dim, middel_dim)
    if "activation" in param.keys():
        activation = param["activation"]
    else:
        activation = "softmax"
    model = Classification(model,
                           middel_dim,
                           output_dim=1,
                           activation=activation)
    try:
        model = load_model(model, model_folder_string + model_name)
    except:
        print(model_folder_string)
        print("Could not find model file:", model_folder_string + model_name)
        continue

    start_index = 0
    for sentences, ent, att in dataloader:
        output = model(sentences)
        output = output.detach()  # detach() returns a new tensor; assign it so the result is used
        for index in range(len(ent)):
            prob = output[index].item()
            if use_attributes:
Example #5
def run(vocab_size=8000, verbose=True, early_stopping=True, separation=True):
    args = json.load(open("config.json"))
    args['device'] = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    config = Config(args)
    print('Preparing Train.............')

    ### DATASET:
    DataSet = TextDataset(vocab_size=vocab_size, separation=separation)
    args['n_enc_vocab'] = vocab_size + 7
    args['n_dec_vocab'] = vocab_size + 7
    config.n_enc_vocab = vocab_size + 7
    config.n_dec_vocab = vocab_size + 7

    ### DATALOADER
    ratio = [
        int(len(DataSet) * args['train_ratio']),
        len(DataSet) - int(len(DataSet) * args['train_ratio'])
    ]
    train_set, val_set = torch.utils.data.random_split(DataSet, ratio)
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args['batch_size'],
                                               shuffle=True,
                                               collate_fn=collate_fn)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args['batch_size'],
                                             num_workers=2,
                                             collate_fn=collate_fn)

    ### MODEL
    if config.model == 'conv1drnn':
        model = Conv1dRNN(512,
                          3,
                          config.dropout,
                          config.n_dec_vocab,
                          config.d_hidn,
                          padding=1)
    else:
        model = Classification(config)

    ## CRITERION & OPTIMIZER & SECHEDULER
    criterion = nn.CrossEntropyLoss()
    optimizer = select_optimizer(model, args['Optim'])
    scheduler = select_scheduler(optimizer, args['Scheduler'])

    print('Training Start..............')
    history = {
        'train_losses': [],
        'train_f1s': [],
        'train_accs': [],
        'eval_losses': [],
        'eval_f1s': [],
        'eval_accs': []
    }
    ### RUN
    # Initialize the early-stopping state so the final return is defined even when
    # early_stopping is False or the loop stops after the first epoch.
    patience, best_epoch = 0, 0
    best_model = model
    for i in range(args['Scheduler']['T_max']):
        model, train_loss, train_f1, train_acc = train(model,
                                                       loader=train_loader,
                                                       criterion=criterion,
                                                       optimizer=optimizer,
                                                       config=config)
        model, eval_loss, eval_f1, eval_acc = evaluate(model,
                                                       loader=val_loader,
                                                       criterion=criterion,
                                                       config=config)
        scheduler.step()

        history['train_losses'].append(train_loss)
        history['train_f1s'].append(train_f1)
        history['train_accs'].append(train_acc)
        history['eval_losses'].append(eval_loss)
        history['eval_f1s'].append(eval_f1)
        history['eval_accs'].append(eval_acc)
        if verbose:
            print(
                f'epoch {i + 1} train : train_loss = {train_loss:.4f}, train_f1 = {train_f1:.4f}, train_acc = {train_acc:.4f}'
            )
            print(
                f'epoch {i + 1} validation : val_loss = {eval_loss:.4f}, val_f1 = {eval_f1:.4f}, val_acc = {eval_acc:.4f}'
            )
            print('-' * 50)
        if early_stopping:
            current_eval_loss = eval_loss
            minimum_eval_loss = min(history['eval_losses'])
            if current_eval_loss > minimum_eval_loss:
                patience += 1
                if patience > 10:
                    print(
                        f'early stop at best epoch {best_epoch}, valloss = {minimum_eval_loss}'
                    )
                    break
            else:
                best_model = model
                best_epoch = i
                patience = 0
    return best_model, history
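
The whole pipeline is started with a single call to run; a minimal usage sketch (the keyword values shown are simply the defaults from the signature above):

# Trains for args['Scheduler']['T_max'] epochs or until early stopping triggers,
# then returns the best model together with the per-epoch metric history.
best_model, history = run(vocab_size=8000, verbose=True,
                          early_stopping=True, separation=True)
print(min(history['eval_losses']))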
Example #6
class RandomSamplingTrainer:
    def __init__(self, dataset, param_dict):
        """ Initializes a Trainer object which will be used to optimize
        the neural network.
        Arguments:
            dataset {AspectDataset} -- the part of the data usable for training and validation
            param_dict {dict} -- parameter dictionary, parameters can be seen above
        """
        self.dataset = dataset
        self.param = param_dict
        if param_dict["use_linmodel"]:
            self.model = LinModel(self.param["embedding_dim"],
                                  self.param["output_dim"])
        else:
            self.model = Model(self.param["embedding_dim"],
                               self.param["output_dim"])
        self.use_train_iterator = True
        # Used for the classification training afterwards
        self.only_supervised = False

        self.filename = 'training_{}'.format(
            datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        self.training_records = [self.param]
        self.model_name = 'binary-abae'
        if self.param["save_model_path"] is not None:
            self.model_path = os.path.join(os.getcwd(),
                                           self.param["save_model_path"],
                                           self.filename)
        self.current_epoch = 0
        self.best_train_f1 = 0.0
        self.best_eval_f1 = 0.0

    def train(self, verbose=True):
        """ Starts the training procedure.
        Arguments:
            verbose {bool} -- Whether to log messages during training
        Returns:
            {Model} -- The trained model
        """

        verbose_training = verbose

        train_dataset, validation_dataset = split_dataset(
            self.dataset, self.param["validation_percentage"])

        # Create the dataloaders for the training and validation splits
        self.dataloader = DataLoader(train_dataset,
                                     batch_size=self.param["batch_size"],
                                     shuffle=True,
                                     collate_fn=collate_padding)
        self.validloader = DataLoader(validation_dataset,
                                      batch_size=self.param["batch_size"],
                                      collate_fn=collate_padding)

        # Initialize the optimizer, Learning rate scheduler and the classification loss
        #self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.param["lr"])
        self.optimizer = torch.optim.RMSprop(self.model.parameters(),
                                             lr=self.param["lr"])
        self.scheduler = StepLR(self.optimizer, step_size=1, gamma=0.1)
        self.classification_loss = nn.BCELoss()

        # Initialize the correct loss. This is wrapped by the learner object which takes care
        # of all the similarity calculations
        if self.param["use_kcl"]:
            self.learner_clustering = Learner_Clustering(KCL())
        else:
            self.learner_clustering = Learner_Clustering(MCL())

        if self.param["cuda"] and torch.cuda.is_available():
            log("Using GPU")
            device = torch.device('cuda')
            self.model.to(device)
        else:
            log("Using CPU")
            self.param["cuda"] = False
            device = torch.device('cpu')

        # The patience value is used to determine whether we want to stop the training early.
        # In each epoch in which the validation error does not decrease, patience is decreased.
        # If it reaches zero, the training is terminated.
        patience = self.param["patience_early_stopping"]
        best_eval_loss = torch.tensor(float('inf'))

        for e in range(self.param["epochs"]):
            self.current_epoch = e
            log("Epoch:", e)

            # Start one training epoch and log the loss
            self.model.train()
            loss = self.train_epoch()
            loss = loss.to(torch.device('cpu'))
            log("Train loss:", loss.item())

            # Start one evaluation epoch and log the loss
            self.model.eval()
            eval_loss = self.eval_epoch()
            eval_loss = eval_loss.to(torch.device('cpu'))
            log("Eval Loss:", eval_loss.item())
            if eval_loss < best_eval_loss:
                best_eval_loss = eval_loss
                patience = self.param["patience_early_stopping"]
                if self.param["save_model_path"] is not None:
                    save_model(self.model, self.model_path)
            else:
                patience -= 1
            log("Current patience:", patience)
            if patience <= 0:
                break

            # We might not want to decay the learning rate during the whole training:
            # if e < self.param["lr_decay_epochs"]:
            #     self.scheduler.step()
            if patience < 6:
                self.scheduler.step()

            self.training_records.append({
                'epoch': e,
                'model': self.model_name,
                'loss': loss.item(),
                'eval_loss': eval_loss.item()
            })
        if self.param["save_model_path"] is not None:
            print("Reloading best model")
            self.model.load_state_dict(torch.load(self.model_path))
        if self.only_supervised:
            log("Best Scores:", self.best_train_f1, "Train F1",
                self.best_eval_f1, "Validation F1")
            self.training_records.append({
                "train_f1": self.best_train_f1,
                "val_f1": self.best_eval_f1
            })
        if self.param["save_training_records"]:
            save_records(self.param['records_data_path'], self.filename,
                         self.training_records)
        return self.model

    def train_classifier(self, freeze=True, new_param=None):
        """ Trains a classification layer on top of the previously trained model.
        The parameters of the previously trained model may be frozen.
        Arguments:
            freeze {bool} -- whether to freeze the previous parameters
        Returns:
            {Model} -- the final model
        """
        self.model_name = 'classifier'
        if new_param is not None:
            self.param = new_param
        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False
        self.only_supervised = True
        self.model = Classification(self.model, self.param["output_dim"],
                                    self.param["classification_dim"])
        model = self.train()
        return model

    def train_epoch(self):
        """ Training method, if we want to only predict one class -> transformation of
        the problem to binary classification.
        The parameters for training are all stored in self.param
        """
        aggregated_targets = []
        aggregated_outputs = []
        loss = torch.zeros(1)
        for sentences, entities, attributes in self.dataloader:
            if self.param["train_entities"]:
                target = entities
            else:
                target = attributes
            if self.param["cuda"]:
                sentences = sentences.cuda()
                target = target.cuda()

            batch_loss, output = self.train_batch(sentences, target)
            loss += batch_loss
            aggregated_targets.append(target.to(torch.device('cpu')))
            aggregated_outputs.append(output.to(torch.device('cpu')))
        aggregated_targets = torch.cat(aggregated_targets)
        aggregated_outputs = torch.cat(aggregated_outputs)
        # Metrics can only be computed in the classification phase
        if self.only_supervised:
            metrics = calculate_metrics(aggregated_targets, aggregated_outputs)
            if metrics["f1"] > self.best_train_f1:
                self.best_train_f1 = metrics["f1"]
            log("Train", metrics)
            metrics.update({
                'epoch': self.current_epoch,
                'step': 'train',
                'model': self.model_name
            })
            self.training_records.append(metrics)

        return loss

    def train_batch(self, sentences, target):
        """ Training method for one data batch.
        """
        self.optimizer.zero_grad()

        output = self.model(sentences)

        # only_supervised means we are training the classifier after the ABAE model
        if not self.only_supervised:
            similarity = Class2Simi(target)
            loss = self.learner_clustering.calculate_criterion(
                output, similarity)
        else:
            # In the binary case we need to add one dimension
            loss = self.classification_loss(output, target[:, None])

        loss.backward()
        self.optimizer.step()
        return loss, output

    def eval_epoch(self):
        aggregated_targets = []
        aggregated_outputs = []
        loss = torch.tensor(0.0)
        for sentences, entities, attributes in self.validloader:
            if self.param["train_entities"]:
                target = entities
            else:
                target = attributes
            if self.param["cuda"]:
                sentences = sentences.cuda()
                target = target.cuda()

            batch_loss, output = self.eval_batch(sentences, target)
            loss += batch_loss
            aggregated_targets.append(target.to(torch.device('cpu')))
            aggregated_outputs.append(output.to(torch.device('cpu')))
        aggregated_targets = torch.cat(aggregated_targets)
        aggregated_outputs = torch.cat(aggregated_outputs)
        # Metrics can only be computed in the classification phase
        if self.only_supervised:
            metrics = calculate_metrics(aggregated_targets, aggregated_outputs)
            if metrics["f1"] > self.best_eval_f1:
                self.best_eval_f1 = metrics["f1"]
            log("Eval", metrics)
            metrics.update({
                'epoch': self.current_epoch,
                'step': 'eval',
                'model': self.model_name
            })
            self.training_records.append(metrics)
        return loss

    def eval_batch(self, sentences, target):
        """ Evaluation of one batch. """
        output = self.model(sentences)

        if not self.only_supervised:
            similarity = Class2Simi(target)
            loss = self.learner_clustering.calculate_criterion(
                output, similarity)
        else:
            # In the binary case we need to add one dimension
            loss = self.classification_loss(output, target[:, None])
        return loss, output
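
Note that the clustering branch in train_batch/eval_batch never uses the class labels directly: Class2Simi converts the batch labels into pairwise same-class/different-class targets, which the KCL or MCL criterion then compares against the model output. The snippet below is not the original Class2Simi (which comes from the KCL/MCL code base) but a toy re-implementation that only illustrates the idea:

import torch

def class_to_similarity(targets):
    """Illustrative stand-in for Class2Simi: for a batch of class labels, return a
    (batch x batch) matrix that is 1 where two samples share a class, 0 otherwise."""
    return (targets.unsqueeze(0) == targets.unsqueeze(1)).float()

labels = torch.tensor([0, 1, 0, 2])
print(class_to_similarity(labels))
# tensor([[1., 0., 1., 0.],
#         [0., 1., 0., 0.],
#         [1., 0., 1., 0.],
#         [0., 0., 0., 1.]])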
Example #7
class Trainer:
    def __init__(self, dataset, param_dict, other_dataset):
        """ Initializes a Trainer object which will be used to optimize
        the neural network.
        Arguments:
            dataset {AspectDataset} -- the part of the data usable for training and validation
            param_dict {dict} -- parameter dictionary, parameters can be seen above
            other_dataset {AspectDataset} -- dataset created by dfToBinarySamplingDatasets. Used
            if we want to vary the number of similar (in dataset) and dissimilar samples (in 
            other_dataset). We can only use this in the binary classification setting.
        """
        self.dataset = dataset
        self.other_dataset = other_dataset
        self.param = param_dict
        if param_dict["use_linmodel"]:
            self.model = LinModel(self.param["embedding_dim"],
                                  self.param["output_dim"])
        else:
            self.model = Model(self.param["embedding_dim"],
                               self.param["output_dim"])
        # Next value is used for iterating through the other_dataset in binary_sampling,
        # see getOtherBatch()
        self.use_train_iterator = True
        self.other_iterator = None
        # Used for the classification training afterwards
        self.only_supervised = False

        self.filename = 'training_{}'.format(
            datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        self.training_records = [self.param]
        self.model_name = 'binary-abae'
        if self.param["save_model_path"] is not None:
            self.model_path = os.path.join(os.getcwd(),
                                           self.param["save_model_path"],
                                           self.filename)
        self.current_epoch = 0
        self.best_train_f1 = 0.0
        self.best_eval_f1 = 0.0

    def train(self, verbose=True):
        """ Starts the training procedure.
        Arguments:
            verbose {bool} -- Whether to log messages during training
        Returns:
            {Model} -- The trained model
        """

        verbose_training = verbose

        train_dataset, validation_dataset = split_dataset(
            self.dataset, self.param["validation_percentage"])
        other_train_dataset, other_val_dataset = split_dataset(
            self.other_dataset, self.param["validation_percentage"])

        # Create the dataloaders for sampling; since we are in the binary case we additionally
        # initialize dataloaders for the other classes
        self.dataloader = DataLoader(train_dataset,
                                     batch_size=self.param["batch_size"],
                                     shuffle=True,
                                     collate_fn=collate_padding)
        self.validloader = DataLoader(validation_dataset,
                                      batch_size=self.param["batch_size"],
                                      collate_fn=collate_padding)
        batch_size = int(
            round(self.param["batch_size"] *
                  self.param["binary_sampling_percentage"]))
        self.other_dataloader = DataLoader(other_train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           collate_fn=collate_padding)
        self.other_validloader = DataLoader(other_val_dataset,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            collate_fn=collate_padding)

        # Initialize the optimizer, Learning rate scheduler and the classification loss
        #self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.param["lr"])
        self.optimizer = torch.optim.RMSprop(self.model.parameters(),
                                             lr=self.param["lr"])
        self.scheduler = StepLR(self.optimizer, step_size=1, gamma=0.1)
        self.classification_loss = nn.BCELoss()

        # Initialize the correct loss. This is wrapped by the learner object which takes care
        # of all the similarity calculations
        if self.param["use_kcl"]:
            self.learner_clustering = Learner_Clustering(KCL())
        else:
            self.learner_clustering = Learner_Clustering(MCL())

        if self.param["cuda"] and torch.cuda.is_available():
            log("Using GPU")
            device = torch.device('cuda')
            self.model.to(device)
        else:
            log("Using CPU")
            self.param["cuda"] = False
            device = torch.device('cpu')

        # The patience value is used to determine whether we want to stop the training early.
        # In each epoch in which the validation error does not decrease, patience is decreased.
        # If it reaches zero, the training is terminated.
        patience = self.param["patience_early_stopping"]
        best_eval_loss = torch.tensor(float('inf'))

        for e in range(self.param["epochs"]):
            self.current_epoch = e
            log("Epoch:", e)

            # Start one training epoch and log the loss
            self.model.train()
            loss = self.train_epoch()
            loss = loss.to(torch.device('cpu'))
            log("Train loss:", loss.item())

            # Start one evaluation epoch and log the loss
            self.model.eval()
            eval_loss = self.eval_epoch()
            eval_loss = eval_loss.to(torch.device('cpu'))
            log("Eval Loss:", eval_loss.item())
            if eval_loss < best_eval_loss:
                best_eval_loss = eval_loss
                patience = self.param["patience_early_stopping"]
                if self.param["save_model_path"] is not None:
                    save_model(self.model, self.model_path)
            else:
                patience -= 1
            log("Current patience:", patience)
            if patience <= 0:
                break

            # We might not want to decay the learning rate during the whole training:
            # if e < self.param["lr_decay_epochs"]:
            #     self.scheduler.step()
            if patience < 6:
                self.scheduler.step()

            self.training_records.append({
                'epoch': e,
                'model': self.model_name,
                'loss': loss.item(),
                'eval_loss': eval_loss.item()
            })
        if self.param["save_model_path"] is not None:
            print("Reloading best model")
            self.model.load_state_dict(torch.load(self.model_path))
        if self.only_supervised:
            log("Best Scores:", self.best_train_f1, "Train F1",
                self.best_eval_f1, "Validation F1")
            self.training_records.append({
                "train_f1": self.best_train_f1,
                "val_f1": self.best_eval_f1
            })
        if self.param["save_training_records"]:
            save_records(self.param['records_data_path'], self.filename,
                         self.training_records)
        return self.model

    def train_classifier(self, freeze=True, new_param=None):
        """ Trains a classification layer on top of the previously trained model.
        The parameters of the previously trained model may be frozen.
        Arguments:
            freeze {bool} -- whether to freeze the previous parameters
        Returns:
            {Model} -- the final model
        """
        self.model_name = 'classifier'
        if new_param is not None:
            self.param = new_param
        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False
        self.only_supervised = True
        self.model = Classification(self.model, self.param["output_dim"],
                                    self.param["classification_dim"],
                                    self.param["activation"])
        model = self.train()
        return model

    def train_epoch(self):
        """ Training method, if we want to only predict one class -> transformation of
        the problem to binary classification.
        The parameters for training are all stored in self.param
        """
        aggregated_targets = []
        aggregated_outputs = []
        loss = torch.zeros(1)
        for sentences, entities, attributes in self.dataloader:
            # After getting a batch from the other classes, simply append them to the current
            # sentences. The error calculation is robust enough.
            other_sentences, other_entities, other_attributes = self.getOtherBatch(
                train=True)
            sentences = self.fuze_sentences(sentences, other_sentences)
            entities = torch.cat([entities, other_entities])
            attributes = torch.cat([attributes, other_attributes])

            if self.param["train_entities"]:
                target = entities
            else:
                target = attributes
            if self.param["cuda"]:
                sentences = sentences.cuda()
                target = target.cuda()

            batch_loss, output = self.train_batch(sentences, target)
            loss += batch_loss
            aggregated_targets.append(target.to(torch.device('cpu')))
            aggregated_outputs.append(output.to(torch.device('cpu')))
        aggregated_targets = torch.cat(aggregated_targets)
        aggregated_outputs = torch.cat(aggregated_outputs)
        # Metrics can only be computed in the classification phase
        if self.only_supervised:
            metrics = calculate_metrics(aggregated_targets, aggregated_outputs)
            if metrics["f1"] > self.best_train_f1:
                self.best_train_f1 = metrics["f1"]
            log("Train", metrics)
            metrics.update({
                'epoch': self.current_epoch,
                'step': 'train',
                'model': self.model_name
            })
            self.training_records.append(metrics)

        return loss

    def train_batch(self, sentences, target):
        """ Training method for one data batch.
        """
        self.optimizer.zero_grad()

        output = self.model(sentences)

        # only_supervised means we are training the classifier after the ABAE model
        if not self.only_supervised:
            similarity = Class2Simi(target)
            loss = self.learner_clustering.calculate_criterion(
                output, similarity)
        else:
            # In the binary case we need to add one dimension
            loss = self.classification_loss(output, target[:, None])

        loss.backward()
        self.optimizer.step()
        return loss, output

    def eval_epoch(self):
        aggregated_targets = []
        aggregated_outputs = []
        loss = torch.tensor(0.0)
        for sentences, entities, attributes in self.validloader:
            # After getting a batch from the other classes, simply append them to the current
            # sentences. The error calculation is robust enough.
            other_sentences, other_entities, other_attributes = self.getOtherBatch(
                train=False)
            # Combining the samples depends on whether we used padding or not (= tensor vs. list output of the dataloader)
            sentences = self.fuze_sentences(sentences, other_sentences)
            entities = torch.cat([entities, other_entities])
            attributes = torch.cat([attributes, other_attributes])

            if self.param["train_entities"]:
                target = entities
            else:
                target = attributes
            if self.param["cuda"]:
                sentences = sentences.cuda()
                target = target.cuda()

            batch_loss, output = self.eval_batch(sentences, target)
            loss += batch_loss
            aggregated_targets.append(target.to(torch.device('cpu')))
            aggregated_outputs.append(output.to(torch.device('cpu')))
        aggregated_targets = torch.cat(aggregated_targets)
        aggregated_outputs = torch.cat(aggregated_outputs)
        # Metrics can only be computed in the classification phase
        if self.only_supervised:
            metrics = calculate_metrics(aggregated_targets, aggregated_outputs)
            if metrics["f1"] > self.best_eval_f1:
                self.best_eval_f1 = metrics["f1"]
            log("Eval", metrics)
            metrics.update({
                'epoch': self.current_epoch,
                'step': 'eval',
                'model': self.model_name
            })
            self.training_records.append(metrics)
        return loss

    def eval_batch(self, sentences, target):
        """ Evaluation of one batch. """
        output = self.model(sentences)

        if not self.only_supervised:
            similarity = Class2Simi(target)
            loss = self.learner_clustering.calculate_criterion(
                output, similarity)
        else:
            # In the binary case we need to add one dimension
            loss = self.classification_loss(output, target[:, None])
        return loss, output

    def fuze_sentences(self, sentences, other_sentences):
        """ Combines the sentences and other_sentences into one tensor, which has the maximum sentence
        length as second dimension 
        Arguments:
            sentences {tensor[batch1, max_sentence_length, embedding_dim]}
            other_sentences {tensor[batch2, max_other_sentence_length, embedding_dim]}
        Returns:
            {tensor[batch1+batch2, max(max_sentence_length, max_other_sentence_length), embedding_dim]}
        """
        sentences_max_length = sentences.size()[1]
        other_sentences_max_length = other_sentences.size()[1]
        # We need to check which tensor needs additional padding before we can concatenate them
        if sentences_max_length > other_sentences_max_length:
            new_size = other_sentences.size(
            )[0], sentences_max_length, other_sentences.size()[2]
            new_other = torch.zeros(new_size, device=other_sentences.device)
            new_other[:, :other_sentences_max_length, :] = other_sentences
            other_sentences = new_other
        elif sentences_max_length < other_sentences_max_length:
            new_size = sentences.size(
            )[0], other_sentences_max_length, sentences.size()[2]
            new_sentences = torch.zeros(new_size, device=sentences.device)
            new_sentences[:, :sentences_max_length, :] = sentences
            sentences = new_sentences
        return torch.cat([sentences, other_sentences])

    def getOtherBatch(self, train):
        """ This method basically gives you the next batch of samples from the other classes
        in the binary classificationi case. It is saving the state of a iterator and if 
        that iterator is done, gets a new one. The datasets must be non-empty. """
        # We already have an iterator and the iterator is for the same dataset.
        if self.use_train_iterator == train and self.other_iterator is not None:
            try:
                return next(self.other_iterator)
            except StopIteration:
                # If there are no elements left, we just create a new iterator
                pass
        if train:
            dataloader = self.other_dataloader
        else:
            dataloader = self.other_validloader
        self.use_train_iterator = train
        self.other_iterator = iter(dataloader)
        return next(self.other_iterator)
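
fuze_sentences only makes the two batches compatible along the sequence dimension before concatenating them. A small standalone check of that behaviour with dummy tensors (all shapes are illustrative):

import torch

# Two batches with different maximum sentence lengths: (batch, seq_len, embedding_dim).
sentences = torch.randn(4, 7, 300)
other_sentences = torch.randn(2, 5, 300)

# The shorter batch is zero-padded up to the longer length and the two are stacked,
# mirroring what Trainer.fuze_sentences does above.
padded = torch.zeros(2, 7, 300)
padded[:, :5, :] = other_sentences
fused = torch.cat([sentences, padded])
print(fused.shape)  # torch.Size([6, 7, 300])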