Example #1
0
    def _perform_training(self,
                          data_csv_path,
                          model,
                          num_epochs,
                          trained_epochs=0,
                          plot_metrics=False):
        """Run the training loop for ``num_epochs`` additional epochs.

        After every epoch the model is evaluated on both the training and
        validation splits, metrics are optionally plotted, and a checkpoint
        is saved.

        Args:
            data_csv_path: Path to the CSV file that is split into
                train/validation/test data frames.
            model: Model to train; called as ``model(image, 'train', captions)``.
            num_epochs: Number of additional epochs to run.
            trained_epochs: Epochs already completed — used to continue the
                epoch numbering when resuming training.
            plot_metrics: When True, plot loss/Levenshtein curves each epoch.
        """
        train_df, validation_df, _ = TrainingUtils.split_train_val_test(
            data_csv_path)
        dataloader = retrieve_train_dataloader(train_df,
                                               self.vocab,
                                               batch_size=self.batch_size)

        # Padding positions must not contribute to the loss.
        loss_func = nn.CrossEntropyLoss(ignore_index=self.vocab.stoi["<PAD>"])
        optimizer = optim.Adam(model.parameters(), lr=3e-4)
        vocab_size = len(self.vocab)

        train_losses, val_losses = [], []
        train_levenshteins, val_levenshteins = [], []

        for epoch in tqdm(range(trained_epochs + 1,
                                trained_epochs + num_epochs + 1),
                          position=0,
                          leave=True):
            print("\n Epoch: ", epoch)
            for image, captions in tqdm(dataloader):
                image, captions = image.to(device), captions.to(device)

                optimizer.zero_grad()

                outputs = model(image, 'train', captions)
                targets = captions[:, 1:]  # Remove <SOS> token

                # reshape (not view) also handles non-contiguous tensors.
                loss = loss_func(outputs.reshape(-1, vocab_size),
                                 targets.reshape(-1))
                loss.backward()
                optimizer.step()

            model.eval()
            print('Training set evaluation:')
            train_loss, train_levenshtein = TrainingUtils.evaluate_model_on_dataset(
                model, train_df, self.sequence_length, self.batch_size,
                self.vocab, 'eval')
            train_losses.append(train_loss)
            train_levenshteins.append(train_levenshtein)

            print('Validation set evaluation:')
            val_loss, val_levenshtein = TrainingUtils.evaluate_model_on_dataset(
                model, validation_df, self.sequence_length, self.batch_size,
                self.vocab, 'eval')
            val_losses.append(val_loss)
            val_levenshteins.append(val_levenshtein)

            model.train()

            if plot_metrics and train_losses and train_levenshteins and val_losses and val_levenshteins:
                TrainingUtils.plot_metrics(train_losses, train_levenshteins,
                                           val_losses, val_levenshteins)

            self._save_model(model, epoch)
    def _perform_training(self,
                          data_csv_path,
                          model,
                          num_epochs,
                          trained_epochs=0,
                          plot_metrics=False):
        """Train ``model`` on the dataset referenced by ``data_csv_path``.

        Continues the epoch numbering from ``trained_epochs``, evaluates on
        the training and validation splits after every epoch, optionally
        plots the collected metrics, and checkpoints the model each epoch.
        """
        train_df, validation_df, _ = TrainingUtils.split_train_val_test(
            data_csv_path)
        train_loader = retrieve_train_dataloader(train_df,
                                                 self.vocab,
                                                 batch_size=self.batch_size)

        # Ignore padding positions when computing the loss.
        criterion = nn.CrossEntropyLoss(ignore_index=self.vocab.stoi["<PAD>"])
        optimizer = optim.Adam(model.parameters(), self.LEARNING_RATE)

        train_losses = []
        train_levenshteins = []
        val_losses = []
        val_levenshteins = []

        epoch_range = range(trained_epochs + 1,
                            trained_epochs + num_epochs + 1)
        for epoch in tqdm(epoch_range, position=0, leave=True):
            print("\nEpoch: ", epoch)

            for batch_images, batch_captions in tqdm(train_loader):
                batch_images = batch_images.to(device)
                batch_captions = batch_captions.to(device)

                optimizer.zero_grad()

                # Drop the <SOS> token so targets line up with predictions.
                shifted_targets = batch_captions[:, 1:]
                logits = model(batch_images, 'train', batch_captions)
                loss = criterion(logits.reshape(-1, self.vocab_size),
                                 shifted_targets.reshape(-1))

                loss.backward()
                # Clip gradients to stabilize training.
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                optimizer.step()

            model.eval()
            print(f'Training set evaluation:')
            train_loss, train_levenshtein = TrainingUtils.evaluate_model_on_dataset(
                model, train_df, None, self.batch_size, self.vocab, 'train')
            train_losses.append(train_loss)
            train_levenshteins.append(train_levenshtein)

            print(f'Validation set evaluation:')
            val_loss, val_levenshtein = TrainingUtils.evaluate_model_on_dataset(
                model, validation_df, None, self.batch_size, self.vocab,
                'train')
            val_losses.append(val_loss)
            val_levenshteins.append(val_levenshtein)
            model.train()

            if (plot_metrics and train_losses and train_levenshteins
                    and val_losses and val_levenshteins):
                TrainingUtils.plot_metrics(train_losses, train_levenshteins,
                                           val_losses, val_levenshteins)

            self._save_model(model, epoch)