Example #1
def detect(vae, dataset, sample_size, device):
    """
    Compute the anomaly score for a single review, 
    see algorithm 4 in https://pdfs.semanticscholar.org/0611/46b1d7938d7a8dae70e3531a00fceb3c78e8.pdf
    Args
        vae: trained vae model
        data: tensor of size (1, )
        sample_size: number of z~N(\mu,\sigma) to sample
        device: 
    Returns
        Anomaly score: List[List[str]] where reviews are sorted by reconstruction loss

    """
    data_loader = utils.batch_loader(dataset=dataset, batch_size=1, shuffle=True)
    sents_recon_loss = []
    for batch_idx, batch_data in data_loader:
        # sent is a tensor of size (sent_len, 1) holding a single review
        sent = vae.vocab.vocab_entry.to_input_tensor(batch_data, device=device)
        # Replicate the review sample_size times: (sent_len, 1) x (1, sample_size)
        # -> (sent_len, sample_size), one column per latent sample
        ones = torch.ones([1, sample_size], device=device)
        sents = torch.mm(sent.float(), ones).long()
        sents_len = torch.tensor([len(sent)] * sample_size, device=device)
        sents_hat, mu, log_sd = vae.forward(sents, sents_len)
        loss = loss_function(sents_hat, sents, mu, log_sd, vae.padding_idx, 1)
        # Normalize by length so long reviews are not over-penalized
        sents_recon_loss.append((batch_data, loss.item() / len(sent)))
        if batch_idx % 100 == 0:
            print('Progress: [{}/{} ({:.0f}%)]'.format(batch_idx, len(dataset), 100*batch_idx/len(dataset)))

    sents_recon_loss.sort(key=lambda x: x[1], reverse=True)
    return sents_recon_loss
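Note: every example on this page consumes utils.batch_loader, whose implementation is not shown. Judging from the call sites, it comes in two flavors: the VAE examples (#1 and #15) pass a single dataset and iterate (batch_idx, batch_data) pairs, while the classifier examples (#2 through #14) pass X_train/Y_train arrays and iterate (X_batch, Y_batch) pairs. A minimal sketch of the first variant, under those assumptions only:

import random

def batch_loader(dataset, batch_size, shuffle=False):
    # Minimal sketch, assuming the loader yields (batch_idx, batch) pairs
    # over a list of tokenized reviews; the repo's real implementation may
    # differ in details such as ordering, padding, or the final partial batch.
    indices = list(range(len(dataset)))
    if shuffle:
        random.shuffle(indices)
    for batch_idx, start in enumerate(range(0, len(indices), batch_size)):
        yield batch_idx, [dataset[i] for i in indices[start:start + batch_size]]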
Example #2
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0
        stall_counter = 0  # validation checks since the last improvement
        lowest_loss = np.inf
        # Used only to print progress once per epoch
        last_epoch = -1
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    # Keeping track of the progress of the training
                    if last_epoch != epoch:
                        print(f"Epoch {epoch}")
                        last_epoch = epoch

                    # Early stopping: stop after 10 validation checks
                    # without improvement in val_history["loss"]
                    if (self.early_stopping and
                            lowest_loss < val_history["loss"][global_step]):
                        stall_counter += 1
                        if stall_counter >= 10:
                            print(
                                f'Early stopping kicked in at epoch {epoch}, with loss value: {val_history["loss"][global_step]}, lowest value = {lowest_loss}'
                            )
                            return train_history, val_history
                    else:
                        stall_counter = 0
                        lowest_loss = val_history["loss"][global_step]

                global_step += 1

        return train_history, val_history
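Note: Examples #2 through #14 all implement the same patience-based early stopping inline, with small variations in counter names, thresholds, and how the nested loops are exited. For comparison, the shared pattern can be factored into a helper; the class below is a hypothetical sketch, not code from any of the source repos:

class EarlyStopper:
    # Hypothetical helper: stop once the validation loss has failed to
    # improve for `patience` consecutive validation checks.
    def __init__(self, patience: int = 10):
        self.patience = patience
        self.counter = 0
        self.lowest_loss = float('inf')

    def should_stop(self, val_loss: float) -> bool:
        if val_loss < self.lowest_loss:
            self.lowest_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
        return self.counter >= self.patience

With such a helper, the validation branch in each example reduces to a single test (if stopper.should_stop(val_loss): return train_history, val_history), which also avoids the double-break bookkeeping seen in Examples #3, #4, and #9.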
Example #3
    def train(
            self,
            num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(
            loss={},
            accuracy={}
        )
        val_history = dict(
            loss={},
            accuracy={}
        )

        global_step = 0

        counter = 0
        lowest_val_loss = float('inf')  # starting at 1 would stop too early when the loss begins above 1
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(
                self.X_train, self.Y_train, self.batch_size, shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    # Early stopping (Task 2d): stop after 10 validation
                    # checks without improvement in val_history["loss"]
                    if val_loss > lowest_val_loss:
                        counter += 1
                    else:
                        lowest_val_loss = val_loss
                        counter = 0
                    if counter == 10:
                        break
                global_step += 1
            if counter == 10:
                print("Stopped at epoch:", epoch)
                break
        return train_history, val_history
Example #4
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0

        early_stopping = False
        previous_validation_loss = float('inf')
        validation_steps = 0

        for epoch in range(num_epochs):
            if early_stopping:
                break
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    # Count validation checks since the last improvement
                    validation_steps += 1

                    if previous_validation_loss > val_history["loss"][global_step]:
                        previous_validation_loss = val_history["loss"][global_step]
                        validation_steps = 0

                    if validation_steps == 10:
                        print("Early stopping on epoch: ", epoch)
                        early_stopping = True
                        break

                global_step += 1
        return train_history, val_history
Example #5
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0
        # Early-stopping bookkeeping
        local_step = 0
        val_min = float('inf')
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    # Early stopping (Task 2d): stop after 10 validation
                    # checks without improvement in val_history["loss"]
                    if local_step < 10:
                        if val_history["loss"][global_step] > val_min:
                            local_step += 1
                        else:
                            val_min = val_history["loss"][global_step]
                            local_step = 0
                    else:
                        print(f"Early stopping after {epoch} epochs")
                        return train_history, val_history

                global_step += 1
        return train_history, val_history
Example #6
    def train(
            self,
            num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(
            loss={},
            accuracy={}
        )
        val_history = dict(
            loss={},
            accuracy={}
        )

        global_step = 0
        best_loss = float('inf')
        count = 0
        for epoch in tqdm(range(num_epochs)):
            train_loader = utils.batch_loader(
                self.X_train, self.Y_train, self.batch_size, shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    # Early stopping
                    if val_loss < best_loss:
                        best_loss = val_loss
                        count = 0
                    else:
                        count += 1
                        if count >= 50 and self.early_stop:
                            print(f"Early stopping at step {global_step}, epoch {epoch}")
                            return train_history, val_history

                global_step += 1
        return train_history, val_history
Example #7
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0
        counter = 0
        prev_best_loss = np.inf
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    if self.stop_at_count:
                        # No improvement
                        if prev_best_loss < val_loss:
                            counter += 1
                        else:
                            counter = 0
                            prev_best_loss = val_loss

                        # Max number of validation checks without improvement reached
                        if counter == self.stop_at_count:
                            print(
                                f"We went through {epoch} of {num_epochs} epochs before stopping"
                            )
                            return train_history, val_history

                global_step += 1
        return train_history, val_history
Example #8
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0
        lowest_val_loss = np.inf
        early_stop_counter = 0

        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    # Task 2d - Early stopping implemented here with a patience of 10
                    if val_loss < lowest_val_loss:
                        lowest_val_loss = val_loss
                        early_stop_counter = 0

                    else:
                        early_stop_counter += 1

                    if early_stop_counter >= 10:
                        print("Early stopping triggered at epoch:", epoch)
                        # return train_history, val_history  # uncomment to actually stop early

                global_step += 1
        return train_history, val_history
Example #9
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})
        min_val_loss = float('inf')
        es_tracker = 0
        global_step = 0
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val

                    # Early stopping (Task 2d): stop after 10 validation
                    # checks without improvement in val_history["loss"]
                    if val_loss < min_val_loss:
                        min_val_loss = val_loss
                        es_tracker = 0
                    else:
                        es_tracker += 1

                    if es_tracker > 9:
                        # Early stop triggered; break out of both loops
                        print('Early stopping at step', global_step, 'epoch', epoch)
                        break
                global_step += 1
            if es_tracker > 9:
                break
        return train_history, val_history
Example #10
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        do_early_stopping = True
        global_step = 0
        early_stop_counter = 0
        lowest_val_loss = sys.maxsize

        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val
                    # Early stopping (copied from the last assignment)

                    if do_early_stopping:
                        if val_loss < lowest_val_loss:
                            lowest_val_loss = val_loss
                            early_stop_counter = 0
                        else:
                            early_stop_counter += 1

                        if early_stop_counter >= 50:
                            print("Early stop at epoch ", epoch,
                                  " and global step ", global_step)
                            return train_history, val_history
                global_step += 1
        return train_history, val_history
Example #11
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0
        stop = False
        best = float('inf')
        # Best validation loss recorded at each validation check
        best_arr = []

        for epoch in tqdm(range(num_epochs)):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:

                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val
                    best_arr.append(val_loss)
                    if val_loss < best:
                        best = val_loss
                    # Early stopping (Task 2d): stop if the best loss so far
                    # did not occur within the last 10 validation checks
                    if len(best_arr) >= 10 and best not in best_arr[-10:]:
                        return train_history, val_history

                global_step += 1
        return train_history, val_history
Example #12
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        # Variables for early stopping
        counter = 0
        patience = 50
        lowest_val_loss = float('inf')

        global_step = 0
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val
                    # Early stopping (copied from the last assignment)
                    counter += 1
                    if val_history["loss"][global_step] < lowest_val_loss:
                        lowest_val_loss = val_history["loss"][global_step]
                        counter = 0
                    if counter > patience:
                        print(f'Training stopped after {epoch} epochs')
                        return train_history, val_history
                global_step += 1
        return train_history, val_history
Example #13
    def train(self, num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(self.X_train,
                                              self.Y_train,
                                              self.batch_size,
                                              shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val
                    # Early stopping (copied from the last assignment):
                    # min_step is the training step where the validation loss was minimal
                    if len(val_history["loss"]) > 50:
                        min_step = min(val_history["loss"].items(),
                                       key=lambda x: x[1])[0]
                        # Stop if min_step is not among the last 50 validation checks
                        if min_step not in sorted(val_history['loss'].keys(),
                                                  reverse=True)[:50]:
                            print(
                                "Early stopping after {} epochs".format(epoch))
                            return train_history, val_history

                global_step += 1
        return train_history, val_history
Example #14
    def train(
            self,
            num_epochs: int):
        """
        Training loop for model.
        Implements stochastic gradient descent with num_epochs passes over the train dataset.
        Returns:
            train_history: a dictionary containing loss and accuracy over all training steps
            val_history: a dictionary containing loss and accuracy over a selected set of steps
        """
        # Utility variables
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        # A tracking value of loss over all training steps
        train_history = dict(
            loss={},
            accuracy={}
        )
        val_history = dict(
            loss={},
            accuracy={}
        )

        global_step = 0
        for epoch in range(num_epochs):
            train_loader = utils.batch_loader(
                self.X_train, self.Y_train, self.batch_size, shuffle=self.shuffle_dataset)
            for X_batch, Y_batch in iter(train_loader):
                loss = self.train_step(X_batch, Y_batch)
                # Track training loss continuously
                train_history["loss"][global_step] = loss

                # Track validation loss / accuracy every time we progress 20% through the dataset
                if global_step % num_steps_per_val == 0:
                    val_loss, accuracy_train, accuracy_val = self.validation_step()
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["loss"][global_step] = val_loss
                    val_history["accuracy"][global_step] = accuracy_val
                    
                    # Early stopping: stop when the validation loss hasn't
                    # improved over the last 60 validation checks
                    if len(val_history["loss"]) > 60 and np.argmin(list(val_history["loss"].values())[-61:]) == 0:
                        print(f'Early stopping after {epoch} epochs')
                        print(f'Final Accuracy: {val_history["accuracy"][global_step]}')
                        print(f'Final Training Loss: {train_history["loss"][global_step]}')
                        return train_history, val_history
                global_step += 1
        return train_history, val_history
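Note: Examples #13 and #14 avoid an explicit counter: they inspect the recorded loss history directly and stop when the best value falls outside a trailing window. A toy illustration of Example #14's argmin test (the loss values here are made up):

import numpy as np

# Stop when the minimum of the last k+1 recorded validation losses is the
# oldest entry in the window, i.e. no improvement over the last k checks.
losses = [0.9, 0.5, 0.52, 0.53, 0.55, 0.56]
k = 4
should_stop = len(losses) > k and int(np.argmin(losses[-(k + 1):])) == 0
print(should_stop)  # True: 0.5 is the oldest value in the window and still the best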
Example #15
def train(epoch, vae, optimizer, dataset, batch_size, device, keep_rate, a, b):
    """Run one VAE training epoch with word dropout and an annealed KL weight."""
    KLweight = utils.KLweight(a, b)
    vae.train()
    total_loss = 0
    data_loader = utils.batch_loader(dataset=dataset, batch_size=batch_size, shuffle=True)
    dataset_size = len(dataset)
    # batch_data is List[List[str]]
    for batch_idx, batch_data in data_loader:
        # sents is tensor of size (sents_len, batch_size)
        sents = vae.vocab.vocab_entry.to_input_tensor(batch_data, device=device)
        batch_data_dropout = utils.word_dropout(batch_data, keep_rate, '<unk>')
        sents_dropout = vae.vocab.vocab_entry.to_input_tensor(batch_data_dropout, device=device)
        sents_len = torch.tensor([len(sent) for sent in batch_data], device=device)
        optimizer.zero_grad()
        sents_hat, mu, log_sd = vae.forward(sents_dropout, sents_len)
        # Fractional progress through training, used to anneal the KL weight
        x = epoch + min(dataset_size, (batch_idx + 1) * batch_size) / dataset_size
        loss = loss_function(sents_hat, sents, mu, log_sd, vae.padding_idx, KLweight.weight(x))
        loss.backward()
        total_loss += loss.item()
        optimizer.step()

        seen = min(dataset_size, (batch_idx + 1) * batch_size)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(
            epoch, seen, dataset_size, 100. * seen / dataset_size, loss.item()))
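Note: Examples #1 and #15 both call loss_function(sents_hat, sents, mu, log_sd, padding_idx, kl_weight), which is not shown on this page. Given that call signature and the standard VAE objective, a plausible sketch follows; the logit shapes and the sum reduction are assumptions, and the repo's real implementation may differ:

import torch
import torch.nn.functional as F

def loss_function(sents_hat, sents, mu, log_sd, padding_idx, kl_weight):
    # Assumed shapes: sents_hat is (sent_len, batch_size, vocab_size) logits,
    # sents is (sent_len, batch_size) token ids.
    # Reconstruction term: token-level cross-entropy, ignoring padding.
    recon = F.cross_entropy(sents_hat.reshape(-1, sents_hat.size(-1)),
                            sents.reshape(-1),
                            ignore_index=padding_idx,
                            reduction='sum')
    # KL(N(mu, sigma) || N(0, I)) for a diagonal Gaussian with sigma = exp(log_sd)
    kl = -0.5 * torch.sum(1 + 2 * log_sd - mu.pow(2) - (2 * log_sd).exp())
    return recon + kl_weight * kl

utils.KLweight(a, b) then only has to map the fractional progress x to a weight, typically annealed from 0 toward 1; a sigmoid schedule such as 1 / (1 + exp(-a * (x - b))) is one common choice for this kind of annealing, though the schedule actually used here is unknown.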