Beispiel #1
0
    def _test_restore_with_val_losses(self, checkpointer, val_losses,
                                      best_epoch):
        """
        Drive a simulated training run through ``checkpointer`` using the
        given per-epoch ``val_losses``, then assert that the model's final
        weights are those recorded at ``best_epoch``.
        """
        batch_size = BestModelRestoreTest.batch_size
        generator = some_data_generator(batch_size)

        best_epoch_weights = None
        checkpointer.set_params({'epochs': len(val_losses), 'steps': 1})
        checkpointer.set_model(self.model)
        checkpointer.on_train_begin({})
        for epoch, val_loss in enumerate(val_losses, 1):
            checkpointer.on_epoch_begin(epoch, {})
            checkpointer.on_batch_begin(1, {})
            loss = self._update_model(generator)
            batch_logs = {'batch': 1, 'size': batch_size, 'loss': loss}
            checkpointer.on_batch_end(1, batch_logs)
            epoch_logs = {'epoch': epoch, 'loss': loss, 'val_loss': val_loss}
            checkpointer.on_epoch_end(epoch, epoch_logs)
            # Snapshot the weights right after the best epoch so they can be
            # compared against the restored weights at the end.
            if epoch == best_epoch:
                best_epoch_weights = torch_to_numpy(
                    self.model.get_weight_copies())
        checkpointer.on_train_end({})

        final_weights = torch_to_numpy(self.model.get_weight_copies())
        self.assertEqual(best_epoch_weights, final_weights)
def acc(y_pred_tensor, y_true_tensor):
    """
    Accuracy (in percent) of argmax predictions against the true labels,
    returned as a one-element tensor of the same type as ``y_pred_tensor``.
    """
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)

    # One boolean per sample: does the argmax class match the target?
    hits = [np.argmax(pred) == target
            for pred, target in zip(y_pred, y_true)]

    return y_pred_tensor.data.new([np.mean(hits) * 100])
Beispiel #3
0
    def _test_restore_best(self, val_losses):
        """
        Assert that the model's current weights equal the checkpoint saved
        at the epoch with the lowest validation loss in ``val_losses``.
        """
        final_weights = torch_to_numpy(self.model.get_weight_copies())

        # Epochs are 1-based, hence the +1 on the index of the minimum.
        best_epoch = val_losses.index(min(val_losses)) + 1
        self.model.load_weights(
            self.checkpoint_filename.format(epoch=best_epoch))

        best_weights = torch_to_numpy(self.model.get_weight_copies())

        self.assertEqual(best_weights, final_weights)
def acc(y_pred_tensor, y_true_tensor):
    """
    Token-level accuracy (percent) over flattened (batch, seq) predictions,
    skipping positions whose true label is 0 (presumably a padding label —
    TODO confirm against the dataset's label encoding).
    """
    y_pred_tensor = y_pred_tensor.view(
        y_pred_tensor.shape[0] * y_pred_tensor.shape[1], -1)
    y_true_tensor = y_true_tensor.view(
        y_true_tensor.shape[0] * y_true_tensor.shape[1])
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)

    hits = [np.argmax(pred) == target
            for pred, target in zip(y_pred, y_true)
            if target != 0]

    return y_pred_tensor.data.new([np.mean(hits) * 100])
def f1(y_pred_tensor, y_true_tensor):
    """
    Macro-averaged F1 score over flattened (batch, seq) predictions,
    skipping positions whose true label is 0 (presumably a padding label —
    TODO confirm against the dataset's label encoding).
    """
    y_pred_tensor = y_pred_tensor.view(
        y_pred_tensor.shape[0] * y_pred_tensor.shape[1], -1)
    y_true_tensor = y_true_tensor.view(
        y_true_tensor.shape[0] * y_true_tensor.shape[1])
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)

    # Collect (predicted class, true class) pairs for non-padding positions.
    kept = [(np.argmax(pred), target)
            for pred, target in zip(y_pred, y_true)
            if target != 0]
    predictions = [p for p, _ in kept]
    truths = [t for _, t in kept]

    return torch.FloatTensor([f1_score(truths, predictions, average='macro')])
Beispiel #6
0
    def predict_generator(self, generator, *, steps=None):
        """
        Returns the predictions of the network given batches of samples ``x``,
        where the tensors are converted into Numpy arrays.

        generator: Generator-like object for the dataset. The generator must
            yield a batch of samples. See the ``fit_generator()`` method for
            details on the types of generators supported.
        steps (int, optional): Number of iterations done on
            ``generator``. (Defaults the number of steps needed to see the
            entire dataset)

        Returns:
            List of the predictions of each batch with tensors converted into
            Numpy arrays.
        """
        self.model.eval()
        # Default to the generator's own length when it exposes one.
        if steps is None and hasattr(generator, '__len__'):
            steps = len(generator)
        with torch.no_grad():
            return [
                torch_to_numpy(self.model(self._process_input(batch)))
                for _, batch in _get_step_iterator(steps, generator)
            ]
Beispiel #7
0
    def _test_checkpointer(self, checkpointer, lr_scheduler):
        """
        Run a short simulated training loop, asserting a checkpoint file is
        written after every epoch, then verify the saved scheduler states.
        """
        scheduler_states = {}
        batch_size = OptimizerCheckpointTest.batch_size
        generator = some_data_generator(batch_size)

        checkpointer.set_params({
            'epochs': OptimizerCheckpointTest.epochs,
            'steps': 1
        })
        checkpointer.set_model(self.model)
        checkpointer.on_train_begin({})
        for epoch in range(1, OptimizerCheckpointTest.epochs + 1):
            checkpointer.on_epoch_begin(epoch, {})
            checkpointer.on_batch_begin(1, {})
            loss = self._update_model(generator)
            batch_logs = {'batch': 1, 'size': batch_size, 'loss': loss}
            checkpointer.on_batch_end(1, batch_logs)
            epoch_logs = {'epoch': epoch, 'loss': loss, 'val_loss': 1}
            checkpointer.on_epoch_end(epoch, epoch_logs)
            # A checkpoint file must exist for this epoch before moving on.
            self.assertTrue(
                os.path.isfile(self.checkpoint_filename.format(epoch=epoch)))
            # Deep-copy the scheduler state for comparison after reloading.
            scheduler_states[epoch] = torch_to_numpy(
                lr_scheduler.scheduler.state_dict(), copy=True)
        checkpointer.on_train_end({})

        self._test_checkpoint(scheduler_states, lr_scheduler)
Beispiel #8
0
    def _test_checkpoint(self, optimizer_states):
        """
        Reload each epoch's optimizer checkpoint and assert it matches the
        state captured during training.
        """
        for epoch, expected_state in optimizer_states.items():
            checkpoint = self.checkpoint_filename.format(epoch=epoch)
            self.model.load_optimizer_state(checkpoint)
            reloaded_state = torch_to_numpy(self.optimizer.state_dict())
            self.assertEqual(expected_state, reloaded_state)
Beispiel #9
0
    def _test_checkpoint(self, scheduler_states, lr_scheduler):
        """
        Reload each epoch's scheduler checkpoint and assert it matches the
        state captured during training.
        """
        for epoch, expected_state in scheduler_states.items():
            checkpoint = self.checkpoint_filename.format(epoch=epoch)
            lr_scheduler.load_state(checkpoint)
            reloaded_state = torch_to_numpy(
                lr_scheduler.scheduler.state_dict())
            self.assertEqual(expected_state, reloaded_state)
Beispiel #10
0
    def _compute_loss_and_metrics(self, x, y, return_loss_tensor=False, return_pred=False):
        """
        Forward ``x`` through the model and compute the loss and metrics
        against ``y``.

        Returns a ``(loss, metrics, pred_y)`` triple: ``loss`` is a float
        unless ``return_loss_tensor`` is true, and ``pred_y`` is ``None``
        unless ``return_pred`` is true.
        """
        x, y = self._process_input(x, y)
        pred_y = self.model(x)
        loss = self.loss_function(pred_y, y)
        if not return_loss_tensor:
            loss = float(loss)
        # Metrics never need gradients.
        with torch.no_grad():
            metrics = self._compute_metrics(pred_y, y)

        if return_pred:
            pred_y = torch_to_numpy(pred_y)
        else:
            pred_y = None
        return loss, metrics, pred_y
Beispiel #11
0
 def _loss_and_metrics_tensors_to_numpy(self,
                                        loss_tensor,
                                        metrics_tensors,
                                        pred_y=None):
     loss = float(loss_tensor)
     metrics = np.array(
         [float(metric_tensor) for metric_tensor in metrics_tensors])
     ret = (loss, metrics)
     if pred_y is not None:
         pred_y = torch_to_numpy(pred_y)
         ret = ret + (pred_y, )
     return ret
Beispiel #12
0
def predict_embeddings(model, loader):
    """
    Compute the model's output embedding for every example in ``loader``.

    Args:
        model: Wrapper whose ``model.model`` is the underlying network.
        loader: Iterable yielding ``(x, labels)`` batches.

    Returns:
        Dict mapping each label to the list of embeddings predicted for it
        (a label may appear in several batches, hence a list).
    """
    model.model.eval()
    predicted_embeddings = {}
    for x, y in loader:
        x = tensors_to_variables(x)
        embeddings = torch_to_numpy(model.model(x))
        for label, embedding in zip(y, embeddings):
            # setdefault replaces the manual "key present?" branching.
            predicted_embeddings.setdefault(label, []).append(embedding)

    return predicted_embeddings
Beispiel #13
0
    def predict_on_batch(self, x):
        """
        Returns the predictions of the network given a batch ``x``, where the
        tensors are converted into Numpy arrays.

        Args:
            x (Union[Tensor, np.ndarray]): Batch for which to predict.

        Returns:
            The predictions with tensors converted into Numpy arrays.
        """
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(self._process_input(x))
            return torch_to_numpy(predictions)
def predict_mean_embeddings(model, loader):
    """
    Compute, for each label in ``loader``, the mean of all embeddings the
    model predicts for it.

    Args:
        model: Wrapper whose ``model.model`` is the underlying network.
        loader: Iterable yielding ``(x, labels)`` batches.

    Returns:
        Dict mapping each label to a single averaged embedding (Numpy array).
    """
    model.model.eval()
    predicted_embeddings = {}
    for x, y in loader:
        x = tensors_to_variables(x)
        embeddings = torch_to_numpy(model.model(x))
        for label, embedding in zip(y, embeddings):
            # setdefault replaces the manual "key present?" branching.
            predicted_embeddings.setdefault(label, []).append(embedding)

    # Average the accumulated embeddings per label.
    return {
        label: np.mean(np.array(embeddings), axis=0)
        for label, embeddings in predicted_embeddings.items()
    }
Beispiel #15
0
def predict_OOV(model, char_to_idx, OOV_path, filename):
    """
    Predict an embedding for every out-of-vocabulary word listed in
    ``OOV_path`` and save the results to ``filename``.
    """
    OOVs = load_vocab(OOV_path)

    vectorizer = Vectorizer(char_to_idx)
    examples = [(vectorizer.vectorize_sequence(word), word) for word in OOVs]
    loader = DataLoader(examples,
                        collate_fn=collate_x,
                        use_gpu=False,
                        batch_size=1)

    model.model.eval()
    predicted_embeddings = {}
    for x, y in loader:
        x = tensors_to_variables(x)
        batch_embeddings = torch_to_numpy(model.model(x))
        for label, embedding in zip(y, batch_embeddings):
            predicted_embeddings[label] = embedding

    save_embeddings(predicted_embeddings, filename)
Beispiel #16
0
    def predict_generator(self, generator, steps=None):
        """
        Returns the predictions of the network given batches of samples ``x``,
        where the tensors are converted into Numpy arrays.

        generator: Generator-like object for the dataset. The generator must
            yield a batch of samples.

            If the generator does not have a method ``__len__()``, the
            ``steps`` argument must be provided. Notice that a
            generator made using the python keyword ``yield`` does not
            have such method. However, a PyTorch DataLoader object has a
            such method.

            The method ``__iter__()`` on the generator is called and the
            method ``__next__()`` is called for each step on resulting
            object returned by ``__iter__()``. Notice that a call to
            ``__iter__()`` on a generator made using the python keyword
            ``yield`` returns the generator itself.
        steps (int, optional): Number of iterations done on
            ``generator``. (Defaults the number of steps needed to see the
            entire dataset)

        Returns:
            List of the predictions of each batch with tensors converted into
            Numpy arrays.
        """
        self.model.eval()
        # Without an explicit step count, rely on the generator's length.
        if steps is None:
            steps = len(generator)
        batches = iter(generator)
        pred_y = []
        with torch.no_grad():
            for _ in range(steps):
                batch = self._process_input(next(batches))
                pred_y.append(torch_to_numpy(self.model(batch)))
        return pred_y
Beispiel #17
0
def euclidean_distance(y_pred_tensor, y_true_tensor):
    """
    Mean per-sample Euclidean (L2) distance between predictions and targets,
    returned as a one-element float tensor.
    """
    y_pred = torch_to_numpy(y_pred_tensor)
    y_true = torch_to_numpy(y_true_tensor)
    per_sample = np.linalg.norm(y_true - y_pred, axis=1)
    return torch.FloatTensor([per_sample.mean().tolist()])
Beispiel #18
0
def save_char_embeddings(model, char_to_idx, filename='mimick_char_embeddings'):
    """
    Extract the embedding vector of every character in ``char_to_idx`` from
    the model's character embedding layer and save them to ``filename``.

    Args:
        model: Wrapper whose ``model.model.mimick_lstm.embeddings`` holds the
            character embedding layer.
        char_to_idx: Dict mapping each character to its embedding row index.
        filename: Destination passed to ``save_embeddings``.
    """
    # Hoist the attribute chain out of the loop; build the dict in one pass.
    weights = model.model.mimick_lstm.embeddings.weight.data
    char_embeddings = {
        char: torch_to_numpy(weights[idx])
        for char, idx in char_to_idx.items()
    }
    save_embeddings(char_embeddings, filename)