def _compute_loss_and_metrics(self, x, y, return_pred=False):
    """
    Run a forward pass on one batch and evaluate the loss and the metrics.

    Both ``x`` and ``y`` are passed through ``tensors_to_variables`` with
    ``volatile`` set whenever the wrapped model is not in training mode.

    Args:
        x: Batch of inputs fed to ``self.model``.
        y: Batch of targets given to the loss function and to the metrics.
        return_pred (bool, optional): When true, the raw model output is
            appended to the returned tuple. (Default: False)

    Returns:
        ``(loss, metrics)``, or ``(loss, metrics, pred_y)`` when
        ``return_pred`` is true.
    """
    # Outside of training, the inputs are flagged as volatile.
    not_training = not self.model.training
    inputs = tensors_to_variables(x, volatile=not_training)
    targets = tensors_to_variables(y, volatile=not_training)

    prediction = self.model(inputs)
    loss = self.loss_function(prediction, targets)
    metrics = self._compute_metrics(prediction, targets)

    if return_pred:
        return loss, metrics, prediction
    return loss, metrics
def predict_embeddings(model, loader):
    """
    Collect the embeddings predicted for every example, grouped by label.

    The wrapped network (``model.model``) is switched to evaluation mode
    before any batch is processed.

    Args:
        model: Object exposing the network as ``model.model``.
        loader: Iterable yielding ``(x, y)`` batches, where ``y`` holds one
            hashable label per predicted embedding.

    Returns:
        dict mapping each label to the list of embeddings predicted for it,
        in the order they were encountered.
    """
    network = model.model
    network.eval()

    embeddings_by_label = {}
    for batch, labels in loader:
        outputs = torch_to_numpy(network(tensors_to_variables(batch)))
        for label, vector in zip(labels, outputs):
            # Start a new list the first time a label is seen.
            embeddings_by_label.setdefault(label, []).append(vector)
    return embeddings_by_label
def predict_mean_embeddings(model, loader):
    """
    Compute, for every label, the mean of the embeddings predicted for it.

    The wrapped network (``model.model``) is switched to evaluation mode,
    every batch of ``loader`` is run through it, and the resulting
    embeddings are grouped by label before being averaged.

    Args:
        model: Object exposing the network as ``model.model``.
        loader: Iterable yielding ``(x, y)`` batches, where ``y`` holds one
            hashable label per predicted embedding.

    Returns:
        dict mapping each label to the mean (taken along axis 0) of all the
        embeddings collected for that label.
    """
    network = model.model
    network.eval()

    embeddings_by_label = {}
    for batch, labels in loader:
        outputs = torch_to_numpy(network(tensors_to_variables(batch)))
        for label, vector in zip(labels, outputs):
            embeddings_by_label.setdefault(label, []).append(vector)

    # Stack each label's embeddings and average over the stacking axis.
    return {
        label: np.mean(np.array(vectors), axis=0)
        for label, vectors in embeddings_by_label.items()
    }
def predict_OOV(model, char_to_idx, OOV_path, filename):
    """
    Predict an embedding for every word loaded from ``OOV_path`` and save them.

    Each word is vectorized with a ``Vectorizer`` built from ``char_to_idx``
    and run through the network one example per batch. The resulting
    word-to-embedding mapping is handed to ``save_embeddings`` together with
    ``filename``.

    Args:
        model: Object exposing the network as ``model.model``.
        char_to_idx: Mapping given to ``Vectorizer`` to encode the words.
        OOV_path: Location passed to ``load_vocab`` to obtain the words.
        filename: Destination passed to ``save_embeddings``.
    """
    words = load_vocab(OOV_path)
    vectorizer = Vectorizer(char_to_idx)
    examples = [(vectorizer.vectorize_sequence(word), word) for word in words]
    loader = DataLoader(
        examples,
        collate_fn=collate_x,
        use_gpu=False,
        batch_size=1,
    )

    network = model.model
    network.eval()

    word_embeddings = {}
    for batch, batch_words in loader:
        outputs = torch_to_numpy(network(tensors_to_variables(batch)))
        # A word seen more than once keeps its most recent embedding.
        word_embeddings.update(zip(batch_words, outputs))

    save_embeddings(word_embeddings, filename)
def predict_generator(self, generator, steps=None):
    """
    Returns the predictions of the network for the batches produced by
    ``generator``, where the torch variables are converted into numpy
    arrays.

    Args:
        generator: Generator-like object for the dataset. Each item it
            yields must be a batch of samples. If the generator does not
            have a ``__len__()`` method, the ``steps`` argument must be
            provided. Notice that a generator made using the python
            keyword ``yield`` does not have such a method, whereas a
            PyTorch DataLoader object does.

            The method ``__iter__()`` is called once on the generator, and
            ``__next__()`` is called for each step on the object returned
            by ``__iter__()``. Notice that a call to ``__iter__()`` on a
            generator made using the python keyword ``yield`` returns the
            generator itself.
        steps (int, optional): Number of iterations done on ``generator``.
            (Defaults to ``len(generator)``, i.e. the number of steps
            needed to see the entire dataset)

    Returns:
        List of the predictions of each batch with torch variables
        converted into numpy arrays.
    """
    self.model.eval()
    num_steps = len(generator) if steps is None else steps

    predictions = []
    batches = iter(generator)
    for _ in range(num_steps):
        # Inputs are always volatile here: this is inference only.
        batch = tensors_to_variables(next(batches), volatile=True)
        predictions.append(torch_to_numpy(self.model(batch)))
    return predictions