Example 1
def _feed_prep_tokens(self, prep_tokens: List[str]) -> None:
    self._check_model_loaded()
    # Map the sub-tokens to vocabulary ids; shape (1, n_tokens).
    context_tensor = torch.tensor([self._vocab.numericalize(prep_tokens)],
                                  device=get_device(self._force_use_cpu))
    with lock:
        self._save_context(prep_tokens)
        # Feed all but the last token purely to advance the model's hidden
        # state; the returned activations are discarded.
        _ = get_last_layer_activations(self._model, context_tensor[:, :-1])
        # Hold back the last token: it becomes the first input of the next forward pass.
        self._last_predicted_token_tensor = context_tensor[:, -1:]
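
The slicing at the end is the key detail: every token except the last is pushed through the network only to update its hidden state, and the last token is held back so the next call can prepend it to its own input (Examples 2 and 3 below do exactly that). A minimal, runnable illustration of the split, with made-up token ids:

import torch

tokens = torch.tensor([[5, 8, 2, 9]])  # stand-in for context_tensor
fed  = tokens[:, :-1]  # [[5, 8, 2]] -- advances the hidden state
held = tokens[:, -1:]  # [[9]]       -- first input of the next forward pass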
Example 2
def _get_entropies_for_prep_text(self, prep_text_chunks: List[List[List[str]]],
                                 max_context_allowed: int) -> List[float]:
    """
    Changes hidden states of the model!!
    """
    with lock:
        self._check_model_loaded()

    if prep_text_chunks == [[]]:
        return []

    loss_list = []

    for chunk in prep_text_chunks:
        for sub_chunk in chunk:
            numericalized_prep_text = torch.tensor([self._vocab.numericalize(sub_chunk)],
                                                   device=get_device(self._force_use_cpu))

            self._save_context(sub_chunk)
            # Prepend the held-back token from the previous call and drop this
            # sub-chunk's last token, so the model predicts every token from
            # its predecessor (see Example 1).
            last_layer = get_last_layer_activations(
                self._model,
                torch.cat([self._last_predicted_token_tensor, numericalized_prep_text[:, :-1]], dim=1))
            # Per-token cross-entropy (reduction='none' keeps one value per token) ...
            loss = F.cross_entropy(last_layer.view(-1, last_layer.shape[-1]),
                                   numericalized_prep_text.view(-1),
                                   reduction='none')
            # ... converted from nats to bits.
            binary_loss = to_binary_entropy(loss)
            loss_list.extend(binary_loss.tolist())
            self._last_predicted_token_tensor = numericalized_prep_text[:, -1:]
        # Reset the hidden state once the accumulated context hits the allowed maximum.
        if len(self.context) == max_context_allowed:
            self._reset()
    return loss_list
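
to_binary_entropy is not defined on this page. Judging from the explicit / log(2) that plays the same role in Example 3, it presumably just rescales the natural-log cross-entropy from nats to bits; a minimal sketch under that assumption (not the library's actual code):

from math import log

import torch

def to_binary_entropy(loss: torch.Tensor) -> torch.Tensor:
    # F.cross_entropy returns values in nats; dividing by ln(2)
    # converts them to bits, mirroring the `/ log(2)` in Example 3.
    return loss / log(2)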
Example 3
def calculate_losses_for_batch(trained_model: TrainedModel,
                               token_loader: BatchedTokenLoader) -> Generator[List[LossesWithMetadata], None, None]:
    """
    Changes hidden states of the model!!
    """
    with torch.no_grad():
        batch_size = token_loader.batch_size
        # Every sequence starts from the model's special starting token.
        numericalized_start_point = trained_model.vocab.stoi[trained_model.STARTING_TOKEN]
        numericalized_last_predicted = torch.full((batch_size, 1), numericalized_start_point,
                                                  dtype=torch.int64, device=get_device())

        losses_with_metadata_list = [LossesWithMetadata.empty(get_device()) for _ in range(batch_size)]

        for prepped_token_batch, non_max_seq_len, code_structure, reset in token_loader:
            sub_token_seq_len = prepped_token_batch[0].sub_token_size()

            numericalized_batch = torch.tensor([trained_model.vocab.numericalize(sequence)
                                                for sequence in prepped_token_batch],
                                               device=get_device(), dtype=torch.int64)

            # Shift the input right by one: prepend the previous batch's last
            # token and drop this batch's last one, so position j predicts token j.
            input_batch = torch.cat([numericalized_last_predicted, numericalized_batch[:, :-1]], dim=1)
            last_layer = get_last_layer_activations(trained_model.model, input_batch)
            # Per-token cross-entropy, reshaped back to (batch, seq_len) and
            # divided by ln(2) to convert nats to bits.
            loss: torch.Tensor = cross_entropy(last_layer.view(-1, last_layer.shape[-1]),
                                               numericalized_batch.view(-1),
                                               reduction='none').view(-1, sub_token_seq_len) / log(2)
            numericalized_last_predicted = numericalized_batch[:, -1:]

            current_batch_losses_with_metadata = [LossesWithMetadata(loss[i], code_structure[i], prepped_token_batch[i])
                                                  for i in range(batch_size)]
            # Trim losses that correspond to placeholder (padding) positions.
            current_batch_losses_with_metadata = cut_off_placeholders(current_batch_losses_with_metadata, non_max_seq_len)

            for i in range(batch_size):
                losses_with_metadata_list[i].extend(current_batch_losses_with_metadata[i])

            # Yield what is complete so far; keep the remainder buffered
            # for the next iteration.
            to_yield = []
            for i in range(batch_size):
                y, losses_with_metadata_list[i] = losses_with_metadata_list[i].split_by_first_subtoken()
                to_yield.append(y)

            yield to_yield

            if reset:
                trained_model.reset()
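
The loss expression packs three steps into one line: flatten the activations to (batch * seq_len, vocab) because cross_entropy expects 2-D input, reshape the per-element losses back to one value per token, and rescale from nats to bits. A runnable stand-alone version of just that expression, with random stand-ins for the model outputs:

from math import log

import torch
from torch.nn.functional import cross_entropy

batch_size, seq_len, vocab_size = 2, 4, 10
last_layer = torch.randn(batch_size, seq_len, vocab_size)   # stand-in for the model's activations
targets = torch.randint(vocab_size, (batch_size, seq_len))  # stand-in for numericalized_batch

loss = cross_entropy(last_layer.view(-1, vocab_size), targets.view(-1),
                     reduction='none').view(-1, seq_len) / log(2)
print(loss.shape)  # torch.Size([2, 4]) -- one bit-valued loss per token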
Example 4
def _get_topk_predictions(model: SequentialRNN, context: AnyDeviceLongTensor,
                          top_k: int) -> Tuple[AnyDeviceFloatTensor, AnyDeviceLongTensor]:
    last_token_activations = get_last_layer_activations(model, context)
    # Only the last position matters for next-token prediction.
    predictions = log_softmax(last_token_activations[:, -1], dim=-1)  # TODO: log_softmax not really needed
    return predictions.topk(top_k, dim=-1)
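
The TODO is correct: log_softmax subtracts a per-row constant (the logsumexp), so it is monotonic and leaves the top-k indices unchanged; it only matters if the caller needs the returned scores to be log-probabilities rather than raw activations. A quick check:

import torch
from torch.nn.functional import log_softmax

logits = torch.randn(3, 100)
assert torch.equal(logits.topk(5, dim=-1).indices,
                   log_softmax(logits, dim=-1).topk(5, dim=-1).indices)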