def _feed_prep_tokens(self, prep_tokens: List[str]) -> None:
    """
    Feed already-preprocessed tokens through the model to warm up its hidden state.

    All tokens except the last are run through the model; the last token is kept
    aside so the next prediction step can use it as input.
    Mutates the model's hidden state and the saved context.
    """
    self._check_model_loaded()
    numericalized = self._vocab.numericalize(prep_tokens)
    context_tensor = torch.tensor([numericalized], device=get_device(self._force_use_cpu))
    with lock:
        self._save_context(prep_tokens)
        # Activations are discarded — this call is only for its side effect
        # of advancing the model's hidden state.
        get_last_layer_activations(self._model, context_tensor[:, :-1])
        self._last_predicted_token_tensor = context_tensor[:, -1:]
def _get_entropies_for_prep_text(self, prep_text_chunks: List[List[List[str]]], max_context_allowed: int) -> List[float]:
    """
    Compute per-sub-token binary (base-2) cross-entropies for preprocessed text.

    changes hidden states of the model!!

    :param prep_text_chunks: nested token structure: chunks -> sub-chunks -> tokens.
        The sentinel value [[]] means "no input" and yields an empty result.
    :param max_context_allowed: when the saved context reaches exactly this length,
        the model state is reset mid-iteration.
    :return: a flat list of entropies, one per sub-token, in input order.
    """
    with lock:
        self._check_model_loaded()
        # NOTE(review): only the exact sentinel [[]] short-circuits here;
        # a plain [] would fall through to the loop (harmlessly) — confirm intended.
        if prep_text_chunks == [[]]:
            return []
        loss_list = []
        for chunk in prep_text_chunks:
            for sub_chunk in chunk:
                numericalized_prep_text = torch.tensor([self._vocab.numericalize(sub_chunk)], device=get_device(self._force_use_cpu))
                self._save_context(sub_chunk)
                # Shift by one: the previously predicted token is prepended and the
                # final token dropped, so position i predicts sub_chunk token i.
                last_layer = get_last_layer_activations(self._model, torch.cat([self._last_predicted_token_tensor, numericalized_prep_text[:, :-1]], dim=1))
                loss = F.cross_entropy(last_layer.view(-1, last_layer.shape[-1]), numericalized_prep_text.view(-1), reduction='none')
                # Convert nat-based cross-entropy to bits.
                binary_loss = to_binary_entropy(loss)
                loss_list.extend(binary_loss.tolist())
                # Remember the last token of this sub-chunk as next iteration's input.
                self._last_predicted_token_tensor = numericalized_prep_text[:, -1:]
                # Reset triggers on exact equality — assumes context grows one
                # sub-chunk at a time so equality is always reached; TODO confirm.
                if len(self.context) == max_context_allowed:
                    self._reset()
        return loss_list
def calculate_losses_for_batch(trained_model: TrainedModel,
                               token_loader: BatchedTokenLoader) -> Generator[List[LossesWithMetadata], None, None]:
    """
    Compute per-sub-token cross-entropy losses (in bits) for each batch produced
    by ``token_loader``, yielding one ``List[LossesWithMetadata]`` (one entry per
    batch element) per completed first sub-token split.

    changes hidden states of the model!!

    :param trained_model: model whose hidden state is advanced (and reset when the
        loader signals ``reset``).
    :param token_loader: yields (prepped_token_batch, non_max_seq_len,
        code_structure, reset) tuples; ``batch_size`` is read from it.
    """
    with torch.no_grad():
        batch_size = token_loader.batch_size
        numericalized_start_point = trained_model.vocab.stoi[trained_model.STARTING_TOKEN]
        # Each sequence starts as if the previous prediction was the start token.
        numericalized_last_predicted = torch.full((batch_size, 1), numericalized_start_point,
                                                  dtype=torch.int64, device=get_device())

        # Fix: comprehension variables were unused indices (enumerate/i) — plain
        # iteration and `_` express intent; behavior is unchanged.
        losses_with_metadata_list = [LossesWithMetadata.empty(get_device()) for _ in range(batch_size)]
        for prepped_token_batch, non_max_seq_len, code_structure, reset in token_loader:
            sub_token_seq_len = prepped_token_batch[0].sub_token_size()
            numericalized_batch = torch.tensor([trained_model.vocab.numericalize(sequence)
                                                for sequence in prepped_token_batch],
                                               device=get_device(), dtype=torch.int64)
            # Shift inputs right by one: prepend last prediction, drop final token,
            # so position i predicts target token i.
            input_batch = torch.cat([numericalized_last_predicted, numericalized_batch[:, :-1]], dim=1)
            last_layer = get_last_layer_activations(trained_model.model, input_batch)
            # Divide by log(2): convert nats to bits.
            loss: torch.Tensor = cross_entropy(last_layer.view(-1, last_layer.shape[-1]),
                                               numericalized_batch.view(-1),
                                               reduction='none').view(-1, sub_token_seq_len) / log(2)
            numericalized_last_predicted = numericalized_batch[:, -1:]

            current_batch_losses_with_metadata = [LossesWithMetadata(loss[i], code_structure[i], prepped_token_batch[i])
                                                  for i in range(batch_size)]
            # Trim losses computed over padding/placeholder positions.
            current_batch_losses_with_metadata = cut_off_placeholders(current_batch_losses_with_metadata,
                                                                      non_max_seq_len)

            for i in range(batch_size):
                losses_with_metadata_list[i].extend(current_batch_losses_with_metadata[i])

            # Yield everything up to the first sub-token boundary; keep the remainder
            # buffered for the next iteration.
            to_yield = []
            for i in range(batch_size):
                y, losses_with_metadata_list[i] = losses_with_metadata_list[i].split_by_first_subtoken()
                to_yield.append(y)
            yield to_yield

            if reset:
                trained_model.reset()
def _get_topk_predictions(model: SequentialRNN, context: AnyDeviceLongTensor,
                          top_k: int) -> Tuple[AnyDeviceFloatTensor, AnyDeviceLongTensor]:
    """
    Return the ``top_k`` highest-scoring next-token candidates given ``context``.

    :return: a (log-probabilities, token-indices) pair as produced by ``topk``.
    """
    activations = get_last_layer_activations(model, context)
    final_position = activations[:, -1]
    # TODO log_softmax not really needed (monotonic — ranking is unchanged)
    log_probs = log_softmax(final_position, dim=-1)
    return log_probs.topk(top_k, dim=-1)