def _get_entropies_for_prep_text(self,
                                 prep_text_chunks: List[List[List[str]]],
                                 max_context_allowed: int) -> List[float]:
    """
    Note: this method changes the hidden states of the model!
    """
    with lock:
        self._check_model_loaded()

        if prep_text_chunks == [[]]:
            return []

        loss_list = []
        for chunk in prep_text_chunks:
            for sub_chunk in chunk:
                numericalized_prep_text = torch.tensor([self._vocab.numericalize(sub_chunk)],
                                                       device=get_device(self._force_use_cpu))
                self._save_context(sub_chunk)
                # Shift the input right by one: the previously predicted token is
                # prepended, and the last token of this sub-chunk is held back to
                # become the first input of the next one.
                last_layer = get_last_layer_activations(
                    self._model,
                    torch.cat([self._last_predicted_token_tensor, numericalized_prep_text[:, :-1]], dim=1))
                # Per-token cross-entropy (in nats), then converted to bits.
                loss = F.cross_entropy(last_layer.view(-1, last_layer.shape[-1]),
                                       numericalized_prep_text.view(-1),
                                       reduction='none')
                binary_loss = to_binary_entropy(loss)
                loss_list.extend(binary_loss.tolist())
                self._last_predicted_token_tensor = numericalized_prep_text[:, -1:]
                if len(self.context) == max_context_allowed:
                    self._reset()
        return loss_list
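
# A minimal sketch of to_binary_entropy, assuming it simply rescales the
# nat-based values returned by F.cross_entropy to bits (log base 2). The real
# helper is imported from elsewhere in langmodels and is not shown here.
import math

import torch


def to_binary_entropy(entropy_nats: torch.Tensor) -> torch.Tensor:
    """Convert entropy values from nats (natural log) to bits."""
    return entropy_nats / math.log(2)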
def __init__(self, path: str, force_use_cpu: bool = False, load_only_description: bool = False):
    if not os.path.exists(path):
        raise FileNotFoundError(f'Path does not exist: {path}')

    self._force_use_cpu = force_use_cpu
    self._id = os.path.basename(path)

    path_to_config_file = os.path.join(path, CONFIG_FILE_NAME)
    path_to_metrics_file = os.path.join(path, METRICS_FILE_NAME)
    path_to_tags_file = os.path.join(path, TAGS_FILE_NAME)

    self._metrics = None
    self._config = None
    self._tags = []
    self._context: List[str] = []

    try:
        self._config: LMTrainingConfig = load_config_or_metrics_from_file(path_to_config_file, LMTrainingConfig)
    except FileNotFoundError:
        logger.warning(f'Config file not found: {path_to_config_file}')

    try:
        self._metrics: LMTrainingMetrics = load_config_or_metrics_from_file(path_to_metrics_file, LMTrainingMetrics)
    except FileNotFoundError:
        logger.warning(f'File with metrics not found: {path_to_metrics_file}')

    if os.path.exists(path_to_tags_file):
        value = read_value_from_file(path_to_tags_file, value_type=str)
        if value != '':
            self._tags = value.split(',')

    self._prep_function = self._config.prep_function

    self._load_only_description = load_only_description
    if not load_only_description:
        # We might want to load only the description, without the actual weights,
        # to save time when loading multiple models in order to choose one to work with.
        self._original_vocab = Vocab.load(os.path.join(path, VOCAB_FILE_NAME))
        term_vocab, self._first_nonterm_token = _create_term_vocab(self._original_vocab)
        self._model, self._vocab = self._load_model(path, term_vocab)
        to_test_mode(self._model)
        self._initial_snapshot = take_hidden_state_snapshot(self._model)

        # last_predicted_token_tensor is a rank-2 tensor!
        self._last_predicted_token_tensor = torch.tensor([self._vocab.numericalize([self.STARTING_TOKEN])],
                                                         device=get_device(self._force_use_cpu))
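
# A hedged usage sketch of the constructor above. It assumes this constructor
# belongs to a class named TrainedModel (the class name is not shown in this
# snippet) and that the paths below exist; both are assumptions for illustration.
candidate_paths = ['/models/model-a', '/models/model-b']

# Cheap pass: read config, metrics and tags only, without loading the weights.
descriptions = [TrainedModel(p, load_only_description=True) for p in candidate_paths]

# Fully load one of them (weights, vocab, hidden-state snapshot). Selection
# logic is omitted; we just take the first candidate here.
model = TrainedModel(candidate_paths[0], force_use_cpu=False)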
def reset(self) -> None:
    with lock:
        self._check_model_loaded()
        self._reset()
        # Start again from the beginning-of-sequence token.
        self._last_predicted_token_tensor = torch.tensor([self._vocab.numericalize([self.STARTING_TOKEN])],
                                                         device=get_device(self._force_use_cpu))
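
# For context, a hypothetical sketch of what the private _reset helper might do
# (its body is not shown in this snippet): restore the hidden-state snapshot
# taken at load time and clear the accumulated context. restore_snapshot is
# imported from langmodels.nn below; the exact call signature is an assumption.
def _reset(self) -> None:
    restore_snapshot(self._model, self._initial_snapshot)
    self._context = []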
def _feed_prep_tokens(self, prep_tokens: List[str]) -> None:
    self._check_model_loaded()
    context_tensor = torch.tensor([self._vocab.numericalize(prep_tokens)],
                                  device=get_device(self._force_use_cpu))
    with lock:
        self._save_context(prep_tokens)
        # Push all but the last token through the model purely to advance its
        # hidden state; the returned activations are discarded.
        _ = get_last_layer_activations(self._model, context_tensor[:, :-1])
        # The last token is held back as the query for the next prediction.
        self._last_predicted_token_tensor = context_tensor[:, -1:]
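
# A self-contained illustration of the convention used above and in
# _get_entropies_for_prep_text: the last fed token is kept aside so that the
# next forward pass predicts the token that follows it. The token ids here
# are made up for the example.
import torch

context_tensor = torch.tensor([[10, 11, 12, 13]])     # numericalized [t0, t1, t2, t3]

fed_through_model = context_tensor[:, :-1]            # tensor([[10, 11, 12]]) - advances the hidden state
last_predicted_token_tensor = context_tensor[:, -1:]  # tensor([[13]]) - rank-2, as noted in __init__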
from dataclasses import dataclass
from typing import Callable, Tuple

import torch
from fastai.text import SequentialRNN
from torch import FloatTensor, LongTensor, Tensor
from torch.nn.functional import log_softmax

from langmodels.cuda_util import get_device
from langmodels.nn import (TORCH_LONG_MIN_VAL, get_last_layer_activations,
                           restore_snapshot, take_hidden_state_snapshot)

DEVICE = get_device()


def _get_topk_predictions(model: SequentialRNN, context: FloatTensor, top_k: int) -> Tuple[FloatTensor, LongTensor]:
    last_token_activations = get_last_layer_activations(model, context)
    # TODO: log_softmax is not strictly needed to rank the top k (it is monotonic in the logits)
    predictions = log_softmax(last_token_activations[:, -1], dim=-1)
    return predictions.topk(top_k, dim=-1)


def _topk_are_full_tokens(full_token_flags_sorted: torch.Tensor, top_k: int) -> bool:
    return full_token_flags_sorted.size(0) >= top_k \
           and full_token_flags_sorted[top_k - 1].item() == (top_k - 1)


@dataclass
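
# A worked example of _topk_are_full_tokens, assuming full_token_flags_sorted
# holds the sorted indices (positions within the top-k ranking) of predictions
# that form complete tokens. Because the indices are sorted, distinct and
# non-negative, checking only the element at position top_k - 1 suffices:
# if it equals top_k - 1, the elements before it can only be 0 .. top_k - 2.
example_flags = torch.tensor([0, 1, 2, 5])  # predictions 0, 1, 2 and 5 are full tokens

assert _topk_are_full_tokens(example_flags, top_k=3)      # flags[2] == 2 -> all of the top 3 are full tokens
assert not _topk_are_full_tokens(example_flags, top_k=4)  # flags[3] == 5 != 3 -> some of the top 4 are partial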