def calculate_losses_for_batch(trained_model: TrainedModel, token_loader: BatchedTokenLoader) -> Generator[List[LossesWithMetadata], None, None]:
    """Yield per-sequence losses for every batch produced by ``token_loader``.

    WARNING: iterating this generator changes the hidden states of the model!

    The model input for a batch is the batch shifted right by one position:
    its first column is the last token id of the previous batch (the id of
    ``STARTING_TOKEN`` for the very first batch). The element-wise
    cross-entropy is divided by ``log(2)`` (presumably converting nats to
    bits -- confirm against the ``cross_entropy`` implementation in use).

    Per-sequence results are accumulated across batches. After each batch
    every accumulator is split with ``split_by_first_subtoken()``: the first
    part is yielded, the second part is carried over to the next batch.

    :param trained_model: model wrapper providing ``vocab``, ``model``,
        ``STARTING_TOKEN`` and ``reset()``.
    :param token_loader: iterable of
        ``(prepped_token_batch, non_max_seq_len, code_structure, reset)``
        tuples exposing ``batch_size``.
    :return: generator of lists holding one item per sequence in the batch.
    """
    with torch.no_grad():
        batch_size = token_loader.batch_size
        start_token_id = trained_model.vocab.stoi[trained_model.STARTING_TOKEN]
        previous_last_ids = torch.full((batch_size, 1), start_token_id,
                                       dtype=torch.int64, device=get_device())
        pending = [LossesWithMetadata.empty(get_device()) for _ in range(batch_size)]

        for prepped_token_batch, non_max_seq_len, code_structure, reset in token_loader:
            sub_token_seq_len = prepped_token_batch[0].sub_token_size()

            target_ids = torch.tensor(
                [trained_model.vocab.numericalize(sequence) for sequence in prepped_token_batch],
                device=get_device(),
                dtype=torch.int64,
            )

            # Teacher forcing: prepend the previous batch's last token and drop
            # the current last one, so position t predicts target token t.
            shifted_inputs = torch.cat([previous_last_ids, target_ids[:, :-1]], dim=1)
            activations = get_last_layer_activations(trained_model.model, shifted_inputs)

            flat_losses = cross_entropy(
                activations.view(-1, activations.shape[-1]),
                target_ids.view(-1),
                reduction='none',
            )
            loss: torch.Tensor = flat_losses.view(-1, sub_token_seq_len) / log(2)
            previous_last_ids = target_ids[:, -1:]

            batch_losses = cut_off_placeholders(
                [LossesWithMetadata(loss[i], code_structure[i], prepped_token_batch[i])
                 for i in range(batch_size)],
                non_max_seq_len,
            )

            # First append everything from this batch to the accumulators ...
            for i, accumulated in enumerate(pending):
                accumulated.extend(batch_losses[i])

            # ... then split each accumulator: first part is emitted now,
            # the remainder waits for the next batch.
            splits = [accumulated.split_by_first_subtoken() for accumulated in pending]
            to_yield = [ready for ready, _ in splits]
            pending = [remainder for _, remainder in splits]

            yield to_yield

            # Runs when the consumer resumes the generator after the yield.
            if reset:
                trained_model.reset()
def __init__(self, path: str, after_epoch: Optional[int] = None, force_use_cpu: bool = False,
             load_only_description: bool = False, device: Optional[int] = None):
    """Load a trained model, or only its description, from directory ``path``.

    The config, metrics and tags files are optional: a missing config or
    metrics file is logged as a warning and the corresponding attribute
    stays ``None``; a missing or empty tags file leaves the tag list empty.

    :param path: directory containing the model files; its base name
        becomes the model id.
    :param after_epoch: forwarded to ``_load_model``.
    :param force_use_cpu: forwarded to ``get_device`` when creating tensors.
    :param load_only_description: when True, the vocabulary and the model
        weights are not loaded.
    :param device: forwarded to ``_load_model``.
    :raises FileNotFoundError: if ``path`` does not exist.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f'Path does not exist: {path}')

    self._force_use_cpu = force_use_cpu
    self._id = os.path.basename(path)

    path_to_config_file = os.path.join(path, CONFIG_FILE_NAME)
    path_to_metrics_file = os.path.join(path, METRICS_FILE_NAME)
    path_to_tags_file = os.path.join(path, TAGS_FILE_NAME)

    self._metrics: Optional[LMTrainingMetrics] = None
    self._config: Optional[LMTrainingConfig] = None
    self._tags = []
    self._context: List[str] = []

    try:
        self._config = load_config_or_metrics_from_file(path_to_config_file, LMTrainingConfig)
    except FileNotFoundError:
        logger.warning(f'Config file not found: {path_to_config_file}')

    try:
        self._metrics = load_config_or_metrics_from_file(path_to_metrics_file, LMTrainingMetrics)
    except FileNotFoundError:
        logger.warning(f'File with metrics not found: {path_to_metrics_file}')

    if os.path.exists(path_to_tags_file):
        value = read_value_from_file(path_to_tags_file, value_type=str)
        if value != '':
            self._tags = value.split(',')

    # The config is allowed to be missing (see the warning above), so do not
    # dereference it unconditionally: without a config there is no
    # prep function to expose.
    self.prep_function = self._config.prep_function if self._config is not None else None

    self._load_only_description = load_only_description
    if not load_only_description:
        # we might want to load only description without loading actual weights when we want
        # to save time when loading multiple models to choose one of them to work with
        self._original_vocab = Vocab.load(os.path.join(path, VOCAB_FILE_NAME))
        term_vocab, self._first_nonterm_token = _create_term_vocab(self._original_vocab)
        self._model, self._vocab = self._load_model(path, after_epoch, term_vocab, device=device)
        to_test_mode(self._model)
        self._initial_snapshot = take_hidden_state_snapshot(self._model)

        # last_predicted_token_tensor is a rank-2 tensor!
        self._last_predicted_token_tensor = torch.tensor(
            [self._vocab.numericalize([self.STARTING_TOKEN])],
            device=get_device(self._force_use_cpu),
        )
from math import log from typing import Tuple, Callable, Optional import torch from torch import FloatTensor, LongTensor, Tensor from dataclasses import dataclass from fastai.text import SequentialRNN from torch.nn.functional import log_softmax from langmodels.nn import get_last_layer_activations, take_hidden_state_snapshot, TORCH_LONG_MIN_VAL from langmodels.nn import restore_snapshot from langmodels.torchtypes import AnyDeviceFloatTensor, AnyDeviceLongTensor from langmodels.util.cuda import get_device, Device DEFAULT_DEVICE: Device = get_device() def _get_topk_predictions(model: SequentialRNN, context: AnyDeviceLongTensor, top_k: int) -> Tuple[AnyDeviceFloatTensor, AnyDeviceLongTensor]: last_token_activations = get_last_layer_activations(model, context) predictions = log_softmax(last_token_activations[:, -1], dim=-1) # TODO log_softmax not really needed return predictions.topk(top_k, dim=-1) def _topk_are_full_tokens(full_token_flags_sorted: torch.Tensor, top_k: int) -> bool: return full_token_flags_sorted.size(0) >= top_k and full_token_flags_sorted[top_k-1].item() == (top_k - 1) @dataclass class FlattenedCandidateList(object): """
def reset(self) -> None:
    """Reset the model and start over from ``STARTING_TOKEN``.

    While holding ``lock``: verifies that the model is loaded, delegates to
    ``_reset()``, and sets the last predicted token tensor to a tensor built
    from the numericalized ``STARTING_TOKEN``.
    """
    with lock:
        self._check_model_loaded()
        self._reset()

        start_token_ids = self._vocab.numericalize([self.STARTING_TOKEN])
        target_device = get_device(self._force_use_cpu)
        # The extra list nesting adds a leading dimension to the tensor.
        self._last_predicted_token_tensor = torch.tensor([start_token_ids], device=target_device)
def _feed_prep_tokens(self, prep_tokens: List[str]) -> None:
    """Feed already preprocessed tokens to the model as context.

    All tokens except the last one are run through the model (the returned
    activations are discarded); the last token is only remembered as the
    last predicted token tensor. The tokens are also passed to
    ``_save_context``. The model is only touched while holding ``lock``.

    :param prep_tokens: preprocessed tokens to numericalize and feed.
    """
    self._check_model_loaded()

    token_ids = self._vocab.numericalize(prep_tokens)
    # The extra list nesting adds a leading dimension to the tensor.
    context_tensor = torch.tensor([token_ids], device=get_device(self._force_use_cpu))

    with lock:
        self._save_context(prep_tokens)

        all_but_last = context_tensor[:, :-1]
        # Called for its effect on the model only; the output is not needed.
        get_last_layer_activations(self._model, all_but_last)

        self._last_predicted_token_tensor = context_tensor[:, -1:]