Code example #1
def calculate_losses_for_batch(trained_model: TrainedModel, token_loader: BatchedTokenLoader) -> Generator[List[LossesWithMetadata], None, None]:
    """
    changes hidden states of the model!!
    """
    with torch.no_grad():
        batch_size = token_loader.batch_size
        numericalized_start_point = trained_model.vocab.stoi[trained_model.STARTING_TOKEN]
        numericalized_last_predicted = torch.full((batch_size, 1), numericalized_start_point, dtype=torch.int64, device=get_device())

        losses_with_metadata_list = [LossesWithMetadata.empty(get_device()) for _ in range(batch_size)]

        for prepped_token_batch, non_max_seq_len, code_structure, reset in token_loader:
            sub_token_seq_len = prepped_token_batch[0].sub_token_size()

            numericalized_batch = torch.tensor([trained_model.vocab.numericalize(sequence)
                                                for sequence in prepped_token_batch],
                                               device=get_device(), dtype=torch.int64)

            # Shift the targets right by one position: each token is predicted from the
            # tokens before it, starting from the last prediction of the previous batch.
            input_batch = torch.cat([numericalized_last_predicted, numericalized_batch[:, :-1]], dim=1)
            last_layer = get_last_layer_activations(trained_model.model, input_batch)
            # Dividing by log(2) converts the cross-entropy from nats to bits.
            loss: torch.Tensor = cross_entropy(last_layer.view(-1, last_layer.shape[-1]),
                                               numericalized_batch.view(-1),
                                               reduction='none').view(-1, sub_token_seq_len) / log(2)
            # Remember the last token of this batch: it becomes the first input of the next one.
            numericalized_last_predicted = numericalized_batch[:, -1:]

            current_batch_losses_with_metadata = [LossesWithMetadata(loss[i], code_structure[i], prepped_token_batch[i]) for i in range(batch_size)]
            current_batch_losses_with_metadata = cut_off_placeholders(current_batch_losses_with_metadata, non_max_seq_len)

            for i in range(batch_size):
                losses_with_metadata_list[i].extend(current_batch_losses_with_metadata[i])

            # Split the accumulated losses: yield one part now and carry the
            # remainder over to the next batch.
            to_yield = []
            for i in range(batch_size):
                y, losses_with_metadata_list[i] = losses_with_metadata_list[i].split_by_first_subtoken()
                to_yield.append(y)

            yield to_yield

            # When the loader signals a reset (e.g. at a sequence boundary),
            # clear the model's hidden state.
            if reset:
                trained_model.reset()
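
A minimal consumption sketch (not part of the source): accumulating the total loss in bits over a corpus. `load_trained_model` and `make_token_loader` are hypothetical stand-ins for the project's loading utilities, and the `.losses` attribute of `LossesWithMetadata` is an assumed name based on the constructor call above.

# Sketch only; loader helpers and the `.losses` attribute are assumptions.
trained_model = load_trained_model('/path/to/model')     # hypothetical helper
token_loader = make_token_loader('/path/to/corpus')      # hypothetical helper

total_bits = 0.0
for batch in calculate_losses_for_batch(trained_model, token_loader):
    for losses_with_metadata in batch:
        total_bits += losses_with_metadata.losses.sum().item()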
Code example #2
File: model.py Project: giganticode/langmodels
    def __init__(self, path: str, after_epoch: Optional[int] = None,
                 force_use_cpu: bool = False, load_only_description: bool = False, device: Optional[int] = None):
        if not os.path.exists(path):
            raise FileNotFoundError(f'Path does not exist: {path}')
        self._force_use_cpu = force_use_cpu
        self._id = os.path.basename(path)
        path_to_config_file = os.path.join(path, CONFIG_FILE_NAME)
        path_to_metrics_file = os.path.join(path, METRICS_FILE_NAME)
        path_to_tags_file = os.path.join(path, TAGS_FILE_NAME)
        self._metrics = None
        self._config = None
        self._tags = []
        self._context: List[str] = []
        try:
            self._config: LMTrainingConfig = load_config_or_metrics_from_file(path_to_config_file, LMTrainingConfig)
        except FileNotFoundError:
            logger.warning(f'Config file not found: {path_to_config_file}')
        try:
            self._metrics: LMTrainingMetrics = load_config_or_metrics_from_file(path_to_metrics_file, LMTrainingMetrics)
        except FileNotFoundError:
            logger.warning(f'File with metrics not found: {path_to_metrics_file}')
        if os.path.exists(path_to_tags_file):
            value = read_value_from_file(path_to_tags_file, value_type=str)
            if value != '':
                self._tags = value.split(',')
        self.prep_function = self._config.prep_function

        self._load_only_description = load_only_description
        # We might want to load only the description, without the actual weights,
        # to save time when loading multiple models in order to choose one of them to work with.
        if not load_only_description:
            self._original_vocab = Vocab.load(os.path.join(path, VOCAB_FILE_NAME))
            term_vocab, self._first_nonterm_token = _create_term_vocab(self._original_vocab)
            self._model, self._vocab = self._load_model(path, after_epoch, term_vocab, device=device)
            to_test_mode(self._model)
            self._initial_snapshot = take_hidden_state_snapshot(self._model)

            # last_predicted_token_tensor is a rank-2 tensor!
            self._last_predicted_token_tensor = torch.tensor([self._vocab.numericalize([self.STARTING_TOKEN])],
                                                             device=get_device(self._force_use_cpu))
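
A sketch of the pattern described in the comment above: load only the descriptions of several saved models, pick one, then load it fully. It assumes this __init__ belongs to the TrainedModel class referenced in example #1; models_dir is a hypothetical directory.

import os

models_dir = '/path/to/models'  # hypothetical directory of saved models
model_paths = [os.path.join(models_dir, d) for d in os.listdir(models_dir)]
# Cheap pass: no vocabulary or weights are loaded.
descriptions = [TrainedModel(p, load_only_description=True) for p in model_paths]
# ...inspect their configs, metrics and tags, then fully load the chosen one:
chosen_model = TrainedModel(model_paths[0])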
Code example #3
File: beamsearch.py Project: giganticode/langmodels
from math import log
from typing import Tuple, Callable, Optional

import torch
from torch import FloatTensor, LongTensor, Tensor
from dataclasses import dataclass
from fastai.text import SequentialRNN
from torch.nn.functional import log_softmax

from langmodels.nn import get_last_layer_activations, take_hidden_state_snapshot, TORCH_LONG_MIN_VAL

from langmodels.nn import restore_snapshot
from langmodels.torchtypes import AnyDeviceFloatTensor, AnyDeviceLongTensor
from langmodels.util.cuda import get_device, Device

DEFAULT_DEVICE: Device = get_device()


def _get_topk_predictions(model: SequentialRNN, context: AnyDeviceLongTensor, top_k: int) -> Tuple[AnyDeviceFloatTensor, AnyDeviceLongTensor]:
    last_token_activations = get_last_layer_activations(model, context)
    predictions = log_softmax(last_token_activations[:, -1], dim=-1)  # TODO log_softmax not really needed
    return predictions.topk(top_k, dim=-1)


def _topk_are_full_tokens(full_token_flags_sorted: torch.Tensor, top_k: int) -> bool:
    # The positions of full-token candidates are sorted in ascending order, so the
    # (top_k-1)-th entry equals top_k-1 exactly when the top_k best candidates are
    # all full tokens.
    return full_token_flags_sorted.size(0) >= top_k and full_token_flags_sorted[top_k - 1].item() == top_k - 1


@dataclass
class FlattenedCandidateList(object):
    """
Code example #4
File: model.py Project: giganticode/langmodels
    def reset(self) -> None:
        with lock:
            self._check_model_loaded()
            self._reset()
            self._last_predicted_token_tensor = torch.tensor([self._vocab.numericalize([self.STARTING_TOKEN])],
                                                             device=get_device(self._force_use_cpu))
Code example #5
File: model.py Project: giganticode/langmodels
    def _feed_prep_tokens(self, prep_tokens: List[str]) -> None:
        self._check_model_loaded()
        context_tensor = torch.tensor([self._vocab.numericalize(prep_tokens)], device=get_device(self._force_use_cpu))
        with lock:
            self._save_context(prep_tokens)
            # Run the model over all tokens but the last to update its hidden state;
            # the last token becomes the input for the next prediction.
            _ = get_last_layer_activations(self._model, context_tensor[:, :-1])
            self._last_predicted_token_tensor = context_tensor[:, -1:]
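
Examples #4 and #5 together suggest the following usage pattern (a sketch only; _feed_prep_tokens is a private method shown here purely for illustration, and the token strings are hypothetical):

# Feed the context of one snippet, then reset before an unrelated snippet so
# that no hidden state leaks between the two evaluations.
model._feed_prep_tokens(['first', 'snippet'])
model.reset()
model._feed_prep_tokens(['second', 'snippet'])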