Example #1
    def predict(self, datas: Iterable[Any]) -> Any:
        """Make ad-hoc predictions on batches without labels, and return the results.

        :param datas: the data to predict on, each element given as a data
                      point in a batch

        """
        executor: ModelExecutor = self.executor
        ms: ModelSettings = self.model_settings
        if ms.prediction_mapper_name is None:
            raise ModelError(
                f'The model settings ({ms.name}) is not configured to create '
                + "prediction batches: 'prediction_mapper' is not set")
        pm: PredictionMapper = self.config_factory.new_instance(
            ms.prediction_mapper_name, datas, self.batch_stash)
        self._notify('predict_start')
        try:
            batches: List[Batch] = pm.batches
            if not executor.model_exists:
                executor.load()
            logger.info('predicting...')
            with time('predicted'):
                res: ModelResult = executor.predict(batches)
            eres: EpochResult = res.results[0]
            ret: Any = pm.map_results(eres)
        finally:
            self._notify('predict_end')
            pm.deallocate()
        return ret
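A minimal usage sketch of ``predict`` above, assuming a configured facade instance whose ``prediction_mapper`` accepts raw strings; the variable names and inputs are illustrative, not from the source:

    # hedged sketch: `facade` is assumed to expose the predict() shown above
    sentences = ['The movie was great.', 'The plot made no sense.']
    predictions = facade.predict(sentences)
    print(predictions)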
Example #2
    def model(self) -> BaseNetworkModule:
        """Get the PyTorch module that is used for training and test.

        """
        if self._model is None:
            raise ModelError("No model is populated; use 'load'")
        return self._model
Example #3
 def map_split(n: str):
     s = splits.get(n)
     if s is None:
         raise ModelError(
             f"No split '{n}' in {self.dataset_stash.split_names}, " +
             f'executor splits: {self.dataset_split_names}')
     return s
Example #4
    def get_status(self) -> TrainStatus:
        """Return the epoch to set in the training loop of the :class:`.ModelExecutor`.

        """
        status = self._read_status()
        if status.action == UpdateAction.STOP:
            # setting to the max value fails the executor's train outer loop,
            # causing a robust non-error exit
            status.epoch = sys.maxsize
        elif status.action == UpdateAction.SET_EPOCH:
            self.current_epoch = status.epoch
            if self.pbar is not None:
                self.pbar.reset()
                self.pbar.update(self.current_epoch)
        elif status.action == UpdateAction.ITERATE_EPOCH:
            self.current_epoch += 1
            status.epoch = self.current_epoch
            stop_reason = self._get_stop_reason()
            if self.pbar is not None:
                self.pbar.update()
            if stop_reason is not None:
                status.action = UpdateAction.STOP
                status.reason = stop_reason
        else:
            raise ModelError(f'Unknown status: {status}')
        if status.reason and self.status_logger.isEnabledFor(logging.INFO):
            self.status_logger.info(status.reason)
        return status
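A hedged sketch of the outer-loop contract implied above: on ``UpdateAction.STOP`` the epoch is set to ``sys.maxsize``, which fails an ``epoch < n_epochs`` test and ends training without an error (the loop and helper names are illustrative):

    # illustrative consuming loop; only the STOP/epoch contract comes from
    # get_status() above
    epoch = 0
    while epoch < n_epochs:
        train_one_epoch()                    # hypothetical helper
        status = train_manager.get_status()
        epoch = status.epoch                 # sys.maxsize on STOP exits here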
Example #5
    def __init__(self, net_settings: NetworkSettings,
                 sub_logger: logging.Logger = None):
        """Initialize.

        :param net_settings: contains common layers such as dropout and batch
                             normalization

        :param sub_logger: used to log activity in this module so the logged
                           module appears to come from some parent model

        """
        super().__init__(sub_logger)
        self.net_settings = ns = net_settings
        if isinstance(ns, DropoutNetworkSettings):
            self.dropout = ns.dropout_layer
        else:
            self.dropout = None
        if isinstance(ns, BatchNormNetworkSettings) and \
           (ns.batch_norm_d is not None or ns.batch_norm_features is not None):
            if ns.batch_norm_d is None or ns.batch_norm_features is None:
                raise ModelError('Both the dimension and features must be ' +
                                 f'set if one is set: {ns}')
            self.batch_norm = ns.batch_norm_layer
        else:
            self.batch_norm = None
        if isinstance(ns, ActivationNetworkSettings):
            self.activation_function = ns.activation_function
        else:
            self.activation_function = None
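A hedged sketch of how a subclass might apply the optional layers configured in ``__init__`` above; the method name and call order are illustrative, not the library's API:

    # hypothetical helper on a subclass: each attribute is None unless the
    # corresponding network settings type configured it
    def _forward_optional(self, x):
        if self.batch_norm is not None:
            x = self.batch_norm(x)
        if self.activation_function is not None:
            x = self.activation_function(x)
        if self.dropout is not None:
            x = self.dropout(x)
        return x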
Example #6
    def result_manager(self) -> ModelResultManager:
        """Return the executor's result manager.

        """
        rm: ModelResultManager = self.executor.result_manager
        if rm is None:
            raise ModelError('No result manager available')
        return rm
Example #7
    def previous_results(self) -> ModelResult:
        """Return the previous results (see class docs).

        """
        rm: ModelResultManager = self.executor.result_manager
        if rm is None:
            raise ModelError('No result manager available')
        return rm[self.previous_results_key]
Example #8
 def _forward(self, batch: Batch, context: SequenceNetworkContext) -> \
         SequenceNetworkOutput:
     use_crf = self.net_settings.use_crf
     split_type: DatasetSplitType = context.split_type
     preds: Optional[List[List[int]]] = None
     labels: Optional[Tensor] = batch.get_labels()
     loss: Optional[Tensor] = None
     score: Optional[Tensor] = None
     if self.logger.isEnabledFor(logging.DEBUG):
         self.logger.debug(f'forward on split: {context.split_type}')
     if context.split_type != DatasetSplitType.train and self.training:
         raise ModelError(
             f'Attempting to use split {split_type} while training')
     if context.split_type == DatasetSplitType.train:
         if use_crf:
             loss = self._forward_train_with_crf(batch)
         else:
             preds, loss = self._forward_train_no_crf(batch, context)
     elif context.split_type == DatasetSplitType.validation:
         if use_crf:
             preds, loss, score = self._decode(batch, True)
         else:
             preds, loss = self._forward_train_no_crf(batch, context)
     elif context.split_type == DatasetSplitType.test:
         if use_crf:
             preds, _, score = self._decode(batch, False)
             loss = batch.torch_config.singleton([0], dtype=torch.float32)
         else:
             preds, loss = self._forward_train_no_crf(batch, context)
     else:
         raise ModelError(f'Unknown data split type: {split_type}')
     # list of lists of the predictions, which are the CRF output when
     # enabled
     if preds is not None:
         preds = self._map_labels(batch, context, preds)
     # padded tensor of shape (batch, data i.e. token length)
     if labels is not None:
         labels = self._map_labels(batch, context, labels)
     self._shape_or_list_debug('output preds', preds)
     self._shape_or_list_debug('output labels', labels)
     out = SequenceNetworkOutput(preds, loss, score, labels)
     if preds is not None and labels is not None and len(labels.size()) > 1:
         out.righsize_labels(preds)
     return out
Example #9
    def get_predictions_factory(self, column_names: List[str] = None,
                                transform: Callable[[DataPoint], tuple] = None,
                                batch_limit: int = sys.maxsize,
                                name: str = None) \
            -> PredictionsDataFrameFactory:
        """Generate a predictions factoty from the test data set.

        :param column_names: the list of string column names for each data item
                             the list returned from ``data_point_transform`` to
                             be added to the results for each label/prediction

        :param transform: a function that returns a tuple, each element
                          corresponding to ``column_names``, to be added to the
                          results for each label/prediction; if ``None`` (the
                          default), ``str`` is used (see the `Iris Jupyter
                          Notebook
                          <https://github.com/plandes/deeplearn/blob/master/notebook/iris.ipynb>`_
                          example)

        :param batch_limit: the max number of batches of results to output

        :param name: the name/ID (name of the file sans extension in the
                     results directory) of the previously archived saved
                     results to fetch or ``None`` to get the last result

        """
        rm: ModelResultManager = self.result_manager
        res: ModelResult
        if name is None:
            res = self.last_result
            key: str = rm.get_last_key(False)
        else:
            res = rm.results_stash[name].model_result
            key: str = name
        if res is None:
            raise ModelError(f'No test results found: {name}')
        if not res.test.contains_results:
            raise ModelError('No test results found')
        path: Path = rm.key_to_path(key)
        return self.predictions_datafrmae_factory_class(
            path, res, self.batch_stash, column_names, transform, batch_limit)
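A hedged usage sketch, assuming a facade whose model has already been tested so archived results exist; the column name, transform, and the ``dataframe`` accessor on the returned factory are assumptions for illustration:

    # hypothetical call: one column per element of the transform's tuple
    factory = facade.get_predictions_factory(
        column_names=['text'],
        transform=lambda dp: (str(dp),))
    df = factory.dataframe   # assumed accessor producing a pandas DataFrame
    print(df.head())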
Example #10
    def _prepare_datasets(self, batch_limit: Union[int, float],
                          to_deallocate: List[Batch],
                          ds_src: List[Stash]) -> \
            Tuple[Union[int, str], List[List[Batch]]]:
        """Return the batch count and the batches for each data set, the latter
        given per dataset as in :meth:`_get_dataset_splits`.

        :return: a tuple of the batch count (``'?'`` when buffered) and
                 ``[(training batch 1..N), (validation batch 1..N),
                 (test batch 1..N)]``

        """
        biter = self.model_settings.batch_iteration
        cnt = 0

        if logger.isEnabledFor(logging.INFO):
            logger.info(f'preparing datasets using iteration: {biter}')

        self._notify('prepare_datasets_start', biter)

        if biter == 'gpu':
            ds_dst = []
            for src in ds_src:
                vlim = self._calc_batch_limit(src, batch_limit)
                cpu_batches = tuple(it.islice(src.values(), vlim))
                gpu_batches = list(map(lambda b: b.to(), cpu_batches))
                cnt += len(gpu_batches)
                # the `to` call returns the same instance if the tensor is
                # already on the GPU, so only deallocate batches copied over
                for cpu_batch, gpu_batch in zip(cpu_batches, gpu_batches):
                    if cpu_batch is not gpu_batch:
                        to_deallocate.append(cpu_batch)
                if not self.model_settings.cache_batches:
                    to_deallocate.extend(gpu_batches)
                ds_dst.append(gpu_batches)
        elif biter == 'cpu':
            ds_dst = []
            for src in ds_src:
                vlim = self._calc_batch_limit(src, batch_limit)
                batches = list(it.islice(src.values(), vlim))
                cnt += len(batches)
                if not self.model_settings.cache_batches:
                    to_deallocate.extend(batches)
                ds_dst.append(batches)
        elif biter == 'buffered':
            ds_dst = ds_src
            cnt = '?'
        else:
            raise ModelError(f'No such batch iteration method: {biter}')

        self._notify('prepare_datasets_end', biter)

        self._preproces_training(ds_dst[0])

        return cnt, ds_dst
Example #11
 def _calc_batch_limit(self, src: Stash, batch_limit: Union[int,
                                                            float]) -> int:
     if batch_limit <= 0:
         raise ModelError(f'Batch limit must be positive: {batch_limit}')
     if isinstance(batch_limit, float):
         if batch_limit > 1.0:
             raise ModelError('Batch limit must be 1 or less ' +
                              f'when a float: {batch_limit}')
         vlim = round(len(src) * batch_limit)
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug('batch limit calculated as a percentage: ' +
                          f'{vlim} = {len(src)} * {batch_limit}')
     else:
         vlim = batch_limit
     if isinstance(src, SplitStashContainer):
         desc = f' for {src.split_name}'
     else:
         desc = ''
     if logger.isEnabledFor(logging.INFO):
         logger.info(f'using batch limit: {vlim}{desc}')
     return vlim
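A worked example of the float branch above, independent of the framework: a fractional limit is taken as a percentage of the stash size (values are illustrative):

    src_len = 100          # stash size, i.e. len(src)
    batch_limit = 0.25     # a float is treated as a fraction of the stash
    vlim = round(src_len * batch_limit)
    assert vlim == 25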
Example #12
    def last_result(self) -> ModelResult:
        """The last recorded result during an :meth:`.ModelExecutor.train` or
        :meth:`.ModelExecutor.test` invocation is used.

        """
        res = self.executor.model_result
        if res is None:
            rm: ModelResultManager = self.result_manager
            res = rm.load()
            if res is None:
                raise ModelError('No results found')
        return res
Example #13
    def _execute(self, model: BaseNetworkModule, optimizer: Optimizer,
                 criterion, batch: Batch, split_type: DatasetSplitType) -> \
            Tuple[Tensor]:
        logger = self.logger
        cctx = SequenceNetworkContext(split_type, criterion)
        seq_out: SequenceNetworkOutput = model(batch, cctx)
        outcomes: Tensor = seq_out.predictions
        loss: Tensor = seq_out.loss

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f'{batch.id}: output: {seq_out}')

        if seq_out.labels is not None:
            labels = seq_out.labels
        else:
            labels: Tensor = batch.get_labels()
            labels = self._encode_labels(labels)

        if logger.isEnabledFor(logging.DEBUG):
            if labels is not None:
                logger.debug(f'label shape: {labels.shape}')

        self._debug_output('after forward', labels, outcomes)

        # iterate over the error surface
        self._step(loss, split_type, optimizer, model)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f'split: {split_type}, loss: {loss}')

        # transform the labels in the same manner as the predictions so tensor
        # shapes match
        if not self.model_settings.nominal_labels:
            labels = self._decode_outcomes(labels)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f'label nom decoded: {labels.shape}')

        if outcomes is None and split_type != DatasetSplitType.train:
            raise ModelError('Expecting predictions for all splits except ' +
                             f'{DatasetSplitType.train}, but got none for ' +
                             f'{split_type}')

        if logger.isEnabledFor(logging.DEBUG):
            if outcomes is not None:
                logger.debug(f'outcomes: {outcomes.shape}')
            if labels is not None:
                logger.debug(f'labels: {labels.shape}')

        loss, labels, outcomes, outputs = self.torch_config.to_cpu_deallocate(
            loss, labels, outcomes, seq_out.outputs)
        return loss, labels, outcomes, outputs
Example #14
 def __post_init__(self):
     super().__init__()
      if not isinstance(self.dataset_stash, DatasetSplitStash):
         raise ModelError('Expecting type DatasetSplitStash but ' +
                          f'got {self.dataset_stash.__class__}')
     self._model = None
     self._dealloc_model = False
     self.model_result: ModelResult = None
     self.batch_stash.delegate_attr: bool = True
     self._criterion_optimizer_scheduler = PersistedWork(
         '_criterion_optimizer_scheduler', self)
     self._result_manager = PersistedWork('_result_manager', self)
     self._train_manager = PersistedWork('_train_manager', self)
     self.cached_batches = {}
     self.debug = False
Example #15
    def test(self, description: str = None) -> ModelResult:
        """Load the model from disk and test it.

        """
        if self.debuged:
            raise ModelError('Testing is not allowed in debug mode')
        executor = self.executor
        executor.load()
        logger.info('testing...')
        self._notify('test_start', description)
        with time('tested'):
            res = executor.test(description)
        if self.writer is not None:
            res.write(writer=self.writer)
        self._notify('test_end', description)
        return res
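A hedged usage sketch of ``test`` above, assuming the model was previously trained and saved to disk (the description string is illustrative):

    # hypothetical call: loads the persisted model, runs the test set, and
    # returns a ModelResult
    res = facade.test('baseline run')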
Example #16
 def state_dict(self, destination=None, prefix='', *args, **kwargs):
     state = super().state_dict(destination, prefix, *args, **kwargs)
     if logger.isEnabledFor(logging.DEBUG):
         self._debug(f'state_dict: trainable: {self.trainable}')
     if not self.trainable:
         emb_key = self._get_emb_key(prefix)
         if logger.isEnabledFor(logging.DEBUG):
             self._debug(f'state_dict: embedding key: {emb_key}')
         if emb_key is not None:
             if emb_key not in state:
                 raise ModelError(f'No key {emb_key} in {state.keys()}')
             arr = state[emb_key]
             if arr is not None:
                 if logger.isEnabledFor(logging.DEBUG):
                     self._debug(f'state_dict: emb state: {arr.shape}')
                 assert arr.shape == self.embed_model.matrix.shape
             state[emb_key] = None
     return state
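A hedged sketch of the override's effect: for a non-trainable embedding, the (typically large) matrix is nulled out of the checkpoint and must be restored from the embedding model at load time. The key name is an assumption:

    state = layer.state_dict()
    # 'emb.weight' is a hypothetical embedding key; the None value holds
    # only when layer.trainable is False
    assert state['emb.weight'] is None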
Example #17
    def _execute(self, sets_name: str, description: str, func: Callable,
                 ds_src: tuple) -> bool:
        """Either train or test the model based on method ``func``.

        :param sets_name: the name of the data sets, which is either
                          ``train`` or ``test``

        :param func: the method to call to do the training or testing

        :param ds_src: a tuple of datasets in a form such as ``(train,
                       validation, test)`` (see :meth:`_get_dataset_splits`)

        :return: ``True`` if training/testing was successful, ``False`` if an
                 exception occurred or the run bailed out early

        """
        to_deallocate: List[Batch] = []
        ds_dst: List[List[Batch]] = None
        batch_limit = self.model_settings.batch_limit
        biter = self.model_settings.batch_iteration

        if self.model_settings.cache_batches and biter == 'buffered':
            raise ModelError('Cannot cache batches for batch ' +
                             "iteration setting 'buffered'")

        if logger.isEnabledFor(logging.INFO):
            logger.info(f'batch iteration: {biter}, limit: {batch_limit}' +
                        f', caching: {self.model_settings.cache_batches}'
                        f', cached: {len(self.cached_batches)}')

        self._notify('execute_start', sets_name)

        self._gc(1)

        ds_dst = self.cached_batches.get(sets_name)
        if ds_dst is None:
            cnt = 0
            with time('loaded {cnt} batches'):
                cnt, ds_dst = self._prepare_datasets(batch_limit,
                                                     to_deallocate, ds_src)
            if self.model_settings.cache_batches:
                self.cached_batches[sets_name] = ds_dst

        if logger.isEnabledFor(logging.INFO):
            logger.info('train/test sets: ' +
                        f'{" ".join(map(lambda l: str(len(l)), ds_dst))}')

        try:
            with time(f'executed {sets_name}'):
                func(*ds_dst)
            if description is not None:
                res_name = f'{self.model_result.index}: {description}'
                self.model_result.name = res_name
            return True
        except EarlyBailError as e:
            logger.warning(f'<{e}>')
            self.reset()
            return False
        finally:
            self._notify('execute_end', sets_name)
            self._train_manager.clear()
            if logger.isEnabledFor(logging.INFO):
                logger.info(f'deallocating {len(to_deallocate)} batches')
            for batch in to_deallocate:
                if logger.isEnabledFor(logging.DEBUG):
                    logger.debug(f'deallocating: {batch}')
                batch.deallocate()
            self._gc(1)
            self.torch_config.empty_cache()
Example #18
 def __getstate__(self):
      raise ModelError(f'Layers should not be pickled: {self}')
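A hedged sketch of the guard above tripping: because ``__getstate__`` raises, any attempt to pickle such a layer fails fast (the ``layer`` instance is illustrative):

    import pickle
    try:
        pickle.dumps(layer)   # hypothetical layer instance
    except ModelError as e:
        print(e)              # 'Layers should not be pickled: ...'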
Example #19
    def _execute(self, model: BaseNetworkModule, optimizer: Optimizer,
                 criterion, batch: Batch, split_type: DatasetSplitType) -> \
            Tuple[Tensor]:
        """Execute one epoch of training, testing, validation or prediction.

        :param model: the model to exercise

        :param optimizer: the optimization algorithm (e.g. Adam) to iterate

        :param criterion: the loss function (e.g. cross entropy loss) used for
                          the backward propagation step

        :param batch: contains the data to test, predict, and optionally the
                      labels for training and validation

        :param split_type: indicates if we're training, validating or testing

        :return: a tuple of the loss, labels, outcomes, and the output
                 (i.e. logits); the outcomes are the decoded
                 (:meth:`_decode_outcomes`) output and represent some ready to
                 use data, like argmax'd classification nominal label integers

        """
        logger = self.logger
        labels: Tensor = batch.get_labels()
        # forward pass, get our output, which are usually the logits
        output: Tensor = model(batch)

        # sanity check
        if output is None:
            raise ModelError('Null model output')

        # check for sane state with labels, and munge if necessary
        if labels is None:
            # sanity check
            if split_type != DatasetSplitType.test:
                raise ModelError('Expecting the test split type on ' +
                                 f'prediction, but got: {split_type}')
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug('skipping loss calculation on prediction execute')
            loss = None
        else:
            # put labels in a form to be used by the loss function
            labels = self._encode_labels(labels)
            self._debug_output('input', labels, output)

            # calculate the loss with the logps and the labels
            loss = criterion(output, labels)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f'split: {split_type}, loss: {loss}')

        # iterate over the error surface
        self._step(loss, split_type, optimizer, model)
        self._debug_output('output', labels, output)

        # apply the same decoding on the labels as the output if necessary
        if labels is not None and not self.model_settings.nominal_labels:
            labels = self._decode_outcomes(labels)
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f'label nom decoded: {labels.shape}')

        outcomes = self._decode_outcomes(output)
        loss, labels, outcomes, output = self.torch_config.to_cpu_deallocate(
            loss, labels, outcomes, output)
        return loss, labels, outcomes, output