Example 1
 def __post_init__(self):
     super().__post_init__()
     PersistableContainer.__init__(self)
     if not isinstance(self.split_container, SplitKeyContainer):
         raise DatasetError('Expecting type SplitKeyContainer but ' +
                            f'got: {type(self.split_container)}')
     self._inst_split_name = None
     self._keys_by_split = PersistedWork('_keys_by_split', self)
     self._splits = PersistedWork('_splits', self)
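
The `PersistedWork` objects built in these `__post_init__` methods are normally read back through an accessor decorated with `persisted` from the same `zensols.persist` module, keyed by the attribute name. Below is a minimal sketch of that pairing, not taken from any example above; the `Corpus` class, its `cache_path` field, and the `_load_documents` helper are hypothetical and only illustrate the pattern.

from dataclasses import dataclass, field
from pathlib import Path
from typing import Tuple
from zensols.persist import PersistedWork, persisted


@dataclass
class Corpus(object):
    """Hypothetical container that caches an expensive load to disk."""
    cache_path: Path = field(default=Path('target/corpus.dat'))

    def __post_init__(self):
        # configure caching up front: pickle to ``cache_path`` and create
        # missing parent directories, mirroring the examples in this listing
        self._docs = PersistedWork(self.cache_path, self, mkdir=True)

    @property
    @persisted('_docs')
    def documents(self) -> Tuple[str, ...]:
        # computed on first access, then served from the cached/pickled value
        return self._load_documents()

    def _load_documents(self) -> Tuple[str, ...]:
        # stand-in for an expensive parse or download
        return ('doc one', 'doc two')
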
Example 2
 def __post_init__(self):
     super().__init__()
     if not isinstance(self.dataset_stash, DatasetSplitStash):
         raise ModelError('Expecting type DatasetSplitStash but ' +
                          f'got {self.dataset_stash.__class__}')
     self._model = None
     self._dealloc_model = False
     self.model_result: ModelResult = None
     self.batch_stash.delegate_attr: bool = True
     self._criterion_optimizer_scheduler = PersistedWork(
         '_criterion_optimizer_scheduler', self)
     self._result_manager = PersistedWork('_result_manager', self)
     self._train_manager = PersistedWork('_train_manager', self)
     self.cached_batches = {}
     self.debug = False
Example 3
 def __post_init__(self, config_factory: ConfigFactory):
     super().__init__()
     self._init_config_factory(config_factory)
     self._config_factory = PersistedWork('_config_factory', self)
     self._executor = PersistedWork('_executor', self)
     self.debuged = False
     if self.progress_bar_cols == 'term':
         try:
             term_width = os.get_terminal_size()[0]
             # make space for embedded validation loss messages
             self.progress_bar_cols = term_width - 5
         except OSError:
             logger.debug('unable to automatically determine ' +
                          'terminal width--skipping')
             self.progress_bar_cols = None
Example 4
 def __post_init__(self):
     super().__init__()
     if hasattr(self, '_data_points') and self._data_points is not None:
         self.data_point_ids = tuple(map(lambda d: d.id, self.data_points))
     self._decoded_state = PersistedWork('_decoded_state',
                                         self,
                                         transient=True)
     self.state = 'n'
Example 5
 def __post_init__(self):
     super().__post_init__()
     if self.partition_attr is None:
         raise DatasetError("Missing 'partition_attr' field")
     dfpath = self.split_labels_path
     if dfpath is None:
         dfpath = '_strat_split_labels'
     self._strat_split_labels = PersistedWork(dfpath, self, mkdir=True)
Example 6
    def __init__(self, config):
        """Initialize

        :param config: the application configuration
        """
        self.config = config
        self.download = config.download
        self._fetcher = PersistedWork('_fetcher', self, True)
Example 7
 def __post_init__(self, weighted_split_path: Path):
     super().__post_init__()
     if weighted_split_path is None:
         path = '_label_counts'
     else:
         file_name = f'weighted-labels-{self.weighted_split_name}.dat'
         path = weighted_split_path / file_name
     self._label_counts = PersistedWork(path, self)
Example 8
 def __post_init__(self, cache: bool):
     super().__init__()
     if self.cache_dir is not None and not self.cache_dir.exists():
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug(f'creating cache directory: {self.cache_dir}')
         self.cache_dir.mkdir(parents=True, exist_ok=True)
     if self.cased is None:
         if self.model_id.find('uncased') >= 0:
             self.cased = False
         else:
             logger.info("'cased' not given--assuming a cased model")
             self.cased = True
     self._tokenizer = PersistedWork('_tokenizer', self, cache)
     self._model = PersistedWork('_model', self, cache)
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug(f'id: {self.model_id}, cased: {self.cased}')
Example 9
    @classmethod
    def from_struct(cls: type, struct: Dict[str, Any],
                    target_dir: Path) -> Distribution:
        """Return a distrbution directly from the data structure created from
        :class:`.Discoverer`.

        :param struct: the data structure given by :meth:`.Discoverer.freeze`
                       using ``flatten=True``

        :param target_dir: where the distribution will be *thawed*

        """
        self = cls(None, None, target_dir, PathTranslator(target_dir))
        self._struct = PersistedWork('_struct', self, initial_value=struct)
        return self
Example 10
    def __init__(self, use_gpu: bool = True, data_type: type = torch.float32,
                 cuda_device_index: int = None):
        """Initialize this configuration.

        :param use_gpu: whether or not to use CUDA/GPU

        :param data_type: the default data type to use when creating new
                          tensors in this configuration

        :param cuda_device_index: the CUDA device to use, which defaults to 0
                                  if ``use_gpu`` is ``True``

        """
        super().__init__()
        logger.debug(f'use_gpu: {use_gpu}')
        self.use_gpu = use_gpu
        self.data_type = data_type
        # we can't globally cache this in case there are multiple instances of
        # this class that have different values of `use_gpu`
        self._init_device_pw = PersistedWork('_init_device_pw', self)
        self._cpu_device_pw = PersistedWork(
            '_cpu_device_pw', self, cache_global=True)
        self._cpu_device_pw._mark_deallocated()
        self._cuda_device_index = cuda_device_index
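
The comment in the example above distinguishes per-instance from global caching. The sketch below illustrates that distinction, assuming `cache_global=True` shares the computed value across all instances of the owning class while the default keeps one value per instance; the `DeviceSettings` class and its methods are hypothetical.

from zensols.persist import PersistedWork, persisted


class DeviceSettings(object):
    """Hypothetical illustration of per-instance vs. global caching."""
    def __init__(self, use_gpu: bool):
        self.use_gpu = use_gpu
        # depends on ``use_gpu``, so each instance caches its own value
        self._device_pw = PersistedWork('_device_pw', self)
        # the CPU device never varies, so one cached value can be shared
        # across every instance of this class
        self._cpu_device_pw = PersistedWork(
            '_cpu_device_pw', self, cache_global=True)

    @persisted('_device_pw')
    def device_name(self) -> str:
        return 'cuda' if self.use_gpu else 'cpu'

    @persisted('_cpu_device_pw')
    def cpu_device_name(self) -> str:
        return 'cpu'
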
Example 11
 def __post_init__(self):
     super().__post_init__()
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug('creating fd vec manager')
     if self.token_feature_ids is None:
         self.token_feature_ids = self.doc_parser.token_feature_ids
     else:
         feat_diff = self.token_feature_ids - self.doc_parser.token_feature_ids
         if len(feat_diff) > 0:
             fdiffs = ', '.join(feat_diff)
             raise VectorizerError(
                 'Parser token features do not exist in vectorizer: ' +
                 f'{self.token_feature_ids} - ' +
                 f'{self.doc_parser.token_feature_ids} = {fdiffs}')
     self._spacy_vectorizers = PersistedWork('_spacy_vectorizers', self)
Example 12
 def __post_init__(self, decoded_attributes):
     super().__post_init__()
     Deallocatable.__init__(self)
     # TODO: this class conflates key split and delegate stash functionality
     # in the `split_stash_container`.  An instance of this type serves the
     # purpose, but it need not be.  Instead it just needs to be both a
     # SplitKeyContainer and a Stash.  This probably should be split out in
     # to two different fields.
     cont = self.split_stash_container
     if not isinstance(cont, SplitStashContainer) \
        and (not isinstance(cont, SplitKeyContainer) or
             not isinstance(cont, Stash)):
         raise DeepLearnError('Expecting SplitStashContainer but got ' +
                              f'{self.split_stash_container.__class__}')
     self.data_point_id_sets_path.parent.mkdir(parents=True, exist_ok=True)
     self._batch_data_point_sets = PersistedWork(
         self.data_point_id_sets_path, self)
     self.priming = False
     self.decoded_attributes = decoded_attributes
     self._update_comp_stash_attribs()
Example 13
 def __post_init__(self):
     path = '_key_queue' if self.path is None else self.path
     self._key_queue = PersistedWork(path, self, mkdir=True)
     self._iter = 0
Example 14
 def __post_init__(self):
     super().__post_init__()
     self.manager_set = None
     self._vectorizers_pw = PersistedWork('_vectorizers_pw', self)
Example 15
 def __post_init__(self):
     super().__init__()
     self._data_inst = PersistedWork('_data_inst', self, transient=True)
Example 16
 def __post_init__(self):
     super().__init__()
     self._vec_dim = PersistedWork('_vec_dim', self, self.resource.cache)
Example 17
 def __post_init__(self):
     super().__post_init__()
     self._managers_pw = PersistedWork('_managers_pw', self)
Example 18
 def __post_init__(self):
     super().__post_init__()
     Deallocatable.__init__(self)
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug(f'split stash post init: {self.dataframe_path}')
     self._dataframe = PersistedWork(self.dataframe_path, self, mkdir=True)
Example 19
 def __post_init__(self):
     super().__post_init__()
     self._keys_by_split = PersistedWork(self.key_path, self, mkdir=True)
Example 20
 def __post_init__(self):
     self._previous_results = PersistedWork(
         '_previous_results', self,
         cache_global=self.cache_previous_results)
Example 21
 def __post_init__(self):
     self.dealloc_resources = []
     self._cached_facade = PersistedWork('_cached_facade', self, True)
Example 22
 def __post_init__(self):
     super().__post_init__()
     self._model = PersistedWork('_model', self)
Example 23
 def __post_init__(self):
     self._data = PersistedWork(self.path, self, mkdir=True)