Example #1
0
 def _write_max_word_piece_token_length(self):
     """Run the transform and decode checks, then print the maximum word
     piece token length reported by the facade.

     """
     # a short lived facade is used only to remove the expensive vectorizers
     with dealloc(self.create_facade()) as cleanup_facade:
         cleanup_facade.remove_expensive_vectorizers()
     self._test_transform()
     self._test_decode()
     logger.info('calculatating word piece length on data set...')
     # slow step: this iterates through the corpus
     with dealloc(self.create_facade()) as corpus_facade:
         max_len = corpus_facade.get_max_word_piece_len()
         print(f'max word piece token length: {max_len}')
Example #2
0
    def print_information(self, info_item: InfoItem = None):
        """Print the facade information selected by ``info_item``.

        Nothing is printed when this instance has a ``_no_op`` attribute.

        :param info_item: what to print; the facade's own ``write`` is used
                          when ``None``

        :raises DeepLearnError: if there is no writer for ``info_item``

        """
        # see :class:`.FacadeApplicationFactory`
        if hasattr(self, '_no_op'):
            return

        def write_batch():
            # ``facade`` is resolved from the enclosing scope, which binds
            # it in the ``with`` statement below before this is called
            for batch in it.islice(facade.batch_stash.values(), 2):
                batch.write()

        with dealloc(self.create_facade()) as facade:
            print(f'{facade.model_settings.model_name}:')
            writers = {
                None: facade.write,
                InfoItem.meta: facade.batch_metadata.write,
                InfoItem.param: facade.executor.write_settings,
                InfoItem.model: facade.executor.write_model,
                InfoItem.config: facade.config.write,
                InfoItem.batch: write_batch,
            }
            writer = writers.get(info_item)
            if writer is None:
                raise DeepLearnError(f'No such info item: {info_item}')
            writer()
Example #3
0
 def create_facade(self) -> ModelFacade:
     """Create a new instance of the facade.

     When ``model_path`` is set the facade is loaded from that path,
     otherwise it is created from the configuration.  The returned facade
     (and the configuration factory, when it is kept) is added to
     ``dealloc_resources``.

     :return: the newly created facade

     """
     # a new (non-shared) instance is required since the facade gets
     # deallocated once the caller is finished with it
     conf = self.config
     path = self.model_path
     if self.config_overwrites is not None:
         # merge into a copy so the original configuration is not modified
         conf = cp.deepcopy(conf)
         conf.merge(self.config_overwrites)
     if path is not None:
         if logger.isEnabledFor(logging.INFO):
             logger.info(f'loading model from {path}')
         # the factory is only needed long enough to resolve the class
         with dealloc(ImportConfigFactory(
                 conf, **self.config_factory_args)) as factory:
             facade_cls: Type[ModelFacade] = factory.get_class(
                 self.facade_name)
         facade: ModelFacade = facade_cls.load_from_path(path)
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug(f'created facade: {type(facade)} '
                          f'from path: {path}')
         self.dealloc_resources.append(facade)
     else:
         factory = ImportConfigFactory(conf, **self.config_factory_args)
         facade: ModelFacade = factory.instance(self.facade_name)
         if logger.isEnabledFor(logging.DEBUG):
             logger.debug(f'created facade: {facade}')
         self.dealloc_resources.extend((factory, facade))
     return facade
Example #4
0
    def train_test(self):
        """Train the model, test it, and then persist the results.

        """
        with dealloc(self.create_facade()) as model_facade:
            model_facade.train()
            model_facade.test()
            model_facade.persist_result()
Example #5
0
 def _test_transform(self):
     """Parse ``self.sent``, tokenize it with the facade's transformer
     embedding model, then write the tokenized document and print the shape
     of the transformed tensor.

     """
     with dealloc(self.create_facade()) as model_facade:
         embed_model = model_facade.transformer_trainable_embedding_model
         parsed = model_facade.doc_parser.parse(self.sent)
         tokenized = embed_model.tokenize(parsed)
         tokenized.write()
         output: Tensor = embed_model.transform(tokenized)
         print(output.shape)
Example #6
0
 def _test_decode(self):
     """Encode a document made of the first three items of the feature
     stash with the ``syn`` language vectorizer.

     """
     with dealloc(self.create_facade()) as model_facade:
         first_sents = it.islice(model_facade.feature_stash.values(), 3)
         document = FeatureDocument(tuple(first_sents))
         vectorizer = model_facade.language_vectorizer_manager['syn']
         from zensols.util.log import loglevel
         # the encode call runs inside the ``zensols.deepnlp`` log level
         # context
         with loglevel('zensols.deepnlp'):
             vectorizer.encode(document)
Example #7
0
    def train(self):
        """Train the model and then persist the results.

        """
        with dealloc(self.create_facade()) as model_facade:
            model_facade.train()
            model_facade.persist_result()
Example #8
0
    def train_production(self):
        """Run the facade's production training, test the model, and then
        persist the results.

        """
        with dealloc(self.create_facade()) as model_facade:
            model_facade.train_production()
            model_facade.test()
            model_facade.persist_result()
Example #9
0
    def debug(self, debug_value: int = None):
        """Debug the model.

        :param debug_value: the executor debugging level; ``True`` is passed
                            to the facade when not given

        """
        if debug_value is None:
            debug_value = True
        with dealloc(self.create_facade()) as model_facade:
            model_facade.debug(debug_value)
Example #10
0
    def majority_label_metrics(self, res_id: str = None):
        """Write the majority label metrics of a previous result set.

        :param res_id: the result ID or use the last if not given

        """
        with dealloc(self.create_facade()) as model_facade:
            factory: PredictionsDataFrameFactory = \
                model_facade.get_predictions_factory(name=res_id)
            label_metrics = factory.majority_label_metrics
            label_metrics.write()
Example #11
0
    def result(self, res_id: str = None):
        """Write the result of a previous result set.

        :param res_id: the result ID or use the last if not given

        """
        with dealloc(self.create_facade()) as model_facade:
            factory: PredictionsDataFrameFactory = \
                model_facade.get_predictions_factory(name=res_id)
            model_result = factory.result
            model_result.write()
Example #12
0
    def test(self, model_path: Path = None):
        """Test an existing model and dump the results of the test.

        The given path is stored on ``self.model_path`` before the facade is
        created.

        :param model_path: the path to the model or use the last trained model
                           if not provided

        """
        self.model_path = model_path
        with dealloc(self.create_facade()) as model_facade:
            model_facade.test()
Example #13
0
def load():
    """Load the Iris model facade from ``target/iris/model``, test it, then
    write and plot the result.

    Allocation tracking is switched on for :class:`Deallocatable` first.

    """
    from pathlib import Path
    Deallocatable.ALLOCATION_TRACKING = True
    model_dir = Path('target/iris/model')
    with dealloc(IrisModelFacade.load_from_path(model_dir)) as model_facade:
        model_facade.reload()
        model_facade.writer = None
        test_result = model_facade.test()
        test_result.write(include_converged=True)
        model_facade.plot_result(save=True)
Example #14
0
    def compare_results(self, res_id_a: str, res_id_b: str):
        """Compare two previous archived result sets.

        :param res_id_a: the first result ID to compare

        :param res_id_b: the second result ID to compare

        """
        with dealloc(self.create_facade()) as facade:
            # the facade's result manager is what the comparer is built on;
            # it is not itself a comparer
            rm: ModelResultManager = facade.result_manager
            comparer = ModelResultComparer(rm, res_id_a, res_id_b)
            comparer.write()
Example #15
0
    def predict_text(self, text_input: str, verbose: bool = False):
        """Classify ad-hoc text and output the results.

        :param text_input: the sentence to classify or standard in if not given

        :param verbose: if given, print the long format version of the document

        """
        sentences = self._get_sentences(text_input)
        with dealloc(self.create_facade()) as model_facade:
            predicted: Tuple[FeatureDocument] = model_facade.predict(sentences)
            if verbose:
                # long format
                for document in predicted:
                    document.write()
            else:
                for document in predicted:
                    print(document)
Example #16
0
    def predict_text(self, text_input: str, verbose: bool = False):
        """Classify ad-hoc text and print each predicted label next to its
        token.

        :param text_input: the sentence to classify or standard in if not given

        :param verbose: if given, print the long format version of the document

        """
        # NOTE(review): ``verbose`` is not read in this body -- confirm
        # whether that is intended
        sentences = self._get_sentences(text_input)
        with dealloc(self.create_facade()) as model_facade:
            prediction: Settings = model_facade.predict(sentences)
            documents: Tuple[FeatureDocument] = prediction.docs
            label_seqs: Tuple[str] = prediction.classes
            # pair every document with its label sequence, then every token
            # with its label
            for doc_labels, document in zip(label_seqs, documents):
                for label, token in zip(doc_labels, document.token_iter()):
                    print(label, token)
Example #17
0
    def result_summary(self, out_file: Path = None,
                       include_validation: bool = False):
        """Create a summary of all archived results.

        :param out_file: the output path, which defaults to
                         ``result-summary.csv``

        :param include_validation: whether or not to include validation
                                   results

        """
        summary_path = Path('result-summary.csv') \
            if out_file is None else out_file
        with dealloc(self.create_facade()) as model_facade:
            manager: ModelResultManager = model_facade.result_manager
            self._enable_cli_logging(model_facade)
            summary_reporter = ModelResultReporter(manager)
            summary_reporter.include_validation = include_validation
            summary_reporter.dump(summary_path)
Example #18
0
    def predict(self, sentence: str):
        """Predict and write the given sentence, or a built-in set of sample
        sentences when it is ``None``.

        :param sentence: the sentence to classify

        """
        if sentence is not None:
            sents = [sentence]
        else:
            # built-in samples used when no sentence is given
            sents = [
                "If you sometimes like to go to the movies to have fun , Wasabi is a good place to start .",
                'There are a few stabs at absurdist comedy ... but mostly the humor is of the sweet , gentle and occasionally cloying kind that has become an Iranian specialty .',
                'Terrible', 'Great movie', 'Wonderful, great, awesome, 100%',
                'Terrible, aweful, worst movie'
            ]
        with dealloc(self.create_facade()) as model_facade:
            reviews: Tuple[Review] = model_facade.predict(sents)
            for review in reviews:
                review.write()
Example #19
0
    def metrics(self, sort: str = 'wF1', res_id: str = None,
                out_file: Path = None):
        """Write a spreadsheet of label performance metrics for a previously
        trained and tested model.

        :param sort: the column to sort results

        :param res_id: the result ID or use the last if not given

        :param out_file: the output path, which defaults to ``metrics.csv``

        """
        csv_path = Path('metrics.csv') if out_file is None else out_file
        with dealloc(self.create_facade()) as model_facade:
            factory = model_facade.get_predictions_factory(name=res_id)
            frame = factory.metrics_dataframe
            # sort descending on the requested column and renumber the rows
            frame = frame.sort_values(sort, ascending=False)
            frame = frame.reset_index(drop=True)
            frame.to_csv(csv_path)
            self._enable_cli_logging(model_facade)
            logger.info(f'wrote: {csv_path}')
Example #20
0
    def predictions(self, res_id: str = None, out_file: Path = None):
        """Write predictions to a CSV file.

        :param res_id: the result ID or use the last if not given

        :param out_file: the output path, which defaults to the executor's
                         model name with a ``.csv`` extension

        :raises ApplicationError: if the facade raises a ``ModelError``
                                  while getting the predictions

        """
        with dealloc(self.create_facade()) as model_facade:
            csv_path = out_file
            if csv_path is None:
                csv_path = Path(f'{model_facade.executor.model_name}.csv')
            try:
                frame = model_facade.get_predictions(name=res_id)
            except ModelError as err:
                raise ApplicationError(
                    'Could not predict, probably need to train a model '
                    f'first: {err}') from err
            frame.to_csv(csv_path)
            self._enable_cli_logging(model_facade)
            if logger.isEnabledFor(logging.INFO):
                logger.info(f'wrote predictions: {csv_path}')
Example #21
0
    def batch_sample(self):
        """Print what's contained in this app specific batch.

        The first three batches of the batch stash are written along with
        their label classes.  For every data point whose document has more
        than one element, the document's polarity and each of its elements
        are printed as well.

        """
        with dealloc(self.create_facade()) as facade:
            stash: BatchStash = facade.batch_stash
            batch: Batch
            for batch in it.islice(stash.values(), 3):
                classes = batch.get_label_classes()
                # the former ``len(unique classes) > 1 or True`` guard was
                # always true, so every batch is printed unconditionally
                print(batch.split_name)
                batch.write()
                print(classes)
                print(batch.has_labels)
                for dp in batch.data_points:
                    if len(dp.doc) > 1:
                        print(dp.doc.polarity)
                        for s in dp.doc:
                            print(s)
                        print('-' * 30)
Example #22
0
    def batch(self, limit: int = None, clear_type: ClearType = ClearType.none,
              split: bool = False):
        """Create batches if not already, print statistics on the dataset.

        :param limit: the number of batches to create

        :param clear_type: what to delete to force recreate

        :param split: also write the stratified splits if available

        """
        # NOTE(review): ``limit`` is not read anywhere in this body --
        # confirm whether it is applied elsewhere
        with dealloc(self.create_facade()) as model_facade:
            self._enable_cli_logging(model_facade)
            if clear_type == ClearType.batch:
                # only the batches are removed
                logger.info('clearing batches')
                model_facade.batch_stash.clear()
            elif clear_type == ClearType.source:
                # ``clear_all`` is called before the batches are cleared
                model_facade.batch_stash.clear_all()
                model_facade.batch_stash.clear()
            model_facade.dataset_stash.write()
            if split:
                self._write_batch_splits(model_facade)
Example #23
0
 def assert_label_mapping(self):
     """Confirm the mapping of the labels is correct."""
     with dealloc(self.create_facade()) as model_facade:
         model_facade.assert_label_mapping()
Example #24
0
 def stats(self):
     """Write the corpus statistics using a newly created facade."""
     with dealloc(self.create_facade()) as model_facade:
         model_facade.write_corpus_stats()
Example #25
0
 def result_ids(self):
     """Print every key of the result manager's results stash, one per
     line.

     """
     with dealloc(self.create_facade()) as model_facade:
         manager: ModelResultManager = model_facade.result_manager
         archived_ids = manager.results_stash.keys()
         print('\n'.join(archived_ids))
Example #26
0
    def early_stop(self):
        """Ask the facade to stop training the model.

        """
        with dealloc(self.create_facade()) as model_facade:
            model_facade.stop_training()