Exemple #1
0
def get_export_params(
        config,
        output_dir=None,
        project=None, name=None,
        model_version=None,
        exporter_type=None,
        return_labels=None,
        is_remote=None,
):
    """Resolve the export settings, letting explicit arguments override the config.

    Every argument left as `None` is looked up in `config` instead; an argument
    that is supplied always wins over the config entry.

    :param config: `dict` The export block of the config.
    :param output_dir: `str` The base of export paths (falls back to './models').
        A leading `~` is expanded to the user's home directory.
    :param project: `str` The name of the project this model is for.
    :param name: `str` The name of this model (often the use case for it, `ner`, `intent` etc).
    :param model_version: `str` The version of this model.
    :param exporter_type: `str` The name of the exporter to use. The config key `type`
        is consulted first, then `exporter_type`, then 'default'.
    :param return_labels: `str` Should labels be returned? (falls back to False)
    :param is_remote: `str` Should the bundle be split into client and server dirs
        (falls back to True).

    :returns: `Tuple[str, str, str, str, str, bool, bool]`
        The output_dir, project, name, model_version, exporter_type, return_labels, and remote
    """
    if project is None:
        project = config.get('project')
    if name is None:
        name = config.get('name')

    if output_dir is None:
        output_dir = config.get('output_dir', './models')
    output_dir = os.path.expanduser(output_dir)

    if model_version is None:
        model_version = config.get('model_version')

    if exporter_type is None:
        # `type` takes precedence over `exporter_type` when both keys are present.
        legacy_default = config.get('exporter_type', 'default')
        exporter_type = config.get('type', legacy_default)

    if return_labels is None:
        return_labels = config.get('return_labels', False)
    return_labels = str2bool(return_labels)

    if is_remote is None:
        is_remote = config.get('is_remote', True)
    is_remote = str2bool(is_remote)

    return (
        output_dir,
        project,
        name,
        model_version,
        exporter_type,
        return_labels,
        is_remote,
    )
Exemple #2
0
def _infer_type_or_str(x):
    """Interpret `x` as a bool, then as a float, otherwise return it unchanged.

    :param x: The value to interpret.
    :returns: The result of `str2bool(x)` when that succeeds, else `float(x)`
        when that succeeds, else `x` itself.
    """
    try:
        return str2bool(x)
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed by the type probing.
    except Exception:
        try:
            return float(x)
        except ValueError:
            return x
Exemple #3
0
def _infer_type_or_str(x):
    """Interpret `x` as a bool, then as a float, otherwise return it unchanged.

    :param x: The value to interpret.
    :returns: The result of `str2bool(x)` when that succeeds, else `float(x)`
        when that succeeds, else `x` itself.
    """
    try:
        return str2bool(x)
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit are not swallowed by the type probing.
    except Exception:
        try:
            return float(x)
        except ValueError:
            return x
def test_get_export_config():
    """With no overrides passed, every export parameter should come from the config."""
    config = {
        'project': rand_str(),
        'name': rand_str(),
        'output_dir': os.path.join(rand_str(), rand_str()),
        'model_version': str(random.randint(1, 5)),
        'exporter_type': rand_str(),
        'return_labels': random.choice(['true', 'false']),
        'is_remote': random.choice(['true', 'false']),
    }

    (
        out_dir,
        project,
        name,
        version,
        exporter,
        labels,
        remote,
    ) = get_export_params(config)

    assert out_dir == config['output_dir']
    assert project == config['project']
    assert name == config['name']
    assert version == config['model_version']
    assert exporter == config['exporter_type']
    # The two flags are stored as strings in the config but returned as bools.
    assert labels == str2bool(config['return_labels'])
    assert remote == str2bool(config['is_remote'])
Exemple #5
0
 def test():
     """Check `get_export_params` against expected values built from args and config.

     `make_data` and `choice` are defined outside this block. Each `choice` call
     yields a pair: the value to pass as the argument and the value expected
     back. When the expected value is `None`, the parameter's default is
     expected instead.
     """
     arg_data = make_data()
     config_data = make_data()
     config = {
         'output_dir': config_data.dir,
         'project': config_data.proj,
         'name': config_data.name,
         'model_version': config_data.version,
         'exporter_type': rand_str(),
         'return_labels': random.choice(['true', 'false']),
         'is_remote': random.choice(['true', 'false']),
     }

     in_output, gold_output = choice(arg_data.dir, config, 'output_dir')
     if gold_output is None:
         gold_output = './models'

     in_project, gold_project = choice(arg_data.proj, config, 'project')
     in_name, gold_name = choice(arg_data.name, config, 'name')
     in_version, gold_version = choice(arg_data.version, config, 'model_version')

     in_export, gold_export = choice(rand_str(), config, 'exporter_type')
     if gold_export is None:
         gold_export = 'default'

     labels_arg = random.choice(['true', 'false'])
     in_labels, gold_labels = choice(labels_arg, config, 'return_labels')
     if gold_labels is None:
         gold_labels = False
     else:
         gold_labels = str2bool(gold_labels)

     remote_arg = random.choice(['true', 'false'])
     in_remote, gold_remote = choice(remote_arg, config, 'is_remote')
     if gold_remote is None:
         gold_remote = True
     else:
         gold_remote = str2bool(gold_remote)

     (
         out_dir,
         project,
         name,
         version,
         exporter,
         labels,
         remote,
     ) = get_export_params(
         config,
         in_output,
         in_project,
         in_name,
         in_version,
         in_export,
         in_labels,
         in_remote,
     )

     assert out_dir == gold_output
     assert project == gold_project
     assert name == gold_name
     assert version == gold_version
     assert exporter == gold_export
     assert labels == gold_labels
     assert remote == gold_remote
Exemple #6
0
    def load(self, task_name=None):
        """Import the backend-specific packages for the backend named by `self.name`.

        For the `tf` backend the log level and eager-debug flag are first set
        from the `MEAD_TF_LOG_LEVEL` (default "ERROR") and `MEAD_TF_EAGER_DEBUG`
        (default "FALSE") environment variables. The base package and its
        `optz`, `embeddings` and exporter modules are then imported, and
        `transition_mask` from the base package is bound onto this object.

        :param task_name: `str` Optional task whose backend module should also be
            imported. A failure to import it is logged as a warning, not raised.
        """
        if self.name == 'tf':
            from eight_mile.tf.layers import set_tf_log_level, set_tf_eager_debug
            set_tf_log_level(os.getenv("MEAD_TF_LOG_LEVEL", "ERROR"))
            set_tf_eager_debug(str2bool(os.getenv("MEAD_TF_EAGER_DEBUG", "FALSE")))

        base_pkg_name = 'baseline.{}'.format(self.name)
        # Backends may not be downloaded to the cache, they must exist locally
        mod = import_user_module(base_pkg_name)
        import_user_module('baseline.{}.optz'.format(self.name))
        import_user_module('baseline.{}.embeddings'.format(self.name))
        import_user_module('mead.{}.exporters'.format(self.name))
        if task_name is not None:
            try:
                import_user_module(f'{base_pkg_name}.{task_name}')
            # The task module is optional, so failures are only logged; catch
            # `Exception` (not a bare `except:`) so that KeyboardInterrupt and
            # SystemExit still propagate.
            except Exception:
                logger.warning(f"No module found [{base_pkg_name}.{task_name}]")
        self.transition_mask = mod.transition_mask
Exemple #7
0
    def train(self, checkpoint=None):
        """This method delegates to several sub-hooks in order to complete training.
        1. call `_reorganize_params()`
        2. call `baseline.save_vectorizers()` which write out the bound `vectorizers` fields to a file in the `basedir`
        3. call `_load_dataset()` which initializes the `DataFeed` fields of this class
        4. call `baseline.train.fit()` which executes the training procedure and  yields a saved model
        5. call `baseline.zip_files()` on the `basedir`, unless the `zip_checkpoint` config entry
           (default `True`) evaluates to false
        6. call `_close_reporting_hooks()` which lets the reporting hooks know that the job is finished

        :param checkpoint: Stored under `train_params['checkpoint']` and so forwarded to
            `baseline.train.fit()` as a keyword argument (defaults to `None`).
        :return: Nothing
        """
        self._reorganize_params()
        baseline.save_vectorizers(self.get_basedir(), self.vectorizers)
        self._load_dataset()

        # The model block of the config is updated in place with the features,
        # labels and task name before being handed to the trainer.
        model_params = self.config_params['model']
        model_params['features'] = self._get_features()
        model_params['labels'] = self._get_labels()
        model_params['task'] = self.task_name()
        # Everything in the train block (plus the checkpoint) is passed to `fit` as keyword arguments.
        train_params = self.config_params['train']
        train_params['checkpoint'] = checkpoint
        baseline.train.fit(model_params, self.train_data, self.valid_data, self.test_data, **train_params)
        # Zipping is optional: controlled by the `zip_checkpoint` config entry, on by default.
        if str2bool(self.config_params.get('zip_checkpoint', True)):
            baseline.zip_files(self.get_basedir())
        self._close_reporting_hooks()