def test_get_export_input_override():
    """Explicitly passed arguments must win over conflicting config values."""
    project = rand_str()
    name = rand_str()
    output_dir = os.path.join(rand_str(), rand_str())
    model_version = str(random.randint(1, 5))
    exporter_type = rand_str()
    return_labels = random.choice([True, False])
    is_remote = random.choice([True, False])
    # The config deliberately disagrees with every explicit argument above.
    config = {
        'project': rand_str(),
        'name': rand_str(),
        'output_dir': os.path.join(rand_str(), rand_str()),
        'model_version': str(random.randint(1, 5)),
        'exporter_type': rand_str(),
        'return_labels': not return_labels,
        'is_remote': not is_remote,
    }
    got_output, got_project, got_name, got_version, got_exporter, got_labels, got_remote = get_export_params(
        config, output_dir, project, name, model_version, exporter_type, return_labels, is_remote
    )
    assert got_output == output_dir
    assert got_project == project
    assert got_name == name
    assert got_version == model_version
    assert got_exporter == exporter_type
    assert got_labels == return_labels
    assert got_remote == is_remote
def test_get_export_str2bool_called():
    """str2bool must be applied to the return_labels and is_remote inputs, in that order."""
    labels_arg = random.choice(['true', 'false'])
    remote_arg = random.choice(['true', 'false'])
    with patch('mead.utils.str2bool') as str2bool_mock:
        get_export_params({}, return_labels=labels_arg, is_remote=remote_arg)
    assert str2bool_mock.call_args_list == [call(labels_arg), call(remote_arg)]
def test_get_export_defaults():
    """An empty config with no overrides yields the documented defaults."""
    output, project, name, version, exporter, labels, remote = get_export_params({})
    assert output == './models'
    assert project is None
    assert name is None
    assert version is None
    assert exporter == 'default'
    assert labels is False
    assert remote is True
def test_get_export_config():
    """Values present in the config are used when no explicit overrides are given."""
    config = {
        'project': rand_str(),
        'name': rand_str(),
        'output_dir': os.path.join(rand_str(), rand_str()),
        'model_version': str(random.randint(1, 5)),
        'exporter_type': rand_str(),
        'return_labels': random.choice(['true', 'false']),
        'is_remote': random.choice(['true', 'false']),
    }
    output, project, name, version, exporter, labels, remote = get_export_params(config)
    assert output == config['output_dir']
    assert project == config['project']
    assert name == config['name']
    assert version == config['model_version']
    assert exporter == config['exporter_type']
    # The string flags in the config are normalized through str2bool.
    assert labels == str2bool(config['return_labels'])
    assert remote == str2bool(config['is_remote'])
def test():
    """Randomized check of the full fallback chain: explicit arg -> config -> default.

    For each export parameter, ``choice`` randomly decides whether the value
    comes from the explicit argument or the config, and reports the expected
    ("gold") value; parameters left unset everywhere fall back to defaults.
    """
    in_ = make_data()
    c = make_data()
    config = {
        'output_dir': c.dir,
        'project': c.proj,
        'name': c.name,
        'model_version': c.version,
        'exporter_type': rand_str(),
        'return_labels': random.choice(['true', 'false']),
        'is_remote': random.choice(['true', 'false']),
    }
    in_output, gold_output = choice(in_.dir, config, 'output_dir')
    if gold_output is None:
        gold_output = './models'
    in_project, gold_project = choice(in_.proj, config, 'project')
    in_name, gold_name = choice(in_.name, config, 'name')
    in_version, gold_version = choice(in_.version, config, 'model_version')
    in_export, gold_export = choice(rand_str(), config, 'exporter_type')
    if gold_export is None:
        gold_export = 'default'
    in_labels, gold_labels = choice(random.choice(['true', 'false']), config, 'return_labels')
    gold_labels = False if gold_labels is None else str2bool(gold_labels)
    in_remote, gold_remote = choice(random.choice(['true', 'false']), config, 'is_remote')
    gold_remote = True if gold_remote is None else str2bool(gold_remote)
    o, p, n, v, e, l, r = get_export_params(
        config,
        in_output,
        in_project,
        in_name,
        in_version,
        in_export,
        in_labels,
        in_remote,
    )
    assert o == gold_output
    assert p == gold_project
    assert n == gold_name
    assert v == gold_version
    assert e == gold_export
    assert l == gold_labels
    assert r == gold_remote
def test_get_export_output_expanded():
    """A '~' in the requested output dir is expanded to the user's home directory."""
    requested = "~/example"
    expected = os.path.expanduser(requested)
    results = get_export_params({}, requested)
    assert results[0] == expected
def test_get_export_type_in_config():
    """The legacy 'type' key in the config selects the exporter type."""
    config = {'type': rand_str()}
    exporter = get_export_params(config)[4]
    assert exporter == config['type']
def main():
    """CLI entry point: export a trained mead model into a serving bundle.

    Merges command-line overrides over the experiment config, resolves all
    export parameters, then delegates to a task-specific exporter.
    """
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='configuration for mead', required=False, default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--datasets', help='json library of dataset labels')
    parser.add_argument('--vecs', help='index of vectorizers: local file, remote URL or hub mead-ml/ref', default='config/vecs.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument(
        '--return_labels',
        help='if true, the exported model returns actual labels else ' 'the indices for labels vocab (default False)',
        default=None,
    )
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--nbest_input', help='Is the input to this model N-best', default=False, type=str2bool)
    parser.add_argument(
        '--is_remote',
        help='if True, separate items for remote server and client. '
             'If False bundle everything together (default True)',
        default=None,
    )
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--use_version', help='Should we use the version?', type=str2bool, default=True)
    parser.add_argument('--use_all_features', help='If a feature is found via vectorizer and not in embeddings, should we include it?', type=str2bool, default=False)
    parser.add_argument('--zip', help='Should we zip the results?', type=str2bool, default=False)
    args, overrides = parser.parse_known_args()
    configure_logger(args.logging)

    config_params = read_config_stream(args.config)
    config_params = parse_and_merge_overrides(config_params, overrides, pre='x')

    # Settings are optional; fall back to an empty dict when absent/unreadable.
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    # Exporting does not need a GPU; hide devices and drop multi-gpu training config.
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in config_params:
        config_params['beam'] = args.beam
    config_params['modules'] = config_params.get('modules', []) + args.modules
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Merge reporting hooks named on the command line with those from the config.
    hooks_from_cli = args.reporting if args.reporting is not None else []
    hooks_from_config = config_params.get('reporting') if config_params.get('reporting') is not None else []
    config_params['reporting'] = parse_extra_args(set(chain(hooks_from_cli, hooks_from_config)), overrides)

    args.vecs = read_config_stream(args.vecs)
    task = mead.Task.get_task_specific(task_name, args.settings)

    output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project,
        args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    # Here we reuse code in `.read_config` which needs a dataset index (when used with mead-train)
    # but when used with mead-export it is not needed. This is a dummy dataset index that will work
    # It means we don't need to pass it in
    datasets = [{'label': config_params['dataset']}]
    task.read_config(config_params, datasets, args.vecs, exporter_type=exporter_type)
    feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(
        task,
        exporter_type,
        return_labels=return_labels,
        feature_exporter_field_map=feature_exporter_field_map,
        nbest_input=args.nbest_input,
    )
    exporter.run(
        args.model,
        output_dir,
        project,
        name,
        model_version,
        remote=is_remote,
        use_version=args.use_version,
        zip_results=args.zip,
        use_all_features=args.use_all_features,
    )
def main():
    """CLI entry point: train a servable-embeddings task and optionally export it.

    Trains with no dataset (embeddings only); when ``--export`` is true the
    resulting model is left unzipped and handed to an exporter.
    """
    parser = argparse.ArgumentParser(description='Create an Embeddings Service')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--export', help='Should this create a export bundle?', default=True, type=str2bool)
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument(
        '--is_remote',
        help='if True, separate items for remote server and client. '
             'If False bundle everything together (default True)',
        default=None,
    )
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    # Settings are optional; fall back to empty settings when the file is missing.
    # FIX: catch Exception rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit are not silently swallowed here.
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning(
            'Warning: no mead-settings file was found at [{}]'.format(
                args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Embedding export does not need a GPU; hide devices and drop multi-gpu config.
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    config_params['task'] = 'servable_embeddings'
    task = mead.Task.get_task_specific(config_params['task'], args.settings)
    task.read_config(config_params, args.datasets, reporting_args=[], config_file=deepcopy(config_params))
    task.initialize(args.embeddings)

    # When exporting afterwards, leave the trained model unzipped so the
    # exporter can read it directly.  (Was: `False if args.export else True`.)
    to_zip = not args.export
    task.train(None, zip_model=to_zip)

    if args.export:
        model = os.path.abspath(task.get_basedir())
        output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
            config_params.get('export', {}),
            args.output_dir,
            args.project,
            args.name,
            args.model_version,
            args.exporter_type,
            False,  # embeddings services never return labels
            args.is_remote,
        )
        feature_exporter_field_map = create_feature_exporter_field_map(
            config_params['features'])
        exporter = create_exporter(
            task,
            exporter_type,
            return_labels=return_labels,
            feature_exporter_field_map=feature_exporter_field_map,
        )
        exporter.run(model, output_dir, project, name, model_version, remote=is_remote)
def main():
    """CLI entry point: export a trained mead model into a serving bundle.

    Reads the experiment config, resolves export parameters (explicit args
    override the config's ``export`` section), and runs the exporter.
    """
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='JSON Configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='JSON Configuration for mead', required=False, default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument(
        '--return_labels',
        help='if true, the exported model returns actual labels else ' 'the indices for labels vocab (default False)',
        default=None,
    )
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument(
        '--is_remote',
        help='if True, separate items for remote server and client. '
             'If False bundle everything together (default True)',
        default=None,
    )
    args = parser.parse_args()
    configure_logger(args.logging)

    # NOTE(review): this variant reads --config with read_config_file while the
    # other entry points use read_config_stream — confirm this is intentional.
    config_params = read_config_file(args.config)
    # Settings are optional; fall back to empty settings when the file is missing.
    # FIX: catch Exception rather than a bare `except:` so KeyboardInterrupt and
    # SystemExit are not silently swallowed here.
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    # Exporting does not need a GPU; hide devices and drop multi-gpu config.
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in config_params:
        config_params['beam'] = args.beam
    config_params['modules'] = config_params.get('modules', []) + args.modules
    task = mead.Task.get_task_specific(task_name, args.settings)

    output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project,
        args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    task.read_config(config_params, args.datasets, exporter_type=exporter_type)
    feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(
        task,
        exporter_type,
        return_labels=return_labels,
        feature_exporter_field_map=feature_exporter_field_map,
    )
    exporter.run(args.model, output_dir, project, name, model_version, remote=is_remote)