def main(): """Driver program to execute chores.""" parser = argparse.ArgumentParser(description='Run chores') parser.add_argument('file', help='A chore YAML file.') parser.add_argument('--cred', help='cred file', default="/etc/odind/odin-cred.yml") parser.add_argument('--label', required=True) parser.add_argument('--modules', nargs='+', default=[], help='Addon modules to load') args = parser.parse_args() for addon in args.modules: import_user_module(addon) cred_params = read_config_stream(args.cred) store = create_store_backend(**cred_params['jobs_db']) config = read_config_stream(args.file) previous = store.get_previous(args.label) parent_details = store.get_parent(args.label) results = {prev_job_details['name']: prev_job_details for prev_job_details in previous} results['parent'] = parent_details results = run_chores(config, results) results = {'chore_context': results} LOGGER.info(results) job_details = store.get(args.label) job_details.update(results) store.set(job_details)
def main():
    parser = argparse.ArgumentParser(description='Create an Embeddings Service')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--export', help='Should this create an export bundle?', default=True, type=str2bool)
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. If False bundle everything together (default True)', default=None)
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    # Remove multigpu references: exporting runs on CPU only
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    config_params['task'] = 'servable-embeddings'
    task = mead.Task.get_task_specific(config_params['task'], args.settings)
    task.read_config(config_params, args.datasets, reporting_args=[], config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    to_zip = not args.export
    task.train(None, zip_model=to_zip)

    if args.export:
        model = os.path.abspath(task.get_basedir())
        output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
            config_params.get('export', {}),
            args.output_dir,
            args.project,
            args.name,
            args.model_version,
            args.exporter_type,
            False,
            args.is_remote,
        )
        feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
        exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                                   feature_exporter_field_map=feature_exporter_field_map)
        exporter.run(model, output_dir, project, name, model_version, remote=is_remote)

def main():
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to 1)', type=int)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    args.settings = read_config_stream(args.settings)
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)
    args.logging = read_config_stream(args.logging)

    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus

    if args.reporting is not None:
        reporting = parse_extra_args(args.reporting, reporting_args)
        config_params['reporting'] = reporting

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    print('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.logging, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=args.config)
    task.initialize(args.embeddings)
    task.train()

def main(): parser = argparse.ArgumentParser(description="Get the mead hash of a config.") parser.add_argument('config', help='JSON/YML Configuration for an experiment: local file or remote URL', type=convert_path, default="$MEAD_CONFIG") args = parser.parse_args() config = read_config_stream(args.config) print(hash_config(config))
def main(): """Clean up a job """ parser = argparse.ArgumentParser(description='Clean up a job') parser.add_argument('work', help='Job') parser.add_argument('--cred', help='cred file', type=convert_path, required=True) parser.add_argument('--db', action='store_true', help="Also remove from the jobs db") parser.add_argument('--fs', action='store_true', help="Also remove from the filesystem") parser.add_argument('--data_dir', help="The root of where data is saved.") args = parser.parse_args() cred_params = read_config_stream(args.cred) store = create_store_backend(**cred_params['jobs_db']) cleaned = cleanup(args.work, store, purge_db=args.db, purge_fs=args.fs, data_dir=args.data_dir) print("Results of this request:") print_table(cleaned)
def main(): """Take in a job and get back its status TODO: support passing in specific Job IDs and regex """ parser = argparse.ArgumentParser(description='Get job status') parser.add_argument('work', help='Pipeline or Job') parser.add_argument('--cred', help='cred file', type=convert_path, required=True) parser.add_argument('--format', help='Format the output', default="human") parser.add_argument('--columns', nargs="+", default=[], help="Columns of the status to show.") parser.add_argument('--all', action='store_true', help="Show all columns of the status message.") args = parser.parse_args() cred_params = read_config_stream(args.cred) store = create_store_backend(**cred_params['jobs_db']) work = store.parents_like(args.work) if not work: print('No job found') for parent in work: try: show_status(*get_status(parent, store), columns=set(args.columns), all_cols=args.all) except Exception: print('ERROR: Skipping {}'.format(parent))
def main():
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to number available)', type=int, default=-1)
    parser.add_argument('--basedir', help='Override the base directory where models are stored', type=str)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--checkpoint', help='Restart training from this checkpoint')
    args, reporting_args = parser.parse_known_args()

    args.logging = read_config_stream(args.logging)
    configure_logger(args.logging)
    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)

    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus
    if args.basedir is not None:
        config_params['basedir'] = args.basedir
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), reporting_args)
    config_params['reporting'] = reporting

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    logger.info('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    task.train(args.checkpoint)

def main():
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--num_iters', type=int, default=5)
    parser.add_argument('--max_lr', type=float, default=10)
    parser.add_argument('--smooth', type=float, default=0.05)
    parser.add_argument('--use_val', type=str2bool, default=False)
    parser.add_argument('--log', type=str2bool, default=True)
    parser.add_argument('--diverge_threshold', type=int, default=5)
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        print('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)
    args.logging = read_config_stream(args.logging)

    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    config_params['reporting'] = {}
    config_params['train']['fit_func'] = "lr-find"
    config_params['train']['lr_scheduler_type'] = 'warmup_linear'
    config_params['train']['smooth_beta'] = args.smooth
    config_params['train']['use_val'] = args.use_val
    config_params['train']['log_scale'] = args.log
    config_params['train']['diverge_threshold'] = args.diverge_threshold
    config_params['train']['be'] = config_params['backend']

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    print('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.logging, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    task.train()

def test_read_config_stream_env(env, gold_data):
    data = read_config_stream(env)
    assert data == gold_data

def test_read_config_stream_file():
    file_name = os.path.join(data_loc, 'test_json.json')
    with mock.patch('baseline.utils.read_config_file') as read_patch:
        read_config_stream(file_name)
        read_patch.assert_called_once_with(file_name)

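# The tests in this file exercise the three input forms read_config_stream
# accepts: a path to a config file, an environment-variable reference (note the
# "$MEAD_CONFIG" defaults in the CLIs above), and a literal JSON string.
# Illustrative calls; paths, the env-var name, and values are examples only:
settings = read_config_stream('config/mead-settings.json')  # file path, delegates to read_config_file
settings = read_config_stream('$MEAD_SETTINGS')             # value resolved from the environment
settings = read_config_stream('{"batchsz": 50}')            # parsed directly as JSON
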
def main():
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='JSON Configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON Configuration for mead', default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='json library of dataset labels', default='config/datasets.json', type=convert_path)
    parser.add_argument('--embeddings', help='json library of embeddings', default='config/embeddings.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to number available)', type=int, default=-1)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--backend', help='The deep learning backend to use')
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        print('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}
    args.datasets = read_config_stream(args.datasets)
    args.embeddings = read_config_stream(args.embeddings)
    args.logging = read_config_stream(args.logging)

    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), reporting_args)
    config_params['reporting'] = reporting

    task_name = config_params.get('task', 'classify') if args.task is None else args.task
    print('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.logging, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args, config_file=deepcopy(config_params))
    task.initialize(args.embeddings)
    task.train()

def main():
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config', help='configuration for an experiment', type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='configuration for mead', default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--datasets', help='index of dataset labels', type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--mod_train_file', help='override the training set')
    parser.add_argument('--mod_valid_file', help='override the validation set')
    parser.add_argument('--mod_test_file', help='override the test set')
    parser.add_argument('--embeddings', help='index of embeddings', type=convert_path)
    parser.add_argument('--logging', help='config file for logging', default=DEFAULT_LOGGING_LOC, type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to number available)', type=int, default=-1)
    parser.add_argument('--basedir', help='Override the base directory where models are stored', type=str)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--checkpoint', help='Restart training from this checkpoint')
    args, reporting_args = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    if args.basedir is not None:
        config_params['basedir'] = args.basedir

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    args.logging = read_config_stream(args.logging)
    configure_logger(args.logging, config_params.get('basedir', './{}'.format(task_name)))

    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    args.datasets = args.datasets if args.datasets else args.settings.get('datasets', convert_path(DEFAULT_DATASETS_LOC))
    args.datasets = read_config_stream(args.datasets)
    if args.mod_train_file or args.mod_valid_file or args.mod_test_file:
        logging.warning('Warning: overriding the training/valid/test data with user-specified files '
                        'different from what was specified in the dataset index. Creating a new key for this entry')
        update_datasets(args.datasets, config_params, args.mod_train_file, args.mod_valid_file, args.mod_test_file)

    args.embeddings = args.embeddings if args.embeddings else args.settings.get('embeddings', convert_path(DEFAULT_EMBEDDINGS_LOC))
    args.embeddings = read_config_stream(args.embeddings)

    if args.gpus is not None:
        config_params['model']['gpus'] = args.gpus

    if args.backend is None and 'backend' in args.settings:
        args.backend = args.settings['backend']
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    config_params['modules'] = list(set(chain(config_params.get('modules', []), args.modules)))

    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), reporting_args)
    config_params['reporting'] = reporting

    logger.info('Task: [{}]'.format(task_name))
    task = mead.Task.get_task_specific(task_name, args.settings)
    task.read_config(config_params, args.datasets, reporting_args=reporting_args)
    task.initialize(args.embeddings)
    task.train(args.checkpoint)

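# A hedged sketch of the dataset-index entry shape that the --mod_*_file flags
# above override; field names follow mead's datasets.json convention, and the
# values here are illustrative only.
dataset_entry = {
    'label': 'sst2',
    'train_file': 'data/sst2.train',
    'valid_file': 'data/sst2.dev',
    'test_file': 'data/sst2.test',
}
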
def main():
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='configuration for mead', required=False, default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--logging', help='json file for logging', default=DEFAULT_LOGGING_LOC, type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--return_labels', help='if true, the exported model returns actual labels else '
                                                'the indices for labels vocab (default False)', default=None)
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. '
                                            'If False bundle everything together (default True)', default=None)
    args = parser.parse_args()

    configure_logger(args.logging)
    config_params = read_config_stream(args.config)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    # Remove multigpu references
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in config_params:
        config_params['beam'] = args.beam
    config_params['modules'] = config_params.get('modules', []) + args.modules

    task = mead.Task.get_task_specific(task_name, args.settings)
    output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project,
        args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    # Here we reuse code in `.read_config` which needs a dataset index (when used with mead-train)
    # but when used with mead-export it is not needed. This is a dummy dataset index that will work.
    # It means we don't need to pass it in.
    datasets = [{'label': config_params['dataset']}]
    task.read_config(config_params, datasets, exporter_type=exporter_type)
    feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                               feature_exporter_field_map=feature_exporter_field_map)
    exporter.run(args.model, output_dir, project, name, model_version, remote=is_remote)

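# A minimal sketch of the precedence get_export_params presumably applies (an
# assumption, not the verbatim implementation): an explicit CLI value wins over
# the config's 'export' block, which wins over a built-in default.
def pick_export_value(cli_value, export_block, key, default=None):
    """Pick the CLI value if given, else the config's export block, else the default."""
    return cli_value if cli_value is not None else export_block.get(key, default)

# e.g. pick_export_value(args.output_dir, config_params.get('export', {}), 'output_dir', './models')
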
def main():  # pylint: disable=too-many-statements
    """Select a model for export if one meets the criteria"""
    parser = argparse.ArgumentParser(description='Select a model for export if one meets the criteria')
    parser.add_argument('--cred', help='cred file', default="/etc/odind/odin-cred.yml")
    parser.add_argument('--type', help='Policy type', required=True)
    parser.add_argument('--label', required=True,
                        help="The odin task label for this selecting task, used to access the store")
    parser.add_argument('--models', required=True, nargs='+')
    parser.add_argument('--dataset', help="(deprecated) The name of the dataset to evaluate", required=False)
    parser.add_argument('--task', required=False)
    parser.add_argument('--metric', default='acc')
    parser.add_argument('--user_cmp', default=None)
    parser.add_argument('--config', help='(deprecated) JSON Configuration for an experiment', type=convert_path)
    parser.add_argument('--settings', help='JSON Configuration for mead', required=False,
                        default='config/mead-settings.json', type=convert_path)
    parser.add_argument('--datasets', help='(deprecated) json library of dataset labels', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--data_root', help='Data directory', default='/data')
    parser.add_argument('--xpctl_api_url', help='XPCTL api', type=str)
    args = parser.parse_args()

    if args.datasets is not None:
        LOGGER.warning("--datasets is unused and unneeded for calls to `odin-select`")
    if args.config is not None:
        LOGGER.warning("--config is unused and unneeded for calls to `odin-select`")
    if args.dataset is not None:
        LOGGER.warning("--dataset is unused and unneeded for calls to `odin-select`")

    cred_params = read_config_stream(args.cred)
    store = create_store_backend(**cred_params['jobs_db'])
    args.store = store
    xpctl_url = args.xpctl_api_url if args.xpctl_api_url is not None else cred_params['reporting_db']['host']
    args.api = xpctl_client(host=xpctl_url)

    params = vars(args)
    del params['cred']
    policy = create_export_policy(args.type, params)
    results = policy.select(args.models)

    if results:
        print(results)
        job_details = store.get(args.label)
        outputs = job_details.get("outputs", {})
        if outputs is None:
            outputs = {}
        job_details['outputs'] = outputs
        outputs.update(results)
        store.set(job_details)

def test_read_config_stream_str(gold_data):
    input_ = json.dumps(gold_data)
    data = read_config_stream(input_)
    assert data == gold_data

def main():
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='configuration for an experiment', required=True, type=convert_path)
    parser.add_argument('--settings', help='configuration for mead', required=False, default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', help='modules to load', default=[], nargs='+', required=False)
    parser.add_argument('--datasets', help='json library of dataset labels')
    parser.add_argument('--vecs', help='index of vectorizers: local file, remote URL or hub mead-ml/ref', default='config/vecs.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging', default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run', choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')", default=None)
    parser.add_argument('--return_labels', help='if true, the exported model returns actual labels else '
                                                'the indices for labels vocab (default False)', default=None)
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--nbest_input', help='Is the input to this model N-best', default=False, type=str2bool)
    parser.add_argument('--is_remote', help='if True, separate items for remote server and client. '
                                            'If False bundle everything together (default True)', default=None)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--use_version', help='Should we use the version?', type=str2bool, default=True)
    parser.add_argument('--use_all_features', help='If a feature is found via vectorizer and not in embeddings, should we include it?', type=str2bool, default=False)
    parser.add_argument('--zip', help='Should we zip the results?', type=str2bool, default=False)
    args, overrides = parser.parse_known_args()

    configure_logger(args.logging)
    config_params = read_config_stream(args.config)
    config_params = parse_and_merge_overrides(config_params, overrides, pre='x')
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    # Remove multigpu references
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    if 'gpus' in config_params.get('train', {}):
        del config_params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in config_params:
        config_params['beam'] = args.beam
    config_params['modules'] = config_params.get('modules', []) + args.modules
    if args.backend is not None:
        config_params['backend'] = normalize_backend(args.backend)

    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), overrides)
    config_params['reporting'] = reporting

    args.vecs = read_config_stream(args.vecs)
    task = mead.Task.get_task_specific(task_name, args.settings)
    output_dir, project, name, model_version, exporter_type, return_labels, is_remote = get_export_params(
        config_params.get('export', {}),
        args.output_dir,
        args.project,
        args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    # Here we reuse code in `.read_config` which needs a dataset index (when used with mead-train)
    # but when used with mead-export it is not needed. This is a dummy dataset index that will work.
    # It means we don't need to pass it in.
    datasets = [{'label': config_params['dataset']}]
    task.read_config(config_params, datasets, args.vecs, exporter_type=exporter_type)
    feature_exporter_field_map = create_feature_exporter_field_map(config_params['features'])
    exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                               feature_exporter_field_map=feature_exporter_field_map,
                               nbest_input=args.nbest_input)
    exporter.run(args.model, output_dir, project, name, model_version, remote=is_remote,
                 use_version=args.use_version, zip_results=args.zip, use_all_features=args.use_all_features)

def main():
    parser = argparse.ArgumentParser(description='Evaluate on a dataset')
    parser.add_argument('--model', required=True)
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--settings', default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', nargs="+", default=[])
    parser.add_argument('--reporting', nargs="+")
    parser.add_argument('--logging', default=DEFAULT_LOGGING_LOC, type=convert_path)
    parser.add_argument('--task', default='classify', choices={'classify', 'tagger', 'seq2seq', 'lm'})
    parser.add_argument('--backend', default='tf')
    parser.add_argument('--reader', default='default')
    parser.add_argument('--trim', default=True, type=str2bool)
    parser.add_argument('--batchsz', default=50, type=int)
    parser.add_argument('--trainer', default='default')
    parser.add_argument('--output', default=None)
    parser.add_argument('--remote')
    parser.add_argument('--features',
                        help='(optional) features in the format feature_name:index (column # in conll) or '
                             'just feature names (assumed sequential)',
                        default=[], nargs='+')
    parser.add_argument('--device', default='cpu')
    # our parse_extra_args doesn't handle lists :/
    parser.add_argument('--pair_suffix', nargs='+', default=[])
    args, extra_args = parser.parse_known_args()

    args.batchsz = args.batchsz if args.task != 'lm' else 1
    named_fields = {str(v): k for k, v in feature_index_mapping(args.features).items()}

    reader_options = parse_extra_args(['reader'], extra_args)['reader']
    reader_options = process_reader_options(reader_options)
    verbose_options = parse_extra_args(['verbose'], extra_args)['verbose']
    trainer_options = parse_extra_args(['trainer'], extra_args)['trainer']
    if 'span_type' not in trainer_options:
        trainer_options['span_type'] = 'iobes'
    model_options = parse_extra_args(['model'], extra_args)['model']

    args.logging = read_config_stream(args.logging)
    configure_logger(args.logging)
    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    backend = Backend(args.backend)
    backend.load(args.task)
    for module in args.modules:
        import_user_module(module)

    reporting = parse_extra_args(args.reporting if args.reporting is not None else [], extra_args)
    reporting_hooks, reporting = merge_reporting_with_settings(reporting, args.settings)
    reporting_fns = [x.step for x in create_reporting(reporting_hooks, reporting, {'task': args.task})]

    service = get_service(args.task)
    model = service.load(args.model, backend=args.backend, remote=args.remote, device=args.device, **model_options)
    vectorizers = get_vectorizers(args.task, model)
    reader = create_reader(args.task, vectorizers, args.trim, type=args.reader,
                           named_fields=named_fields, pair_suffix=args.pair_suffix, **reader_options)
    reader = patch_reader(args.task, model, reader)
    data, txts = load_data(args.task, reader, model, args.dataset, args.batchsz)

    if args.task == 'seq2seq':
        trainer_options['tgt_rlut'] = {v: k for k, v in model.tgt_vocab.items()}

    trainer = get_trainer(model, args.trainer, verbose_options, backend.name,
                          gpu=args.device != 'cpu', nogpu=args.device == 'cpu', **trainer_options)
    if args.task == 'classify':
        _ = trainer.test(data, reporting_fns=reporting_fns, phase='Test', verbose=verbose_options,
                         output=args.output, txts=txts, **model_options)
    elif args.task == 'tagger':
        _ = trainer.test(data, reporting_fns=reporting_fns, phase='Test', verbose=verbose_options,
                         conll_output=args.output, txts=txts, **model_options)
    else:
        _ = trainer.test(data, reporting_fns=reporting_fns, phase='Test', verbose=verbose_options, **model_options)

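# A sketch of what the --features plumbing above produces for "name:index"
# pairs (the real feature_index_mapping also handles bare names, which are
# assumed sequential). Hypothetical invocation: --features pos:1 chunk:2
features = ['pos:1', 'chunk:2']
mapping = {name: int(idx) for name, idx in (f.split(':') for f in features)}  # {'pos': 1, 'chunk': 2}
named_fields = {str(v): k for k, v in mapping.items()}                        # {'1': 'pos', '2': 'chunk'}
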