def main():
    """Schedule a pipeline job against an odin server, over websocket or HTTP."""
    # Exit quietly on Ctrl-C rather than printing a traceback.
    signal.signal(signal.SIGINT, lambda *args, **kwargs: exit(0))

    parser = argparse.ArgumentParser(description='HTTP or Websocket-based Pipeline scheduler')
    parser.add_argument('work', help='Job')
    parser.add_argument('--host', default=ODIN_URL, type=str)
    parser.add_argument('--port', default=ODIN_PORT)
    parser.add_argument('--token', help="File where JWT token can reside",
                        default=os.path.expanduser("~/.odin.token"))
    parser.add_argument('--username', '-u', help="Username", default=getuser())
    parser.add_argument('--password', '-p', help="Password")
    parser.add_argument(
        '--scheme',
        choices={'http', 'wss', 'ws', 'https'},
        default=ODIN_SCHEME,
        help='Connection protocol, use `http` for REST, use `wss` for remote connections and `ws` for localhost',
    )
    args, overrides = parser.parse_known_args()

    # Any extra `--x.*` flags become a context dict for the HTTP tier.
    context = parse_and_merge_overrides({}, overrides, pre='x')
    url = f'{args.scheme}://{args.host}:{args.port}'

    if args.scheme.startswith('ws'):
        # Websocket tier does not accept a context payload.
        if context:
            LOGGER.warning("Context is ignored by web-socket tier")
        asyncio.get_event_loop().run_until_complete(schedule_pipeline(url, args.work))
        return

    # REST tier: authenticate, and if the cached token is rejected, drop the
    # token file and retry once with freshly-obtained credentials.
    token = get_jwt_token(url, args.token, args.username, args.password)
    try:
        schedule_pipeline_http(url, token, args.work, context)
    except ValueError:
        if os.path.exists(args.token):
            os.remove(args.token)
        token = get_jwt_token(url, args.token, args.username, args.password)
        schedule_pipeline_http(url, token, args.work, context)
def main():
    """Train a model from a mead config.

    Parses CLI arguments, reads the experiment config and merges in any
    `--x.*` overrides, resolves the dataset/embeddings/vectorizer indices
    (falling back to mead-settings, then the packaged defaults), then builds
    the task and runs training.

    Fixes vs. the prior version: the settings read no longer uses a bare
    `except:` (which also swallowed SystemExit/KeyboardInterrupt), and the
    dataset-override warning goes through the module `logger` instead of the
    root logger via `logging.warning`, consistent with the rest of this file.
    """
    parser = argparse.ArgumentParser(description='Train a text classifier')
    parser.add_argument('--config',
                        help='JSON/YML Configuration for an experiment: local file or remote URL',
                        type=convert_path, default="$MEAD_CONFIG")
    parser.add_argument('--settings', help='JSON/YML Configuration for mead',
                        default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--task_modules', help='tasks to load, must be local',
                        default=[], nargs='+', required=False)
    parser.add_argument('--datasets',
                        help='index of dataset labels: local file, remote URL or mead-ml/hub ref',
                        type=convert_path)
    parser.add_argument('--modules',
                        help='modules to load: local files, remote URLs or mead-ml/hub refs',
                        default=[], nargs='+', required=False)
    parser.add_argument('--mod_train_file', help='override the training set')
    parser.add_argument('--mod_valid_file', help='override the validation set')
    parser.add_argument('--mod_test_file', help='override the test set')
    parser.add_argument('--fit_func', help='override the fit function')
    parser.add_argument('--embeddings',
                        help='index of embeddings: local file, remote URL or mead-ml/hub ref',
                        type=convert_path)
    parser.add_argument('--vecs',
                        help='index of vectorizers: local file, remote URL or hub mead-ml/ref',
                        type=convert_path)
    parser.add_argument('--logging', help='json file for logging',
                        default=DEFAULT_LOGGING_LOC, type=convert_path)
    parser.add_argument('--task', help='task to run',
                        choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--gpus', help='Number of GPUs (defaults to number available)',
                        type=int, default=-1)
    parser.add_argument('--basedir',
                        help='Override the base directory where models are stored',
                        type=str)
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--checkpoint', help='Restart training from this checkpoint')
    parser.add_argument('--prefer_eager',
                        help="If running in TensorFlow, should we prefer eager model",
                        type=str2bool)
    args, overrides = parser.parse_known_args()

    config_params = read_config_stream(args.config)
    config_params = parse_and_merge_overrides(config_params, overrides, pre='x')
    if args.basedir is not None:
        config_params['basedir'] = args.basedir

    # task_module overrides are not allowed via hub or HTTP, must be defined locally
    for task in args.task_modules:
        import_user_module(task)

    task_name = config_params.get('task', 'classify') if args.task is None else args.task

    args.logging = read_config_stream(args.logging)
    configure_logger(args.logging, config_params.get('basedir', './{}'.format(task_name)))

    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        # Missing/unreadable settings is non-fatal: fall back to empty settings.
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    args.datasets = args.settings.get(
        'datasets', convert_path(DEFAULT_DATASETS_LOC)) if args.datasets is None else args.datasets
    args.datasets = read_config_stream(args.datasets)
    if args.mod_train_file or args.mod_valid_file or args.mod_test_file:
        logger.warning(
            'Warning: overriding the training/valid/test data with user-specified files'
            ' different from what was specified in the dataset index. Creating a new key for this entry')
        update_datasets(args.datasets, config_params, args.mod_train_file,
                        args.mod_valid_file, args.mod_test_file)

    args.embeddings = args.settings.get(
        'embeddings', convert_path(DEFAULT_EMBEDDINGS_LOC)) if args.embeddings is None else args.embeddings
    args.embeddings = read_config_stream(args.embeddings)

    args.vecs = args.settings.get(
        'vecs', convert_path(DEFAULT_VECTORIZERS_LOC)) if args.vecs is None else args.vecs
    args.vecs = read_config_stream(args.vecs)

    if args.gpus:
        # NOTE(review): the default of -1 is truthy, so train.gpus is set unless
        # `--gpus 0` is passed explicitly — preserved as-is.
        config_params['train']['gpus'] = args.gpus
    if args.fit_func:
        config_params['train']['fit_func'] = args.fit_func
    if args.backend:
        config_params['backend'] = normalize_backend(args.backend)

    # Union of modules from the config and the command line, deduplicated.
    config_params['modules'] = list(set(chain(config_params.get('modules', []), args.modules)))

    cmd_hooks = args.reporting if args.reporting is not None else []
    config_hooks = config_params.get('reporting') if config_params.get('reporting') is not None else []
    reporting = parse_extra_args(set(chain(cmd_hooks, config_hooks)), overrides)
    config_params['reporting'] = reporting

    logger.info('Task: [{}]'.format(task_name))

    task = mead.Task.get_task_specific(task_name, args.settings)
    task.read_config(config_params, args.datasets, args.vecs,
                     reporting_args=overrides, prefer_eager=args.prefer_eager)
    task.initialize(args.embeddings)
    task.train(args.checkpoint)
def main():
    """Export a trained mead model for serving.

    Reads the experiment config, resolves export parameters (output dir,
    project/name, version, exporter type), then builds the task and runs the
    exporter against the saved model bundle.
    """
    parser = argparse.ArgumentParser(description='Export a model')
    parser.add_argument('--config', help='configuration for an experiment',
                        required=True, type=convert_path)
    parser.add_argument('--settings', help='configuration for mead', required=False,
                        default=DEFAULT_SETTINGS_LOC, type=convert_path)
    parser.add_argument('--modules', help='modules to load',
                        default=[], nargs='+', required=False)
    parser.add_argument('--datasets', help='json library of dataset labels')
    parser.add_argument('--vecs',
                        help='index of vectorizers: local file, remote URL or hub mead-ml/ref',
                        default='config/vecs.json', type=convert_path)
    parser.add_argument('--logging', help='json file for logging',
                        default='config/logging.json', type=convert_path)
    parser.add_argument('--task', help='task to run',
                        choices=['classify', 'tagger', 'seq2seq', 'lm'])
    parser.add_argument('--exporter_type', help="exporter type (default 'default')",
                        default=None)
    parser.add_argument('--return_labels',
                        help='if true, the exported model returns actual labels else '
                             'the indices for labels vocab (default False)',
                        default=None)
    parser.add_argument('--model', help='model name', required=True, type=unzip_files)
    parser.add_argument('--model_version', help='model_version', default=None)
    parser.add_argument('--output_dir', help="output dir (default './models')", default=None)
    parser.add_argument('--project', help='Name of project, used in path first', default=None)
    parser.add_argument('--name', help='Name of the model, used second in the path', default=None)
    parser.add_argument('--beam', help='beam_width', default=30, type=int)
    parser.add_argument('--nbest_input', help='Is the input to this model N-best',
                        default=False, type=str2bool)
    parser.add_argument('--is_remote',
                        help='if True, separate items for remote server and client. '
                             'If False bundle everything together (default True)',
                        default=None)
    parser.add_argument('--backend', help='The deep learning backend to use')
    parser.add_argument('--reporting', help='reporting hooks', nargs='+')
    parser.add_argument('--use_version', help='Should we use the version?',
                        type=str2bool, default=True)
    parser.add_argument('--use_all_features',
                        help='If a feature is found via vectorizer and not in embeddings, should we include it?',
                        type=str2bool, default=False)
    parser.add_argument('--zip', help='Should we zip the results?',
                        type=str2bool, default=False)
    args, overrides = parser.parse_known_args()

    configure_logger(args.logging)
    params = read_config_stream(args.config)
    params = parse_and_merge_overrides(params, overrides, pre='x')

    try:
        args.settings = read_config_stream(args.settings)
    except Exception:
        logger.warning('Warning: no mead-settings file was found at [{}]'.format(args.settings))
        args.settings = {}

    # CLI --task wins; otherwise fall back to the config (defaulting to classify).
    if args.task is None:
        task_name = params.get('task', 'classify')
    else:
        task_name = args.task

    # Remove multigpu references — export never needs GPUs.
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    os.environ['NV_GPU'] = ""
    train_section = params.get('train', {})
    if 'gpus' in train_section:
        del params['train']['gpus']

    if task_name == 'seq2seq' and 'beam' not in params:
        params['beam'] = args.beam

    params['modules'] = params.get('modules', []) + args.modules
    if args.backend is not None:
        params['backend'] = normalize_backend(args.backend)

    # Reporting hooks come from both the command line and the config.
    cli_hooks = [] if args.reporting is None else args.reporting
    cfg_hooks = params.get('reporting')
    if cfg_hooks is None:
        cfg_hooks = []
    params['reporting'] = parse_extra_args(set(chain(cli_hooks, cfg_hooks)), overrides)

    args.vecs = read_config_stream(args.vecs)

    task = mead.Task.get_task_specific(task_name, args.settings)
    (output_dir, project, name, model_version,
     exporter_type, return_labels, is_remote) = get_export_params(
        params.get('export', {}),
        args.output_dir,
        args.project,
        args.name,
        args.model_version,
        args.exporter_type,
        args.return_labels,
        args.is_remote,
    )
    # Here we reuse code in `.read_config` which needs a dataset index (when used with mead-train)
    # but when used with mead-export it is not needed. This is a dummy dataset index that will work
    # It means we don't need to pass it in
    datasets = [{'label': params['dataset']}]
    task.read_config(params, datasets, args.vecs, exporter_type=exporter_type)

    field_map = create_feature_exporter_field_map(params['features'])
    exporter = create_exporter(task, exporter_type, return_labels=return_labels,
                               feature_exporter_field_map=field_map,
                               nbest_input=args.nbest_input)
    exporter.run(args.model, output_dir, project, name, model_version,
                 remote=is_remote, use_version=args.use_version,
                 zip_results=args.zip, use_all_features=args.use_all_features)