def run(self, args=None):
    """Main entrypoint.

    Parses the command line, records the run settings on the instance,
    starts the beat service, calls ``exec_function`` with the parsed
    arguments and, when ``--statistics_url`` is set, posts a summary of the
    run to that endpoint.

    Args:
      args: Optional list of command line arguments. ``None`` lets argparse
        read them from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()

    # Declared in this exact order so that the generated help is unchanged.
    option_specs = [
        (('-s', '--storage_config'), {
            'default': None,
            'help': ('Configuration of available storages as a file or a JSON string. '
                     'Setting "-" will read from the standard input.'),
        }),
        (('-t', '--task_id'), {
            'default': None,
            'help': "Identifier of this run.",
        }),
        (('-i', '--image'), {
            'default': "?",
            'help': "Full URL (registry/image:tag) of the image used for this run.",
        }),
        (('-b', '--beat_url'), {
            'default': None,
            'help': ("Endpoint that listens to beat requests "
                     "(push notifications of activity)."),
        }),
        (('-bi', '--beat_interval'), {
            'default': 30,
            'type': int,
            'help': "Interval of beat requests in seconds.",
        }),
        (('--statistics_url',), {
            'default': None,
            'help': ('Endpoint that listens to statistics summaries generated '
                     'at the end of the execution'),
        }),
        (('-ms', '--model_storage'), {
            # Raises KeyError when MODELS_DIR is not defined in the environment.
            'default': os.environ["MODELS_DIR"],
            'help': 'Model storage in the form <storage_id>:[<path>].',
        }),
        (('-msr', '--model_storage_read'), {
            'default': None,
            'help': ('Model storage to read from, in the form <storage_id>:[<path>] '
                     '(defaults to model_storage).'),
        }),
        (('-msw', '--model_storage_write'), {
            'default': None,
            'help': ('Model storage to write to, in the form <storage_id>:[<path>] '
                     '(defaults to model_storage).'),
        }),
        (('-c', '--config'), {
            'default': None,
            'help': ('Configuration as a file or a JSON string. '
                     'Setting "-" will read from the standard input.'),
        }),
        (('--config_update_mode',), {
            'choices': ['default', 'merge', 'replace'],
            'default': 'default',
            'help': ('How to update the parent task configuration with the given '
                     'configuration. '
                     '"default": automatic mode based on the configuration, '
                     '"merge": recursively update configuration fields, '
                     '"replace": replace the top-most fields.'),
        }),
        (('-m', '--model'), {
            'default': None,
            'help': 'Model to load.',
        }),
        (('-g', '--gpuid'), {
            'default': "0",
            'help': "Comma-separated list of 0-indexed GPU identifiers.",
        }),
        (('--no_push',), {
            'default': False,
            'action': 'store_true',
            'help': 'Do not push model.',
        }),
    ]
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    # Give the concrete utility a chance to register its own options.
    self.declare_arguments(parser)
    args = parser.parse_args(args=args)

    if args.task_id is None:
        args.task_id = str(uuid.uuid4())
    self._task_id = args.task_id
    self._image = args.image

    hostname = os.uname()[1]
    start_beat_service(
        hostname, args.beat_url, args.task_id, interval=args.beat_interval)

    storage_config = None
    if args.storage_config:
        storage_config = load_config(args.storage_config)
    self._storage = StorageClient(config=storage_config)

    # The read and write storages both fall back to the generic one.
    for storage_attr in ('model_storage_read', 'model_storage_write'):
        if getattr(args, storage_attr) is None:
            setattr(args, storage_attr, args.model_storage)
    self._model_storage_read = args.model_storage_read
    self._model_storage_write = args.model_storage_write

    # for backward compatibility - a single identifier is exposed as an int
    gpu_ids = [int(token) for token in args.gpuid.split(',')]
    args.gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids
    self._gpuid = args.gpuid

    if args.config is not None:
        self._config = load_config(args.config)
    else:
        self._config = None
    self._model = args.model
    self._no_push = args.no_push

    logger.info('Starting executing utility %s=%s', self.name, args.image)
    start_time = time.time()
    stats = self.exec_function(args)
    end_time = time.time()
    elapsed = end_time - start_time
    logger.info('Finished executing utility in %s seconds', str(elapsed))

    if args.statistics_url is not None:
        summary = {
            'task_id': self._task_id,
            'start_time': start_time,
            'end_time': end_time,
            'statistics': stats or {}
        }
        requests.post(args.statistics_url, json=summary)
def run(self):
    """Main entrypoint.

    Parses ``sys.argv``, validates the options, starts the beat service,
    downloads the parent model when one is set and the instance is not
    stateless, then hands over to the wrapper of the selected sub-command
    (``train``, ``trans``, ``serve`` or ``preprocess``).

    Raises:
      ValueError: if ``trans`` or ``serve`` is requested without a model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--config', default=None,
        help=('Configuration as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-s', '--storage_config', default=None,
        help=('Configuration of available storages as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-ms', '--model_storage',
        help='Model storage in the form <storage_id>:[<path>].')
    parser.add_argument(
        '-m', '--model', default=None,
        help='Model to load.')
    parser.add_argument(
        '-g', '--gpuid', default="0",
        help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
    parser.add_argument(
        '-t', '--task_id', default=None,
        help="Identifier of this run.")
    parser.add_argument(
        '-i', '--image', default="?",
        help="Full URL (registry/image:tag) of the image used for this run.")
    parser.add_argument(
        '-b', '--beat_url', default=None,
        help=("Endpoint that listens to beat requests "
              "(push notifications of activity)."))
    parser.add_argument(
        '-bi', '--beat_interval', default=30, type=int,
        help="Interval of beat requests in seconds.")

    # One sub-command per run type; the chosen name ends up in args.cmd.
    commands = parser.add_subparsers(help='Run type', dest='cmd')
    commands.add_parser('train', help='Run a training.')
    trans_cli = commands.add_parser('trans', help='Run a translation.')
    trans_cli.add_argument('-i', '--input', required=True, help='Input file.')
    trans_cli.add_argument('-o', '--output', required=True, help='Output file.')
    serve_cli = commands.add_parser('serve', help='Serve a model.')
    serve_cli.add_argument('-hs', '--host', default="0.0.0.0",
                           help='Serving hostname.')
    serve_cli.add_argument('-p', '--port', type=int, default=4000,
                           help='Serving port.')
    commands.add_parser('preprocess', help='Sample and preprocess corpus.')

    args = parser.parse_args()

    if args.config is None and args.model is None:
        parser.error('at least one of --config or --model options must be set')
    # Model storage is only optional for stateless instances and preprocessing.
    storage_optional = self._stateless or args.cmd == 'preprocess'
    if not storage_optional and not args.model_storage:
        parser.error('argument -ms/--model_storage is required')

    if args.task_id is None:
        args.task_id = str(uuid.uuid4())

    # for backward compatibility - a single identifier is exposed as an int
    gpu_ids = [int(token) for token in args.gpuid.split(',')]
    args.gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids

    start_beat_service(os.uname()[1], args.beat_url, args.task_id,
                       interval=args.beat_interval)

    if args.config is not None:
        config = load_config(args.config)
    else:
        config = {}
    # An explicit --model takes precedence over the one named in the config.
    parent_model = args.model or config.get('model')

    storage = StorageClient(
        tmp_dir=self._tmp_dir,
        config=load_config(args.storage_config) if args.storage_config else None)

    model_path = None
    if parent_model is not None and not self._stateless:
        # Download model locally and merge the configuration.
        remote_model_path = storage.join(args.model_storage, parent_model)
        model_path = os.path.join(self._models_dir, parent_model)
        fetch_model(storage, remote_model_path, model_path)
        model_config_path = os.path.join(model_path, 'config.json')
        with open(model_config_path, 'r') as config_file:
            model_config = json.load(config_file)
        config = merge_config(model_config, config)

    command = args.cmd
    if command == 'train':
        self.train_wrapper(args.task_id, config, storage,
                           args.model_storage, args.image,
                           parent_model=parent_model,
                           model_path=model_path,
                           gpuid=args.gpuid)
        return
    if command == 'trans':
        if parent_model is None:
            raise ValueError('translation requires a model')
        self.trans_wrapper(config, model_path, storage,
                           args.input, args.output,
                           gpuid=args.gpuid)
        return
    if command == 'serve':
        if parent_model is None:
            raise ValueError('serving requires a model')
        self.serve_wrapper(config, model_path, args.host, args.port,
                           gpuid=args.gpuid)
        return
    if command == 'preprocess':
        self.preprocess(config, storage)
def run(self):
    """Main entrypoint.

    Parses ``sys.argv``, starts the beat service, downloads the parent model
    when one is set and the instance is not stateless, then runs either the
    training or the translation wrapper.

    Raises:
      ValueError: if a command other than ``train`` is run without a model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--config', default=None,
        help=('Configuration as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-s', '--storage_config', default=None,
        help=('Configuration of available storages as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    # Only stateless instances may omit the model storage.
    parser.add_argument(
        '-ms', '--model_storage', required=not self._stateless,
        help='Model storage in the form <storage_id>:[<path>].')
    parser.add_argument(
        '-m', '--model', default=None,
        help='Model to load.')
    parser.add_argument(
        '-g', '--gpuid', default=0, type=int,
        help="1-indexed GPU identifier (0 for CPU).")
    parser.add_argument(
        '-t', '--task_id', default=None,
        help="Identifier of this run.")
    parser.add_argument(
        '-i', '--image', default="?",
        help="Full URL (registry/image:tag) of the image used for this run.")
    parser.add_argument(
        '-b', '--beat_url', default=None,
        help=("Endpoint that listens to beat requests "
              "(push notifications of activity)."))
    parser.add_argument(
        '-bi', '--beat_interval', default=30, type=int,
        help="Interval of beat requests in seconds.")

    # The selected sub-command name is stored in args.cmd.
    commands = parser.add_subparsers(help='Run type', dest='cmd')
    commands.add_parser('train', help='Run a training.')
    trans_cli = commands.add_parser('trans', help='Run a translation.')
    trans_cli.add_argument('-i', '--input', required=True, help='Input file.')
    trans_cli.add_argument('-o', '--output', required=True, help='Output file.')

    args = parser.parse_args()
    if args.config is None and args.model is None:
        parser.error('at least one of --config or --model options must be set')
    if args.task_id is None:
        args.task_id = str(uuid.uuid4())

    hostname = os.uname()[1]
    start_beat_service(hostname, args.beat_url, args.task_id,
                       interval=args.beat_interval)

    if args.config is not None:
        config = load_config(args.config)
    else:
        config = {}
    # An explicit --model takes precedence over the one named in the config.
    parent_model = args.model or config.get('model')

    storage_config = None
    if args.storage_config:
        storage_config = load_config(args.storage_config)
    storage = StorageClient(config=storage_config)

    model_path = None
    if parent_model is not None and not self._stateless:
        # Download model locally and merge the configuration.
        remote_model_path = storage.join(args.model_storage, parent_model)
        model_path = os.path.join(self._models_dir, parent_model)
        fetch_model(storage, remote_model_path, model_path)
        model_config_path = os.path.join(model_path, 'config.json')
        with open(model_config_path, 'r') as config_file:
            model_config = json.load(config_file)
        config = merge_config(model_config, config)

    if args.cmd == 'train':
        self.train_wrapper(args.task_id, config, storage,
                           args.model_storage, args.image,
                           model_path=model_path,
                           gpuid=args.gpuid)
        return
    # Every command other than train goes through this check before the
    # command name is even looked at.
    if parent_model is None:
        raise ValueError('translation requires a model')
    if args.cmd == 'trans':
        self.trans_wrapper(config, model_path, storage,
                           args.input, args.output,
                           gpuid=args.gpuid)
def run(self, args=None):
    """Main entrypoint.

    Parses the command line, validates the options, starts the beat service,
    downloads the parent model when one is set and the instance is not
    stateless, then dispatches to the handler of the selected sub-command
    (``train``, ``trans``, ``release``, ``serve``, ``preprocess`` or
    ``buildvocab``).

    Args:
      args: Optional list of command line arguments. ``None`` lets argparse
        read them from ``sys.argv``.

    Raises:
      ValueError: if the type of the parent model (``modelType`` in the
        merged configuration) is not accepted by the selected sub-command,
        or if a sub-command that needs a model is run without one.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--config', default=None,
        help=('Configuration as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-s', '--storage_config', default=None,
        help=('Configuration of available storages as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-ms', '--model_storage', default=None,
        help='Model storage in the form <storage_id>:[<path>].')
    parser.add_argument(
        '-msr', '--model_storage_read', default=None,
        help=('Model storage to read from, in the form <storage_id>:[<path>] '
              '(defaults to model_storage).'))
    parser.add_argument(
        '-msw', '--model_storage_write', default=None,
        help=('Model storage to write to, in the form <storage_id>:[<path>] '
              '(defaults to model_storage).'))
    parser.add_argument(
        '-m', '--model', default=None,
        help='Model to load.')
    parser.add_argument(
        '-g', '--gpuid', default="0",
        help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
    parser.add_argument(
        '-t', '--task_id', default=None,
        help="Identifier of this run.")
    parser.add_argument(
        '-i', '--image', default="?",
        help="Full URL (registry/image:tag) of the image used for this run.")
    parser.add_argument(
        '-b', '--beat_url', default=None,
        help=("Endpoint that listens to beat requests "
              "(push notifications of activity)."))
    parser.add_argument(
        '-bi', '--beat_interval', default=30, type=int,
        help="Interval of beat requests in seconds.")
    parser.add_argument(
        '--no_push', default=False, action='store_true',
        help='Do not push model.')

    # One sub-command per run type; the chosen name ends up in args.cmd.
    subparsers = parser.add_subparsers(help='Run type', dest='cmd')
    subparsers.add_parser('train', help='Run a training.')
    parser_trans = subparsers.add_parser('trans', help='Run a translation.')
    parser_trans.add_argument('-i', '--input', required=True, nargs='+',
                              help='Input file.')
    parser_trans.add_argument('-o', '--output', required=True, nargs='+',
                              help='Output file.')
    parser_trans.add_argument('--as_release', default=False, action='store_true',
                              help='Translate from a released model.')
    parser_release = subparsers.add_parser(
        'release', help='Release a model for serving.')
    parser_release.add_argument(
        '-d', '--destination', default=None,
        help='Released model storage (defaults to the model storage).')
    parser_serve = subparsers.add_parser('serve', help='Serve a model.')
    parser_serve.add_argument('-hs', '--host', default="0.0.0.0",
                              help='Serving hostname.')
    parser_serve.add_argument('-p', '--port', type=int, default=4000,
                              help='Serving port.')
    parser_preprocess = subparsers.add_parser(
        'preprocess', help='Sample and preprocess corpus.')
    parser_preprocess.add_argument('--build_model', default=False,
                                   action='store_true',
                                   help='Preprocess data into a model.')
    subparsers.add_parser('buildvocab', help='Build vocabularies.')

    args = parser.parse_args(args=args)
    if args.config is None and args.model is None:
        parser.error('at least one of --config or --model options must be set')

    # The read and write storages both fall back to the generic one.
    if args.model_storage_read is None:
        args.model_storage_read = args.model_storage
    if args.model_storage_write is None:
        args.model_storage_write = args.model_storage

    # args.build_model only exists for the preprocess sub-command, hence the
    # short-circuit on the command name before reading it.
    needs_model_storage = (
        not self._stateless
        and (args.cmd != 'preprocess' or args.build_model))
    # Both storages must be resolved. The write storage used to be tested
    # twice here, which let a missing read storage through and made the model
    # download below fail on a None storage instead of reporting a usage error.
    if needs_model_storage and (
            args.model_storage_read is None
            or args.model_storage_write is None):
        parser.error('Missing model storage argument')

    if args.task_id is None:
        args.task_id = str(uuid.uuid4())

    # for backward compatibility - convert singleton in int
    gpu_ids = [int(g) for g in args.gpuid.split(',')]
    args.gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids

    start_beat_service(os.uname()[1], args.beat_url, args.task_id,
                       interval=args.beat_interval)

    config = load_config(args.config) if args.config is not None else {}
    # An explicit --model takes precedence over the one named in the config.
    parent_model = args.model or config.get('model')

    storage = StorageClient(
        tmp_dir=self._tmp_dir,
        config=load_config(args.storage_config) if args.storage_config else None)

    if parent_model is not None and not self._stateless:
        # Download model locally and merge the configuration.
        remote_model_path = storage.join(args.model_storage_read, parent_model)
        model_path = os.path.join(self._models_dir, parent_model)
        fetch_model(storage, remote_model_path, model_path)
        with open(os.path.join(model_path, 'config.json'), 'r') as config_file:
            model_config = json.load(config_file)
        # Model configurations without an explicit type get one derived from
        # the model name.
        if 'modelType' not in model_config:
            if parent_model.endswith('_release'):
                model_config['modelType'] = 'release'
            else:
                model_config['modelType'] = 'checkpoint'
        # Merge into a copy so that model_config is handed over unmodified.
        config = merge_config(copy.deepcopy(model_config), config)
    else:
        model_path = None
        model_config = None

    push_model = not args.no_push

    if args.cmd == 'train':
        if (parent_model is not None
                and config['modelType'] not in ('checkpoint', 'base', 'preprocess')):
            raise ValueError(
                'cannot train from a model that is not a training checkpoint, '
                'a base model, or a preprocess model')
        self.train_wrapper(args.task_id, config, storage,
                           args.model_storage_write, args.image,
                           parent_model=parent_model,
                           model_path=model_path,
                           model_config=model_config,
                           gpuid=args.gpuid,
                           push_model=push_model)
    elif args.cmd == 'buildvocab':
        self.build_vocab(args.task_id, config, storage,
                         args.model_storage_write, args.image,
                         push_model=push_model)
    elif args.cmd == 'trans':
        if (not self._stateless
                and (parent_model is None or config['modelType'] != 'checkpoint')):
            raise ValueError('translation requires a training checkpoint')
        self.trans_wrapper(config, model_path, storage,
                           args.input, args.output,
                           as_release=args.as_release,
                           gpuid=args.gpuid)
    elif args.cmd == 'release':
        if (not self._stateless
                and (parent_model is None or config['modelType'] != 'checkpoint')):
            raise ValueError('releasing requires a training checkpoint')
        if args.destination is None:
            args.destination = args.model_storage_write
        self.release_wrapper(config, model_path, storage,
                             args.image, args.destination,
                             gpuid=args.gpuid,
                             push_model=push_model)
    elif args.cmd == 'serve':
        if (not self._stateless
                and (parent_model is None or config['modelType'] != 'release')):
            raise ValueError('serving requires a released model')
        self.serve_wrapper(config, model_path, args.host, args.port,
                           gpuid=args.gpuid)
    elif args.cmd == 'preprocess':
        if not args.build_model:
            self.preprocess(config, storage)
        else:
            if (parent_model is not None
                    and config['modelType'] not in ('checkpoint', 'base')):
                raise ValueError(
                    'cannot preprocess from a model that is not a training '
                    'checkpoint or a base model')
            self.preprocess_into_model(args.task_id, config, storage,
                                       args.model_storage_write, args.image,
                                       parent_model=parent_model,
                                       model_path=model_path,
                                       push_model=push_model)
def run(self, args=None):
    """Main entrypoint.

    Parses the command line, records the run settings on the instance,
    starts the beat service, calls ``exec_function`` with the parsed
    arguments and, when ``--statistics_url`` is set, posts a summary of the
    run to that endpoint.

    Args:
      args: Optional list of command line arguments. ``None`` lets argparse
        read them from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()
    option = cli.add_argument

    option("-s", "--storage_config", default=None,
           help=("Configuration of available storages as a file or a JSON string. "
                 'Setting "-" will read from the standard input.'))
    option("-t", "--task_id", default=None,
           help="Identifier of this run.")
    option("-i", "--image", default="?",
           help="Full URL (registry/image:tag) of the image used for this run.")
    option("-b", "--beat_url", default=None,
           help=("Endpoint that listens to beat requests "
                 "(push notifications of activity)."))
    option("-bi", "--beat_interval", default=30, type=int,
           help="Interval of beat requests in seconds.")
    option("--statistics_url", default=None,
           help=("Endpoint that listens to statistics summaries generated "
                 "at the end of the execution"))
    # Raises KeyError when MODELS_DIR is not defined in the environment.
    option("-ms", "--model_storage", default=os.environ["MODELS_DIR"],
           help="Model storage in the form <storage_id>:[<path>].")
    option("-msr", "--model_storage_read", default=None,
           help=("Model storage to read from, in the form <storage_id>:[<path>] "
                 "(defaults to model_storage)."))
    option("-msw", "--model_storage_write", default=None,
           help=("Model storage to write to, in the form <storage_id>:[<path>] "
                 "(defaults to model_storage)."))
    option("-c", "--config", default=None,
           help=("Configuration as a file or a JSON string. "
                 'Setting "-" will read from the standard input.'))
    option("--config_update_mode",
           choices=["default", "merge", "replace"],
           default="default",
           help=("How to update the parent task configuration with the given "
                 "configuration. "
                 '"default": automatic mode based on the configuration, '
                 '"merge": recursively update configuration fields, '
                 '"replace": replace the top-most fields.'))
    option("-m", "--model", default=None, help="Model to load.")
    option("-g", "--gpuid", default="0",
           help="Comma-separated list of 0-indexed GPU identifiers.")
    option("--no_push", default=False, action="store_true",
           help="Do not push model.")

    # Give the concrete utility a chance to register its own options.
    self.declare_arguments(cli)
    args = cli.parse_args(args=args)

    if args.task_id is None:
        args.task_id = str(uuid.uuid4())
    self._task_id = args.task_id
    self._image = args.image

    hostname = os.uname()[1]
    start_beat_service(hostname, args.beat_url, args.task_id,
                       interval=args.beat_interval)

    storage_config = None
    if args.storage_config:
        storage_config = load_config(args.storage_config)
    self._storage = StorageClient(config=storage_config)

    # The read and write storages both fall back to the generic one.
    for storage_attr in ("model_storage_read", "model_storage_write"):
        if getattr(args, storage_attr) is None:
            setattr(args, storage_attr, args.model_storage)
    self._model_storage_read = args.model_storage_read
    self._model_storage_write = args.model_storage_write

    # for backward compatibility - a single identifier is exposed as an int
    gpu_ids = [int(token) for token in args.gpuid.split(",")]
    args.gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids
    self._gpuid = args.gpuid

    if args.config is not None:
        self._config = load_config(args.config)
    else:
        self._config = None
    self._model = args.model
    self._no_push = args.no_push

    logger.info("Starting executing utility %s=%s", self.name, args.image)
    start_time = time.time()
    stats = self.exec_function(args)
    end_time = time.time()
    elapsed = end_time - start_time
    logger.info("Finished executing utility in %.1f seconds", elapsed)

    if args.statistics_url is not None:
        summary = {
            "task_id": self._task_id,
            "start_time": start_time,
            "end_time": end_time,
            "statistics": stats or {},
        }
        requests.post(args.statistics_url, json=summary)