Exemple #1
0
    def run(self):
        """Main entrypoint: parse ``sys.argv`` and dispatch the requested run.

        The global options are followed by one sub-command (``train``,
        ``trans``, ``serve`` or ``preprocess``) stored in ``args.cmd``.

        Raises:
          SystemExit: raised by ``parser.error`` when neither --config nor
            --model is given, or when a non-stateless run other than
            ``preprocess`` has no --model_storage.
          ValueError: when ``trans`` or ``serve`` is requested without a model,
            or when a --gpuid item is not an integer.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-c', '--config',
            default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-s', '--storage_config',
            default=None,
            help=('Configuration of available storages as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-ms', '--model_storage',
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument(
            '-m', '--model',
            default=None,
            help='Model to load.')
        parser.add_argument(
            '-g', '--gpuid',
            default="0",
            help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        parser.add_argument(
            '-t', '--task_id',
            default=None,
            help="Identifier of this run.")
        parser.add_argument(
            '-i', '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run.")
        parser.add_argument(
            '-b', '--beat_url',
            default=None,
            help=("Endpoint that listens to beat requests "
                  "(push notifications of activity)."))
        parser.add_argument(
            '-bi', '--beat_interval',
            default=30,
            type=int,
            help="Interval of beat requests in seconds.")

        subparsers = parser.add_subparsers(help='Run type', dest='cmd')

        # 'train' declares no option of its own, so its parser is not kept.
        subparsers.add_parser('train', help='Run a training.')

        parser_trans = subparsers.add_parser('trans',
                                             help='Run a translation.')
        parser_trans.add_argument('-i', '--input',
                                  required=True,
                                  help='Input file.')
        parser_trans.add_argument('-o', '--output',
                                  required=True,
                                  help='Output file.')

        parser_serve = subparsers.add_parser('serve', help='Serve a model.')
        parser_serve.add_argument('-hs', '--host',
                                  default="0.0.0.0",
                                  help='Serving hostname.')
        parser_serve.add_argument('-p', '--port',
                                  type=int,
                                  default=4000,
                                  help='Serving port.')

        # Registered for its name only; previously the returned parser was
        # stored as a stray attribute on the main parser object.
        subparsers.add_parser('preprocess',
                              help='Sample and preprocess corpus.')

        args = parser.parse_args()
        if args.config is None and args.model is None:
            parser.error(
                'at least one of --config or --model options must be set')
        if (not self._stateless and args.cmd != 'preprocess'
                and not args.model_storage):
            parser.error('argument -ms/--model_storage is required')
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        # for backward compatibility - convert singleton in int
        gpuids = [int(g) for g in args.gpuid.split(',')]
        args.gpuid = gpuids[0] if len(gpuids) == 1 else gpuids

        start_beat_service(os.uname()[1],
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        config = load_config(args.config) if args.config is not None else {}
        # A model named on the command line wins over the one in the config.
        parent_model = args.model or config.get('model')

        storage = StorageClient(tmp_dir=self._tmp_dir,
                                config=load_config(args.storage_config)
                                if args.storage_config else None)

        if parent_model is not None and not self._stateless:
            # Download model locally and merge the configuration.
            remote_model_path = storage.join(args.model_storage, parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            with open(os.path.join(model_path, 'config.json'),
                      'r') as config_file:
                model_config = json.load(config_file)
            config = merge_config(model_config, config)
        else:
            model_path = None

        if args.cmd == 'train':
            self.train_wrapper(args.task_id,
                               config,
                               storage,
                               args.model_storage,
                               args.image,
                               parent_model=parent_model,
                               model_path=model_path,
                               gpuid=args.gpuid)
        elif args.cmd == 'trans':
            if parent_model is None:
                raise ValueError('translation requires a model')
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               args.input,
                               args.output,
                               gpuid=args.gpuid)
        elif args.cmd == 'serve':
            if parent_model is None:
                raise ValueError('serving requires a model')
            self.serve_wrapper(config,
                               model_path,
                               args.host,
                               args.port,
                               gpuid=args.gpuid)
        elif args.cmd == 'preprocess':
            self.preprocess(config, storage)
Exemple #2
0
    def run(self, args=None):
        """Parse the command line, prepare the shared state and run the utility.

        Args:
          args: Optional list of argument strings forwarded to
            ``parser.parse_args``; ``None`` is passed through unchanged.

        The parsed values are stored on the instance (task id, image, storage
        client, model storages, GPU ids, configuration, model, push flag)
        before ``exec_function`` is called. When --statistics_url is set, a
        summary of the execution is posted to it afterwards.
        """
        # Options are declared as data and registered in this exact order.
        option_table = (
            (('-s', '--storage_config'), dict(
                default=None,
                help=('Configuration of available storages as a file or a JSON string. '
                      'Setting "-" will read from the standard input.'))),
            (('-t', '--task_id'), dict(
                default=None,
                help="Identifier of this run.")),
            (('-i', '--image'), dict(
                default="?",
                help="Full URL (registry/image:tag) of the image used for this run.")),
            (('-b', '--beat_url'), dict(
                default=None,
                help=("Endpoint that listens to beat requests "
                      "(push notifications of activity)."))),
            (('-bi', '--beat_interval'), dict(
                default=30,
                type=int,
                help="Interval of beat requests in seconds.")),
            (('--statistics_url',), dict(
                default=None,
                help=('Endpoint that listens to statistics summaries generated '
                      'at the end of the execution'))),
            (('-ms', '--model_storage'), dict(
                default=None,
                help='Model storage in the form <storage_id>:[<path>].')),
            (('-msr', '--model_storage_read'), dict(
                default=None,
                help=('Model storage to read from, in the form <storage_id>:[<path>] '
                      '(defaults to model_storage).'))),
            (('-msw', '--model_storage_write'), dict(
                default=None,
                help=('Model storage to write to, in the form <storage_id>:[<path>] '
                      '(defaults to model_storage).'))),
            (('-c', '--config'), dict(
                default=None,
                help=('Configuration as a file or a JSON string. '
                      'Setting "-" will read from the standard input.'))),
            (('-m', '--model'), dict(
                default=None,
                help='Model to load.')),
            (('-g', '--gpuid'), dict(
                default="0",
                help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")),
            (('--no_push',), dict(
                default=False,
                action='store_true',
                help='Do not push model.')),
        )

        parser = argparse.ArgumentParser()
        for flags, settings in option_table:
            parser.add_argument(*flags, **settings)

        # Let the concrete utility add its own options before parsing.
        self.declare_arguments(parser)
        args = parser.parse_args(args=args)

        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        self._task_id = args.task_id
        self._image = args.image

        hostname = os.uname()[1]
        start_beat_service(hostname,
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        tmp_dir = self._tmp_dir
        storage_config = None
        if args.storage_config:
            storage_config = load_config(args.storage_config)
        self._storage = StorageClient(tmp_dir=tmp_dir, config=storage_config)

        # The read and write storages fall back to the generic one.
        for option in ('model_storage_read', 'model_storage_write'):
            if getattr(args, option) is None:
                setattr(args, option, args.model_storage)

        self._model_storage_read = args.model_storage_read
        self._model_storage_write = args.model_storage_write

        # for backward compatibility - convert singleton in int
        gpu_ids = [int(item) for item in args.gpuid.split(',')]
        args.gpuid = gpu_ids[0] if len(gpu_ids) == 1 else gpu_ids
        self._gpuid = args.gpuid

        if args.config is not None:
            self._config = load_config(args.config)
        else:
            self._config = None
        self._model = args.model
        self._no_push = args.no_push

        logger.info('Starting executing utility %s=%s', self.name, args.image)
        started = time.time()
        stats = self.exec_function(args)
        finished = time.time()
        logger.info('Finished executing utility in %s seconds',
                    str(finished - started))

        if args.statistics_url is None:
            return
        summary = {
            'task_id': self._task_id,
            'start_time': started,
            'end_time': finished,
            'statistics': stats or {}
        }
        requests.post(args.statistics_url, json=summary)
    def run(self, args=None):
        """Main entrypoint: parse the command line and dispatch the run.

        Args:
          args: Optional list of argument strings forwarded to
            ``parser.parse_args``; ``None`` is passed through unchanged.

        The global options are followed by one sub-command (``train``,
        ``trans``, ``release``, ``serve``, ``preprocess`` or ``buildvocab``)
        stored in ``args.cmd``.

        Raises:
          SystemExit: raised by ``parser.error`` when neither --config nor
            --model is given, or when a non-stateless run (other than a plain
            ``preprocess``) lacks a read or a write model storage.
          ValueError: when the loaded model type does not fit the requested
            command, or when a --gpuid item is not an integer.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-c', '--config',
            default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-s', '--storage_config',
            default=None,
            help=('Configuration of available storages as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-ms', '--model_storage',
            default=None,
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument(
            '-msr', '--model_storage_read',
            default=None,
            help=('Model storage to read from, in the form <storage_id>:[<path>] '
                  '(defaults to model_storage).'))
        parser.add_argument(
            '-msw', '--model_storage_write',
            default=None,
            help=('Model storage to write to, in the form <storage_id>:[<path>] '
                  '(defaults to model_storage).'))
        parser.add_argument(
            '-m', '--model',
            default=None,
            help='Model to load.')
        parser.add_argument(
            '-g', '--gpuid',
            default="0",
            help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        parser.add_argument(
            '-t', '--task_id',
            default=None,
            help="Identifier of this run.")
        parser.add_argument(
            '-i', '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run.")
        parser.add_argument(
            '-b', '--beat_url',
            default=None,
            help=("Endpoint that listens to beat requests "
                  "(push notifications of activity)."))
        parser.add_argument(
            '-bi', '--beat_interval',
            default=30,
            type=int,
            help="Interval of beat requests in seconds.")
        parser.add_argument(
            '--no_push',
            default=False,
            action='store_true',
            help='Do not push model.')

        subparsers = parser.add_subparsers(help='Run type', dest='cmd')

        # 'train' declares no option of its own, so its parser is not kept.
        subparsers.add_parser('train', help='Run a training.')

        parser_trans = subparsers.add_parser('trans',
                                             help='Run a translation.')
        parser_trans.add_argument('-i', '--input',
                                  required=True,
                                  nargs='+',
                                  help='Input file.')
        parser_trans.add_argument('-o', '--output',
                                  required=True,
                                  nargs='+',
                                  help='Output file.')
        parser_trans.add_argument('--as_release',
                                  default=False,
                                  action='store_true',
                                  help='Translate from a released model.')

        parser_release = subparsers.add_parser(
            'release', help='Release a model for serving.')
        parser_release.add_argument(
            '-d', '--destination',
            default=None,
            help='Released model storage (defaults to the model storage).')

        parser_serve = subparsers.add_parser('serve', help='Serve a model.')
        parser_serve.add_argument('-hs', '--host',
                                  default="0.0.0.0",
                                  help='Serving hostname.')
        parser_serve.add_argument('-p', '--port',
                                  type=int,
                                  default=4000,
                                  help='Serving port.')

        parser_preprocess = subparsers.add_parser(
            'preprocess', help='Sample and preprocess corpus.')
        parser_preprocess.add_argument('--build_model',
                                       default=False,
                                       action='store_true',
                                       help='Preprocess data into a model.')

        # Registered for its name only; previously the returned parser was
        # stored as a stray attribute on the main parser object.
        subparsers.add_parser('buildvocab', help='Build vocabularies.')

        args = parser.parse_args(args=args)
        if args.config is None and args.model is None:
            parser.error(
                'at least one of --config or --model options must be set')
        if args.model_storage_read is None:
            args.model_storage_read = args.model_storage
        if args.model_storage_write is None:
            args.model_storage_write = args.model_storage
        # ``build_model`` only exists on the 'preprocess' namespace; the
        # ``or`` short-circuits for every other command. Both storages are
        # checked: the original tested the write storage twice.
        if (not self._stateless
                and (args.cmd != 'preprocess' or args.build_model)
                and (args.model_storage_read is None
                     or args.model_storage_write is None)):
            parser.error('Missing model storage argument')
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        # for backward compatibility - convert singleton in int
        gpuids = [int(g) for g in args.gpuid.split(',')]
        args.gpuid = gpuids[0] if len(gpuids) == 1 else gpuids

        start_beat_service(os.uname()[1],
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        config = load_config(args.config) if args.config is not None else {}
        # A model named on the command line wins over the one in the config.
        parent_model = args.model or config.get('model')

        storage = StorageClient(tmp_dir=self._tmp_dir,
                                config=load_config(args.storage_config)
                                if args.storage_config else None)

        if parent_model is not None and not self._stateless:
            # Download model locally and merge the configuration.
            remote_model_path = storage.join(args.model_storage_read,
                                             parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            with open(os.path.join(model_path, 'config.json'),
                      'r') as config_file:
                model_config = json.load(config_file)
            # Model configurations without a type get one from the model name.
            if 'modelType' not in model_config:
                if parent_model.endswith('_release'):
                    model_config['modelType'] = 'release'
                else:
                    model_config['modelType'] = 'checkpoint'
            # Merge into a copy so that model_config is passed on unmodified.
            config = merge_config(copy.deepcopy(model_config), config)
        else:
            model_path = None
            model_config = None

        if args.cmd == 'train':
            if (parent_model is not None and config['modelType']
                    not in ('checkpoint', 'base', 'preprocess')):
                raise ValueError(
                    'cannot train from a model that is not a training checkpoint, '
                    'a base model, or a preprocess model')
            self.train_wrapper(args.task_id,
                               config,
                               storage,
                               args.model_storage_write,
                               args.image,
                               parent_model=parent_model,
                               model_path=model_path,
                               model_config=model_config,
                               gpuid=args.gpuid,
                               push_model=not args.no_push)
        elif args.cmd == 'buildvocab':
            self.build_vocab(args.task_id,
                             config,
                             storage,
                             args.model_storage_write,
                             args.image,
                             push_model=not args.no_push)
        elif args.cmd == 'trans':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'checkpoint')):
                raise ValueError('translation requires a training checkpoint')
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               args.input,
                               args.output,
                               as_release=args.as_release,
                               gpuid=args.gpuid)
        elif args.cmd == 'release':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'checkpoint')):
                raise ValueError('releasing requires a training checkpoint')
            if args.destination is None:
                args.destination = args.model_storage_write
            self.release_wrapper(config,
                                 model_path,
                                 storage,
                                 args.image,
                                 args.destination,
                                 gpuid=args.gpuid,
                                 push_model=not args.no_push)
        elif args.cmd == 'serve':
            if (not self._stateless and
                (parent_model is None or config['modelType'] != 'release')):
                raise ValueError('serving requires a released model')
            self.serve_wrapper(config,
                               model_path,
                               args.host,
                               args.port,
                               gpuid=args.gpuid)
        elif args.cmd == 'preprocess':
            if not args.build_model:
                self.preprocess(config, storage)
            else:
                if (parent_model is not None
                        and config['modelType'] not in ('checkpoint', 'base')):
                    raise ValueError(
                        'cannot preprocess from a model that is not a training '
                        'checkpoint or a base model')
                self.preprocess_into_model(args.task_id,
                                           config,
                                           storage,
                                           args.model_storage_write,
                                           args.image,
                                           parent_model=parent_model,
                                           model_path=model_path,
                                           push_model=not args.no_push)
Exemple #4
0
class Utility(object):
    """Base class for utilities.

    The constructor resolves the working directories from the ``CORPUS_DIR``
    and ``WORKSPACE_DIR`` environment variables and creates the ``output``,
    ``data``, ``shared`` and ``tmp`` sub-directories of the workspace.
    Subclasses provide ``name``, ``declare_arguments`` and ``exec_function``.
    """
    def __init__(self):
        self._corpus_dir = os.getenv('CORPUS_DIR', '/root/corpus')
        workspace_dir = os.getenv('WORKSPACE_DIR', '/root/workspace')
        self._output_dir = os.path.join(workspace_dir, 'output')
        self._data_dir = os.path.join(workspace_dir, 'data')
        self._shared_dir = os.path.join(workspace_dir, 'shared')
        self._tmp_dir = os.path.join(workspace_dir, 'tmp')
        for directory in (self._output_dir, self._data_dir,
                          self._shared_dir, self._tmp_dir):
            self._ensure_dir(directory)

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (and missing parents) unless it already exists."""
        if os.path.exists(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            # Something else may have created the path between the check and
            # the creation; only propagate the error when it is still missing.
            if not os.path.exists(path):
                raise

    @property
    @abc.abstractmethod
    def name(self):
        """Name of the utility, used in the start-up log message."""
        raise NotImplementedError()

    @abc.abstractmethod
    def declare_arguments(self, parser):
        """Add the utility-specific options to ``parser`` before parsing."""
        raise NotImplementedError()

    @abc.abstractmethod
    def exec_function(self, args):
        """Launch the utility with provided params

        The returned value is sent as the ``statistics`` entry of the
        summary posted to --statistics_url (an empty dict when falsy).
        """
        raise NotImplementedError()

    def run(self, args=None):
        """Main entrypoint: parse the options, set up the state and execute.

        Args:
          args: Optional list of argument strings forwarded to
            ``parser.parse_args``; ``None`` is passed through unchanged.

        The parsed values are stored on the instance (task id, image, storage
        client, model storages, GPU ids, configuration, model, push flag)
        before ``exec_function`` is called. When --statistics_url is set, a
        summary of the execution is posted to it afterwards.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-s', '--storage_config',
            default=None,
            help=('Configuration of available storages as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-t', '--task_id',
            default=None,
            help="Identifier of this run.")
        parser.add_argument(
            '-i', '--image',
            default="?",
            help="Full URL (registry/image:tag) of the image used for this run.")
        parser.add_argument(
            '-b', '--beat_url',
            default=None,
            help=("Endpoint that listens to beat requests "
                  "(push notifications of activity)."))
        parser.add_argument(
            '-bi', '--beat_interval',
            default=30,
            type=int,
            help="Interval of beat requests in seconds.")
        parser.add_argument(
            '--statistics_url',
            default=None,
            help=('Endpoint that listens to statistics summaries generated '
                  'at the end of the execution'))

        parser.add_argument(
            '-ms', '--model_storage',
            default=None,
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument(
            '-msr', '--model_storage_read',
            default=None,
            help=('Model storage to read from, in the form <storage_id>:[<path>] '
                  '(defaults to model_storage).'))
        parser.add_argument(
            '-msw', '--model_storage_write',
            default=None,
            help=('Model storage to write to, in the form <storage_id>:[<path>] '
                  '(defaults to model_storage).'))
        parser.add_argument(
            '-c', '--config',
            default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument(
            '-m', '--model',
            default=None,
            help='Model to load.')
        parser.add_argument(
            '-g', '--gpuid',
            default="0",
            help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        parser.add_argument(
            '--no_push',
            default=False,
            action='store_true',
            help='Do not push model.')

        # Let the concrete utility add its own options before parsing.
        self.declare_arguments(parser)
        args = parser.parse_args(args=args)

        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        self._task_id = args.task_id
        self._image = args.image

        start_beat_service(os.uname()[1],
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        self._storage = StorageClient(tmp_dir=self._tmp_dir,
                                      config=load_config(args.storage_config)
                                      if args.storage_config else None)

        # The read and write storages fall back to the generic one.
        if args.model_storage_read is None:
            args.model_storage_read = args.model_storage
        if args.model_storage_write is None:
            args.model_storage_write = args.model_storage

        self._model_storage_read = args.model_storage_read
        self._model_storage_write = args.model_storage_write

        # for backward compatibility - convert singleton in int
        gpuids = [int(g) for g in args.gpuid.split(',')]
        args.gpuid = gpuids[0] if len(gpuids) == 1 else gpuids

        self._gpuid = args.gpuid

        self._config = load_config(
            args.config) if args.config is not None else None
        self._model = args.model
        self._no_push = args.no_push

        logger.info('Starting executing utility %s=%s', self.name, args.image)
        start_time = time.time()
        stats = self.exec_function(args)
        end_time = time.time()
        logger.info('Finished executing utility in %s seconds',
                    str(end_time - start_time))

        if args.statistics_url is not None:
            requests.post(args.statistics_url,
                          json={
                              'task_id': self._task_id,
                              'start_time': start_time,
                              'end_time': end_time,
                              'statistics': stats or {}
                          })

    def _merge_multi_training_files(self, data_path, train_dir, source,
                                    target):
        """Merge the files of ``data_path`` under ``<data_dir>/merged``.

        Returns the path ``<data_dir>/merged/<train_dir>`` handed to
        ``data.merge_files_in_directory`` as the merge destination.
        """
        merged_dir = os.path.join(self._data_dir, 'merged')
        self._ensure_dir(merged_dir)
        merged_path = os.path.join(merged_dir, train_dir)
        logger.info('Merging training data to %s/train.{%s,%s}', merged_path,
                    source, target)
        data.merge_files_in_directory(data_path, merged_path, source, target)
        return merged_path

    def convert_to_local_file(self, nextval):
        """Fetch remote files locally and return the matching local paths.

        Args:
          nextval: Iterable of strings, each a comma-separated list of remote
            paths.

        Returns:
          A list with one string per input item, in which every remote path
          is replaced by the local path the file was fetched to: the data
          directory joined with the last element of ``self._storage.split``.
        """
        new_val = []
        for val in nextval:
            local_inputs = []
            for remote_input in val.split(','):
                local_input = os.path.join(
                    self._data_dir,
                    self._storage.split(remote_input)[-1])
                self._storage.get_file(remote_input, local_input)
                local_inputs.append(local_input)
            new_val.append(','.join(local_inputs))
        return new_val
    def run(self):
        """Parse ``sys.argv`` and start a training or a translation.

        The global options are followed by a sub-command (``train`` or
        ``trans``) stored in ``args.cmd``. For any command other than
        ``train``, a ``ValueError`` is raised when no model is available.
        """
        # Global options, declared as data and registered in this exact order.
        option_table = (
            (('-c', '--config'), dict(
                default=None,
                help=('Configuration as a file or a JSON string. '
                      'Setting "-" will read from the standard input.'))),
            (('-s', '--storage_config'), dict(
                default=None,
                help=('Configuration of available storages as a file or a JSON string. '
                      'Setting "-" will read from the standard input.'))),
            (('-ms', '--model_storage'), dict(
                required=not self._stateless,
                help='Model storage in the form <storage_id>:[<path>].')),
            (('-m', '--model'), dict(
                default=None,
                help='Model to load.')),
            (('-g', '--gpuid'), dict(
                default=0,
                type=int,
                help="1-indexed GPU identifier (0 for CPU).")),
            (('-t', '--task_id'), dict(
                default=None,
                help="Identifier of this run.")),
            (('-i', '--image'), dict(
                default="?",
                help="Full URL (registry/image:tag) of the image used for this run.")),
            (('-b', '--beat_url'), dict(
                default=None,
                help=("Endpoint that listens to beat requests "
                      "(push notifications of activity)."))),
            (('-bi', '--beat_interval'), dict(
                default=30,
                type=int,
                help="Interval of beat requests in seconds.")),
        )

        parser = argparse.ArgumentParser()
        for flags, settings in option_table:
            parser.add_argument(*flags, **settings)

        commands = parser.add_subparsers(help='Run type', dest='cmd')
        commands.add_parser('train', help='Run a training.')
        trans_parser = commands.add_parser('trans', help='Run a translation.')
        for flags, text in ((('-i', '--input'), 'Input file.'),
                            (('-o', '--output'), 'Output file.')):
            trans_parser.add_argument(*flags, required=True, help=text)

        args = parser.parse_args()
        if args.config is None and args.model is None:
            parser.error(
                'at least one of --config or --model options must be set')
        if args.task_id is None:
            args.task_id = str(uuid.uuid4())

        hostname = os.uname()[1]
        start_beat_service(hostname,
                           args.beat_url,
                           args.task_id,
                           interval=args.beat_interval)

        config = {}
        if args.config is not None:
            config = load_config(args.config)
        # A model named on the command line wins over the one in the config.
        parent_model = args.model or config.get('model')

        storage_config = None
        if args.storage_config:
            storage_config = load_config(args.storage_config)
        storage = StorageClient(config=storage_config)

        model_path = None
        if parent_model is not None and not self._stateless:
            # Fetch the model next to the other local models, then let the
            # run configuration be merged with the one stored in the model.
            remote_model_path = storage.join(args.model_storage, parent_model)
            model_path = os.path.join(self._models_dir, parent_model)
            fetch_model(storage, remote_model_path, model_path)
            config_path = os.path.join(model_path, 'config.json')
            with open(config_path, 'r') as config_file:
                model_config = json.load(config_file)
            config = merge_config(model_config, config)

        if args.cmd == 'train':
            self.train_wrapper(args.task_id,
                               config,
                               storage,
                               args.model_storage,
                               args.image,
                               model_path=model_path,
                               gpuid=args.gpuid)
            return

        # Checked before the command name, as in the original dispatch.
        if parent_model is None:
            raise ValueError('translation requires a model')

        if args.cmd == 'trans':
            self.trans_wrapper(config,
                               model_path,
                               storage,
                               args.input,
                               args.output,
                               gpuid=args.gpuid)