def run(self):
    """Main entrypoint.

    Parses the command line, optionally fetches the parent model and
    merges its configuration, then dispatches to the requested
    sub-command (train, trans, serve, or preprocess).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--config', default=None,
        help=('Configuration as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-s', '--storage_config', default=None,
        help=('Configuration of available storages as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-ms', '--model_storage',
        help='Model storage in the form <storage_id>:[<path>].')
    parser.add_argument('-m', '--model', default=None, help='Model to load.')
    parser.add_argument(
        '-g', '--gpuid', default="0",
        help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
    parser.add_argument('-t', '--task_id', default=None,
                        help="Identifier of this run.")
    parser.add_argument(
        '-i', '--image', default="?",
        help="Full URL (registry/image:tag) of the image used for this run.")
    parser.add_argument('-b', '--beat_url', default=None,
                        help=("Endpoint that listens to beat requests "
                              "(push notifications of activity)."))
    parser.add_argument('-bi', '--beat_interval', default=30, type=int,
                        help="Interval of beat requests in seconds.")

    subparsers = parser.add_subparsers(help='Run type', dest='cmd')
    parser_train = subparsers.add_parser('train', help='Run a training.')
    parser_trans = subparsers.add_parser('trans', help='Run a translation.')
    parser_trans.add_argument('-i', '--input', required=True,
                              help='Input file.')
    parser_trans.add_argument('-o', '--output', required=True,
                              help='Output file.')
    parser_serve = subparsers.add_parser('serve', help='Serve a model.')
    parser_serve.add_argument('-hs', '--host', default="0.0.0.0",
                              help='Serving hostname.')
    parser_serve.add_argument('-p', '--port', type=int, default=4000,
                              help='Serving port.')
    # BUG FIX: the preprocess subparser was previously assigned to the
    # unrelated attribute `parser.build_vocab` (copy-paste remnant); use a
    # conventional local name like the other subparsers.
    parser_preprocess = subparsers.add_parser(
        'preprocess', help='Sample and preprocess corpus.')

    args = parser.parse_args()
    if args.config is None and args.model is None:
        parser.error('at least one of --config or --model options must be set')
    # A model storage is mandatory unless the utility is stateless or only
    # preprocessing (which produces no model).
    if not self._stateless and args.cmd != 'preprocess' and not args.model_storage:
        parser.error('argument -ms/--model_storage is required')
    if args.task_id is None:
        args.task_id = str(uuid.uuid4())

    # for backward compatibility - convert singleton in int
    args.gpuid = [int(g) for g in args.gpuid.split(',')]
    if len(args.gpuid) == 1:
        args.gpuid = args.gpuid[0]

    start_beat_service(os.uname()[1], args.beat_url, args.task_id,
                       interval=args.beat_interval)

    config = load_config(args.config) if args.config is not None else {}
    parent_model = args.model or config.get('model')
    storage = StorageClient(
        tmp_dir=self._tmp_dir,
        config=load_config(args.storage_config) if args.storage_config else None)

    if parent_model is not None and not self._stateless:
        # Download model locally and merge the configuration.
        remote_model_path = storage.join(args.model_storage, parent_model)
        model_path = os.path.join(self._models_dir, parent_model)
        fetch_model(storage, remote_model_path, model_path)
        with open(os.path.join(model_path, 'config.json'), 'r') as config_file:
            model_config = json.load(config_file)
        config = merge_config(model_config, config)
    else:
        model_path = None

    if args.cmd == 'train':
        self.train_wrapper(args.task_id, config, storage, args.model_storage,
                           args.image, parent_model=parent_model,
                           model_path=model_path, gpuid=args.gpuid)
    elif args.cmd == 'trans':
        if parent_model is None:
            raise ValueError('translation requires a model')
        self.trans_wrapper(config, model_path, storage, args.input,
                           args.output, gpuid=args.gpuid)
    elif args.cmd == 'serve':
        if parent_model is None:
            raise ValueError('serving requires a model')
        self.serve_wrapper(config, model_path, args.host, args.port,
                           gpuid=args.gpuid)
    elif args.cmd == 'preprocess':
        self.preprocess(config, storage)
def run(self, args=None):
    """Main entrypoint.

    Parses the common command-line options, initializes shared state on
    the instance (task id, image, storage client, model storage paths,
    GPU list, configuration), runs the utility via ``exec_function``, and
    optionally posts an execution summary to a statistics endpoint.

    Args:
        args: Optional list of command-line arguments; defaults to
            ``sys.argv`` when ``None`` (standard argparse behavior).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-s', '--storage_config', default=None,
        help=('Configuration of available storages as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument('-t', '--task_id', default=None,
                        help="Identifier of this run.")
    parser.add_argument(
        '-i', '--image', default="?",
        help="Full URL (registry/image:tag) of the image used for this run.")
    parser.add_argument('-b', '--beat_url', default=None,
                        help=("Endpoint that listens to beat requests "
                              "(push notifications of activity)."))
    parser.add_argument('-bi', '--beat_interval', default=30, type=int,
                        help="Interval of beat requests in seconds.")
    parser.add_argument(
        '--statistics_url', default=None,
        help=('Endpoint that listens to statistics summaries generated '
              'at the end of the execution'))
    parser.add_argument(
        '-ms', '--model_storage', default=None,
        help='Model storage in the form <storage_id>:[<path>].')
    parser.add_argument(
        '-msr', '--model_storage_read', default=None,
        help=('Model storage to read from, in the form <storage_id>:[<path>] '
              '(defaults to model_storage).'))
    parser.add_argument(
        '-msw', '--model_storage_write', default=None,
        help=('Model storage to write to, in the form <storage_id>:[<path>] '
              '(defaults to model_storage).'))
    parser.add_argument(
        '-c', '--config', default=None,
        help=('Configuration as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument('-m', '--model', default=None, help='Model to load.')
    parser.add_argument(
        '-g', '--gpuid', default="0",
        help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
    parser.add_argument('--no_push', default=False, action='store_true',
                        help='Do not push model.')
    # Let the concrete utility declare its own options before parsing.
    self.declare_arguments(parser)
    args = parser.parse_args(args=args)

    if args.task_id is None:
        args.task_id = str(uuid.uuid4())
    self._task_id = args.task_id
    self._image = args.image

    start_beat_service(os.uname()[1], args.beat_url, args.task_id,
                       interval=args.beat_interval)

    self._storage = StorageClient(
        tmp_dir=self._tmp_dir,
        config=load_config(args.storage_config) if args.storage_config else None)

    # Read and write storages both default to the generic model storage.
    if args.model_storage_read is None:
        args.model_storage_read = args.model_storage
    if args.model_storage_write is None:
        args.model_storage_write = args.model_storage
    self._model_storage_read = args.model_storage_read
    self._model_storage_write = args.model_storage_write

    # for backward compatibility - convert singleton in int
    args.gpuid = args.gpuid.split(',')
    args.gpuid = [int(g) for g in args.gpuid]
    if len(args.gpuid) == 1:
        args.gpuid = args.gpuid[0]
    self._gpuid = args.gpuid

    self._config = load_config(
        args.config) if args.config is not None else None
    self._model = args.model
    self._no_push = args.no_push

    logger.info('Starting executing utility %s=%s', self.name, args.image)
    start_time = time.time()
    stats = self.exec_function(args)
    end_time = time.time()
    logger.info('Finished executing utility in %s seconds',
                str(end_time - start_time))

    # Report an execution summary when a statistics endpoint was given.
    if args.statistics_url is not None:
        requests.post(args.statistics_url, json={
            'task_id': self._task_id,
            'start_time': start_time,
            'end_time': end_time,
            'statistics': stats or {}
        })
def run(self, args=None):
    """Main entrypoint.

    Parses the command line, resolves the read/write model storages,
    optionally fetches the parent model and merges its configuration,
    then dispatches to the requested sub-command (train, trans, release,
    serve, preprocess, buildvocab).

    Args:
        args: Optional list of command-line arguments; defaults to
            ``sys.argv`` when ``None`` (standard argparse behavior).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--config', default=None,
        help=('Configuration as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-s', '--storage_config', default=None,
        help=('Configuration of available storages as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-ms', '--model_storage', default=None,
        help='Model storage in the form <storage_id>:[<path>].')
    parser.add_argument(
        '-msr', '--model_storage_read', default=None,
        help=('Model storage to read from, in the form <storage_id>:[<path>] '
              '(defaults to model_storage).'))
    parser.add_argument(
        '-msw', '--model_storage_write', default=None,
        help=('Model storage to write to, in the form <storage_id>:[<path>] '
              '(defaults to model_storage).'))
    parser.add_argument('-m', '--model', default=None, help='Model to load.')
    parser.add_argument(
        '-g', '--gpuid', default="0",
        help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
    parser.add_argument('-t', '--task_id', default=None,
                        help="Identifier of this run.")
    parser.add_argument(
        '-i', '--image', default="?",
        help="Full URL (registry/image:tag) of the image used for this run.")
    parser.add_argument('-b', '--beat_url', default=None,
                        help=("Endpoint that listens to beat requests "
                              "(push notifications of activity)."))
    parser.add_argument('-bi', '--beat_interval', default=30, type=int,
                        help="Interval of beat requests in seconds.")
    parser.add_argument('--no_push', default=False, action='store_true',
                        help='Do not push model.')

    subparsers = parser.add_subparsers(help='Run type', dest='cmd')
    parser_train = subparsers.add_parser('train', help='Run a training.')
    parser_trans = subparsers.add_parser('trans', help='Run a translation.')
    parser_trans.add_argument('-i', '--input', required=True, nargs='+',
                              help='Input file.')
    parser_trans.add_argument('-o', '--output', required=True, nargs='+',
                              help='Output file.')
    parser_trans.add_argument('--as_release', default=False,
                              action='store_true',
                              help='Translate from a released model.')
    parser_release = subparsers.add_parser(
        'release', help='Release a model for serving.')
    parser_release.add_argument(
        '-d', '--destination', default=None,
        help='Released model storage (defaults to the model storage).')
    parser_serve = subparsers.add_parser('serve', help='Serve a model.')
    parser_serve.add_argument('-hs', '--host', default="0.0.0.0",
                              help='Serving hostname.')
    parser_serve.add_argument('-p', '--port', type=int, default=4000,
                              help='Serving port.')
    parser_preprocess = subparsers.add_parser(
        'preprocess', help='Sample and preprocess corpus.')
    parser_preprocess.add_argument('--build_model', default=False,
                                   action='store_true',
                                   help='Preprocess data into a model.')
    # BUG FIX: the buildvocab subparser was previously assigned to the
    # unrelated attribute `parser.build_vocab`; use a conventional local name.
    parser_buildvocab = subparsers.add_parser('buildvocab',
                                              help='Build vocabularies.')

    args = parser.parse_args(args=args)
    if args.config is None and args.model is None:
        parser.error('at least one of --config or --model options must be set')

    # Read and write storages both default to the generic model storage.
    if args.model_storage_read is None:
        args.model_storage_read = args.model_storage
    if args.model_storage_write is None:
        args.model_storage_write = args.model_storage
    # BUG FIX: the original condition tested `args.model_storage_write is
    # None` twice, so a missing *read* storage was never reported here.
    if (not self._stateless
            and (args.cmd != 'preprocess' or args.build_model)
            and (args.model_storage_read is None
                 or args.model_storage_write is None)):
        parser.error('Missing model storage argument')

    if args.task_id is None:
        args.task_id = str(uuid.uuid4())

    # for backward compatibility - convert singleton in int
    args.gpuid = [int(g) for g in args.gpuid.split(',')]
    if len(args.gpuid) == 1:
        args.gpuid = args.gpuid[0]

    start_beat_service(os.uname()[1], args.beat_url, args.task_id,
                       interval=args.beat_interval)

    config = load_config(args.config) if args.config is not None else {}
    parent_model = args.model or config.get('model')
    storage = StorageClient(
        tmp_dir=self._tmp_dir,
        config=load_config(args.storage_config) if args.storage_config else None)

    if parent_model is not None and not self._stateless:
        # Download model locally and merge the configuration.
        remote_model_path = storage.join(args.model_storage_read, parent_model)
        model_path = os.path.join(self._models_dir, parent_model)
        fetch_model(storage, remote_model_path, model_path)
        with open(os.path.join(model_path, 'config.json'), 'r') as config_file:
            model_config = json.load(config_file)
        if 'modelType' not in model_config:
            # Legacy models do not declare their type: infer it from the name.
            if parent_model.endswith('_release'):
                model_config['modelType'] = 'release'
            else:
                model_config['modelType'] = 'checkpoint'
        # Deep copy so later mutations of config do not leak into model_config.
        config = merge_config(copy.deepcopy(model_config), config)
    else:
        model_path = None
        model_config = None

    if args.cmd == 'train':
        if (parent_model is not None
                and config['modelType'] not in ('checkpoint', 'base', 'preprocess')):
            raise ValueError(
                'cannot train from a model that is not a training checkpoint, '
                'a base model, or a preprocess model')
        self.train_wrapper(args.task_id, config, storage,
                           args.model_storage_write, args.image,
                           parent_model=parent_model, model_path=model_path,
                           model_config=model_config, gpuid=args.gpuid,
                           push_model=not args.no_push)
    elif args.cmd == 'buildvocab':
        self.build_vocab(args.task_id, config, storage,
                         args.model_storage_write, args.image,
                         push_model=not args.no_push)
    elif args.cmd == 'trans':
        if (not self._stateless
                and (parent_model is None or config['modelType'] != 'checkpoint')):
            raise ValueError('translation requires a training checkpoint')
        self.trans_wrapper(config, model_path, storage, args.input,
                           args.output, as_release=args.as_release,
                           gpuid=args.gpuid)
    elif args.cmd == 'release':
        if (not self._stateless
                and (parent_model is None or config['modelType'] != 'checkpoint')):
            raise ValueError('releasing requires a training checkpoint')
        if args.destination is None:
            args.destination = args.model_storage_write
        self.release_wrapper(config, model_path, storage, args.image,
                             args.destination, gpuid=args.gpuid,
                             push_model=not args.no_push)
    elif args.cmd == 'serve':
        if (not self._stateless
                and (parent_model is None or config['modelType'] != 'release')):
            raise ValueError('serving requires a released model')
        self.serve_wrapper(config, model_path, args.host, args.port,
                           gpuid=args.gpuid)
    elif args.cmd == 'preprocess':
        if not args.build_model:
            self.preprocess(config, storage)
        else:
            if (parent_model is not None
                    and config['modelType'] not in ('checkpoint', 'base')):
                raise ValueError(
                    'cannot preprocess from a model that is not a training '
                    'checkpoint or a base model')
            self.preprocess_into_model(
                args.task_id, config, storage, args.model_storage_write,
                args.image, parent_model=parent_model, model_path=model_path,
                push_model=not args.no_push)
class Utility(object):
    """Base class for utilities.

    Sets up the working directories, parses the shared command line in
    ``run``, and delegates the actual work to the subclass through
    ``declare_arguments`` and ``exec_function``.

    NOTE(review): the abstract methods use ``abc.abstractmethod`` but the
    class does not set ``abc.ABCMeta`` as metaclass, so instantiation of
    incomplete subclasses is presumably not enforced — confirm intent.
    """

    def __init__(self):
        # Directory layout: corpus comes from CORPUS_DIR; all other
        # directories live under WORKSPACE_DIR and are created on demand.
        self._corpus_dir = os.getenv('CORPUS_DIR', '/root/corpus')
        workspace_dir = os.getenv('WORKSPACE_DIR', '/root/workspace')
        self._output_dir = os.path.join(workspace_dir, 'output')
        self._data_dir = os.path.join(workspace_dir, 'data')
        self._shared_dir = os.path.join(workspace_dir, 'shared')
        self._tmp_dir = os.path.join(workspace_dir, 'tmp')
        if not os.path.exists(self._output_dir):
            os.makedirs(self._output_dir)
        if not os.path.exists(self._data_dir):
            os.makedirs(self._data_dir)
        if not os.path.exists(self._shared_dir):
            os.makedirs(self._shared_dir)
        if not os.path.exists(self._tmp_dir):
            os.makedirs(self._tmp_dir)

    @property
    @abc.abstractmethod
    def name(self):
        """Name of the utility, used in logs."""
        raise NotImplementedError()

    @abc.abstractmethod
    def declare_arguments(self, parser):
        """Adds utility-specific options to the argument parser."""
        raise NotImplementedError()

    @abc.abstractmethod
    def exec_function(self, args):
        """Launch the utility with provided params """
        raise NotImplementedError()

    def run(self, args=None):
        """Main entrypoint.

        Parses the common command-line options, initializes shared state
        on the instance, runs the utility via ``exec_function``, and
        optionally posts an execution summary to a statistics endpoint.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '-s', '--storage_config', default=None,
            help=('Configuration of available storages as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument('-t', '--task_id', default=None,
                            help="Identifier of this run.")
        parser.add_argument(
            '-i', '--image', default="?",
            help="Full URL (registry/image:tag) of the image used for this run.")
        parser.add_argument('-b', '--beat_url', default=None,
                            help=("Endpoint that listens to beat requests "
                                  "(push notifications of activity)."))
        parser.add_argument('-bi', '--beat_interval', default=30, type=int,
                            help="Interval of beat requests in seconds.")
        parser.add_argument(
            '--statistics_url', default=None,
            help=('Endpoint that listens to statistics summaries generated '
                  'at the end of the execution'))
        parser.add_argument(
            '-ms', '--model_storage', default=None,
            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument(
            '-msr', '--model_storage_read', default=None,
            help=('Model storage to read from, in the form <storage_id>:[<path>] '
                  '(defaults to model_storage).'))
        parser.add_argument(
            '-msw', '--model_storage_write', default=None,
            help=('Model storage to write to, in the form <storage_id>:[<path>] '
                  '(defaults to model_storage).'))
        parser.add_argument(
            '-c', '--config', default=None,
            help=('Configuration as a file or a JSON string. '
                  'Setting "-" will read from the standard input.'))
        parser.add_argument('-m', '--model', default=None,
                            help='Model to load.')
        parser.add_argument(
            '-g', '--gpuid', default="0",
            help="Comma-separated list of 1-indexed GPU identifiers (0 for CPU).")
        parser.add_argument('--no_push', default=False, action='store_true',
                            help='Do not push model.')
        # Let the concrete utility declare its own options before parsing.
        self.declare_arguments(parser)
        args = parser.parse_args(args=args)

        if args.task_id is None:
            args.task_id = str(uuid.uuid4())
        self._task_id = args.task_id
        self._image = args.image

        start_beat_service(os.uname()[1], args.beat_url, args.task_id,
                           interval=args.beat_interval)

        self._storage = StorageClient(
            tmp_dir=self._tmp_dir,
            config=load_config(args.storage_config) if args.storage_config else None)

        # Read and write storages both default to the generic model storage.
        if args.model_storage_read is None:
            args.model_storage_read = args.model_storage
        if args.model_storage_write is None:
            args.model_storage_write = args.model_storage
        self._model_storage_read = args.model_storage_read
        self._model_storage_write = args.model_storage_write

        # for backward compatibility - convert singleton in int
        args.gpuid = args.gpuid.split(',')
        args.gpuid = [int(g) for g in args.gpuid]
        if len(args.gpuid) == 1:
            args.gpuid = args.gpuid[0]
        self._gpuid = args.gpuid

        self._config = load_config(
            args.config) if args.config is not None else None
        self._model = args.model
        self._no_push = args.no_push

        logger.info('Starting executing utility %s=%s', self.name, args.image)
        start_time = time.time()
        stats = self.exec_function(args)
        end_time = time.time()
        logger.info('Finished executing utility in %s seconds',
                    str(end_time - start_time))

        # Report an execution summary when a statistics endpoint was given.
        if args.statistics_url is not None:
            requests.post(args.statistics_url, json={
                'task_id': self._task_id,
                'start_time': start_time,
                'end_time': end_time,
                'statistics': stats or {}
            })

    def _merge_multi_training_files(self, data_path, train_dir, source, target):
        """Merges the training files under data_path into a single pair.

        Returns the path of the merged directory.
        """
        merged_dir = os.path.join(self._data_dir, 'merged')
        if not os.path.exists(merged_dir):
            os.mkdir(merged_dir)
        merged_path = os.path.join(merged_dir, train_dir)
        logger.info('Merging training data to %s/train.{%s,%s}',
                    merged_path, source, target)
        data.merge_files_in_directory(data_path, merged_path, source, target)
        return merged_path

    def convert_to_local_file(self, nextval):
        """Downloads remote files to the local data directory.

        Args:
            nextval: Iterable of comma-separated remote paths.

        Returns:
            A list of the same comma-separated entries with each remote
            path replaced by its local copy.
        """
        new_val = []
        for val in nextval:
            inputs = val.split(',')
            local_inputs = []
            for remote_input in inputs:
                # Keep only the path component of the remote identifier.
                local_input = os.path.join(
                    self._data_dir,
                    self._storage.split(remote_input)[-1])
                self._storage.get_file(remote_input, local_input)
                local_inputs.append(local_input)
            new_val.append(','.join(local_inputs))
        return new_val
def run(self):
    """Main entrypoint.

    Parses the command line, optionally fetches the parent model and
    merges its configuration, then dispatches to the train or trans
    sub-command.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c', '--config', default=None,
        help=('Configuration as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-s', '--storage_config', default=None,
        help=('Configuration of available storages as a file or a JSON string. '
              'Setting "-" will read from the standard input.'))
    parser.add_argument(
        '-ms', '--model_storage', required=not self._stateless,
        help='Model storage in the form <storage_id>:[<path>].')
    parser.add_argument('-m', '--model', default=None, help='Model to load.')
    parser.add_argument('-g', '--gpuid', default=0, type=int,
                        help="1-indexed GPU identifier (0 for CPU).")
    parser.add_argument('-t', '--task_id', default=None,
                        help="Identifier of this run.")
    parser.add_argument(
        '-i', '--image', default="?",
        help="Full URL (registry/image:tag) of the image used for this run.")
    parser.add_argument('-b', '--beat_url', default=None,
                        help=("Endpoint that listens to beat requests "
                              "(push notifications of activity)."))
    parser.add_argument('-bi', '--beat_interval', default=30, type=int,
                        help="Interval of beat requests in seconds.")

    subparsers = parser.add_subparsers(help='Run type', dest='cmd')
    parser_train = subparsers.add_parser('train', help='Run a training.')
    parser_trans = subparsers.add_parser('trans', help='Run a translation.')
    parser_trans.add_argument('-i', '--input', required=True,
                              help='Input file.')
    parser_trans.add_argument('-o', '--output', required=True,
                              help='Output file.')

    args = parser.parse_args()
    if args.config is None and args.model is None:
        parser.error('at least one of --config or --model options must be set')
    if args.task_id is None:
        args.task_id = str(uuid.uuid4())

    start_beat_service(os.uname()[1], args.beat_url, args.task_id,
                       interval=args.beat_interval)

    config = load_config(args.config) if args.config is not None else {}
    parent_model = args.model or config.get('model')
    storage = StorageClient(
        config=load_config(args.storage_config) if args.storage_config else None)

    if parent_model is not None and not self._stateless:
        # Download model locally and merge the configuration.
        remote_model_path = storage.join(args.model_storage, parent_model)
        model_path = os.path.join(self._models_dir, parent_model)
        fetch_model(storage, remote_model_path, model_path)
        with open(os.path.join(model_path, 'config.json'), 'r') as config_file:
            model_config = json.load(config_file)
        config = merge_config(model_config, config)
    else:
        model_path = None

    if args.cmd == 'train':
        self.train_wrapper(args.task_id, config, storage, args.model_storage,
                           args.image, model_path=model_path,
                           gpuid=args.gpuid)
    elif args.cmd == 'trans':
        # BUG FIX: the original branch order was `elif parent_model is None:
        # raise ...` BEFORE testing the command, so any non-train invocation
        # without a model raised 'translation requires a model' even when the
        # command was not 'trans'. Check the command first, then the model.
        if parent_model is None:
            raise ValueError('translation requires a model')
        self.trans_wrapper(config, model_path, storage, args.input,
                           args.output, gpuid=args.gpuid)