def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', default=None, required=True,
                        help='Storages configuration file.')
    parser.add_argument('--info', '-v', action='store_true', help='info mode')
    parser.add_argument('--verbose', '-vv', action='store_true', help='verbose mode')

    subparsers = parser.add_subparsers(help='command help', dest='cmd')
    subparsers.required = True

    parser_list = subparsers.add_parser('list', help='list files on a storage')
    parser_list.add_argument('--recursive', '-r', action='store_true',
                             help='recursive listing')
    parser_list.add_argument('storage', type=resolvedpath, help='path to list')

    parser_get = subparsers.add_parser('get', help='download a file or directory')
    parser_get.add_argument('storage', type=resolvedpath,
                            help='path to the file or directory to download, '
                                 'a directory must end with /')
    parser_get.add_argument('local', type=str, help='local path')

    parser_get = subparsers.add_parser('push', help='upload a file or directory')
    parser_get.add_argument('local', type=str,
                            help='local path to the file or directory to upload')
    parser_get.add_argument('storage', type=resolvedpath, help='remote path')

    parser_stat = subparsers.add_parser('stat', help='returns stat on a remote file/directory')
    parser_stat.add_argument('storage', type=resolvedpath, help='remote path')

    args = parser.parse_args()

    # logging.basicConfig only takes effect on its first call, so configure the
    # most verbose level that was requested.
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.info:
        logging.basicConfig(level=logging.INFO)

    with open(args.config) as jsonf:
        config = json.load(jsonf)
    # support configuration from automatic tests
    if 'storages' in config:
        config = config['storages']
    client = StorageClient(config=config)

    if args.cmd == "list":
        listdir = client.listdir(args.storage, args.recursive)
        for k in sorted(listdir.keys()):
            if listdir[k].get("is_dir"):
                print("dir", k)
            else:
                date = datetime.fromtimestamp(listdir[k]["last_modified"])
                print(" ", "%10d" % listdir[k]["size"],
                      date.strftime("%Y-%m-%dT%H:%M:%S"), k)
    elif args.cmd == "get":
        directory = args.storage.endswith('/')
        if directory:
            if os.path.isfile(args.local):
                raise ValueError("%s should be a directory" % args.local)
            client.get_directory(args.storage, args.local)
        else:
            client.get_file(args.storage, args.local)
    elif args.cmd == "push":
        client.push(args.local, args.storage)
    elif args.cmd == "stat":
        print(client.stat(args.storage))
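
# A usage sketch (not part of the CLI above): the same operations invoked directly
# through the StorageClient used above, assuming a configuration dict in the format
# expected by --config. The storage identifier "remote" and the paths are
# hypothetical placeholders; the call signatures mirror the dispatch code above.
def _example_storage_calls(config):
    client = StorageClient(config=config)
    entries = client.listdir("remote:models/", True)           # equivalent of: list -r
    client.get_file("remote:models/model.bin", "model.bin")    # equivalent of: get
    client.push("model.bin", "remote:models/")                 # equivalent of: push
    return entries, client.stat("remote:models/model.bin")     # equivalent of: stat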
class Utility(object):
    """Base class for utilities."""

    def __init__(self):
        self._corpus_dir = os.getenv('CORPUS_DIR')
        workspace_dir = os.getenv('WORKSPACE_DIR', '/root/workspace')
        self._output_dir = os.path.join(workspace_dir, 'output')
        self._data_dir = os.path.join(workspace_dir, 'data')
        self._shared_dir = os.path.join(workspace_dir, 'shared')
        self._tmp_dir = tempfile.mkdtemp()
        try:
            if not os.path.exists(self._output_dir):
                os.makedirs(self._output_dir)
            if not os.path.exists(self._data_dir):
                os.makedirs(self._data_dir)
            if not os.path.exists(self._shared_dir):
                os.makedirs(self._shared_dir)
        except OSError:
            pass

    @property
    @abc.abstractmethod
    def name(self):
        raise NotImplementedError()

    @abc.abstractmethod
    def declare_arguments(self, parser):
        raise NotImplementedError()

    @abc.abstractmethod
    def exec_function(self, args):
        """Launches the utility with the provided arguments."""
        raise NotImplementedError()

    def run(self, args=None):
        """Main entrypoint."""
        parser = argparse.ArgumentParser()
        parser.add_argument('-s', '--storage_config', default=None,
                            help=('Configuration of available storages as a file or a JSON string. '
                                  'Setting "-" will read from the standard input.'))
        parser.add_argument('-t', '--task_id', default=None,
                            help="Identifier of this run.")
        parser.add_argument('-i', '--image', default="?",
                            help="Full URL (registry/image:tag) of the image used for this run.")
        parser.add_argument('-b', '--beat_url', default=None,
                            help=("Endpoint that listens to beat requests "
                                  "(push notifications of activity)."))
        parser.add_argument('-bi', '--beat_interval', default=30, type=int,
                            help="Interval of beat requests in seconds.")
        parser.add_argument('--statistics_url', default=None,
                            help=('Endpoint that listens to statistics summaries generated '
                                  'at the end of the execution.'))
        parser.add_argument('-ms', '--model_storage', default=os.environ["MODELS_DIR"],
                            help='Model storage in the form <storage_id>:[<path>].')
        parser.add_argument('-msr', '--model_storage_read', default=None,
                            help=('Model storage to read from, in the form <storage_id>:[<path>] '
                                  '(defaults to model_storage).'))
        parser.add_argument('-msw', '--model_storage_write', default=None,
                            help=('Model storage to write to, in the form <storage_id>:[<path>] '
                                  '(defaults to model_storage).'))
        parser.add_argument('-c', '--config', default=None,
                            help=('Configuration as a file or a JSON string. '
                                  'Setting "-" will read from the standard input.'))
        parser.add_argument('--config_update_mode',
                            choices=['default', 'merge', 'replace'], default='default',
                            help=('How to update the parent task configuration with the given '
                                  'configuration. '
                                  '"default": automatic mode based on the configuration, '
                                  '"merge": recursively update configuration fields, '
                                  '"replace": replace the top-most fields.'))
        parser.add_argument('-m', '--model', default=None,
                            help='Model to load.')
        parser.add_argument('-g', '--gpuid', default="0",
                            help="Comma-separated list of 0-indexed GPU identifiers.")
        parser.add_argument('--no_push', default=False, action='store_true',
                            help='Do not push the model.')
        self.declare_arguments(parser)
        args = parser.parse_args(args=args)

        if args.task_id is None:
            args.task_id = str(uuid.uuid4())
        self._task_id = args.task_id
        self._image = args.image

        start_beat_service(
            os.uname()[1],
            args.beat_url,
            args.task_id,
            interval=args.beat_interval)

        self._storage = StorageClient(
            config=load_config(args.storage_config) if args.storage_config else None)

        if args.model_storage_read is None:
            args.model_storage_read = args.model_storage
        if args.model_storage_write is None:
            args.model_storage_write = args.model_storage
        self._model_storage_read = args.model_storage_read
        self._model_storage_write = args.model_storage_write

        # For backward compatibility, convert a singleton list into an int.
        args.gpuid = args.gpuid.split(',')
        args.gpuid = [int(g) for g in args.gpuid]
        if len(args.gpuid) == 1:
            args.gpuid = args.gpuid[0]
        self._gpuid = args.gpuid

        self._config = load_config(args.config) if args.config is not None else None
        self._model = args.model
        self._no_push = args.no_push

        logger.info('Starting utility %s=%s', self.name, args.image)
        start_time = time.time()
        stats = self.exec_function(args)
        end_time = time.time()
        logger.info('Finished executing utility in %s seconds', str(end_time - start_time))

        if args.statistics_url is not None:
            requests.post(args.statistics_url, json={
                'task_id': self._task_id,
                'start_time': start_time,
                'end_time': end_time,
                'statistics': stats or {}
            })

    def convert_to_local_file(self, nextval, is_dir=False):
        """Downloads the remote paths (comma-separated in each value) into the data
        directory and returns the same values rewritten with the local paths."""
        new_val = []
        for val in nextval:
            inputs = val.split(',')
            local_inputs = []
            for remote_input in inputs:
                local_input = os.path.join(self._data_dir, self._storage.split(remote_input)[-1])
                if is_dir:
                    self._storage.get_directory(remote_input, local_input)
                else:
                    self._storage.get_file(remote_input, local_input)
                local_inputs.append(local_input)
            new_val.append(','.join(local_inputs))
        return new_val
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', default=None, required=True,
                        help='Storages configuration file.')
    parser.add_argument('--info', '-v', action='store_true', help='info mode')
    parser.add_argument('--verbose', '-vv', action='store_true', help='verbose mode')

    subparsers = parser.add_subparsers(help='command help', dest='cmd')
    subparsers.required = True

    parser_list = subparsers.add_parser('list', help='list files on a storage')
    parser_list.add_argument('--recursive', '-r', action='store_true',
                             help='recursive listing')
    parser_list.add_argument('storage', type=resolvedpath, help='path to list')

    parser_get = subparsers.add_parser('get', help='download a file or directory')
    parser_get.add_argument(
        'storage', type=resolvedpath,
        help='path to the file or directory to download, a directory must end with /')
    parser_get.add_argument('local', type=str, help='local path')

    parser_get = subparsers.add_parser('push', help='upload a file or directory')
    parser_get.add_argument('local', type=str,
                            help='local path to the file or directory to upload')
    parser_get.add_argument('storage', type=resolvedpath, help='remote path')

    parser_get = subparsers.add_parser('delete', help='delete a corpus')
    parser_get.add_argument('storage', type=resolvedpath, help='remote path')
    parser_get.add_argument('corpusId', type=str, help='corpus id')

    parser_stat = subparsers.add_parser(
        'stat', help='returns stat on a remote file/directory')
    parser_stat.add_argument('storage', type=resolvedpath, help='remote path')

    parser_get = subparsers.add_parser(
        'stream_corpus_manager', help='export a corpus in TMX (default) or bitext')
    parser_get.add_argument('storage', type=resolvedpath, help='remote path')
    parser_get.add_argument('corpusId', type=str, help='corpus id')
    parser_get.add_argument(
        'format', type=check_format,
        help='format of the corpus (application/x-tmx+xml, text/bitext)')

    parser_search = subparsers.add_parser(
        'search', help='list corpus segments identified by corpus id')
    parser_search.add_argument('storage', type=resolvedpath, help='remote path')
    parser_search.add_argument('id', help='remote id')
    parser_search.add_argument('search_query', type=resolvedjson,
                               help='query text for search')
    parser_search.add_argument('skip', default=0,
                               help='number of segments to skip (default 0)')
    parser_search.add_argument(
        'limit', default=0, help='number of segments returned (default 0, meaning all)')

    parser_search = subparsers.add_parser(
        'seg_delete', help='delete segments identified by id')
    parser_search.add_argument('storage', type=resolvedpath, help='remote path')
    parser_search.add_argument('corpus_id', help='corpus id')
    parser_search.add_argument('ids', help='list of segment ids')

    parser_stream = subparsers.add_parser(
        'stream', help='print out a specific corpus by name')
    parser_stream.add_argument('storage', type=resolvedpath, help='remote path')

    args = parser.parse_args()

    # logging.basicConfig only takes effect on its first call, so configure the
    # most verbose level that was requested.
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.info:
        logging.basicConfig(level=logging.INFO)

    with open(args.config) as jsonf:
        config = json.load(jsonf)
    # support configuration from automatic tests
    if 'storages' in config:
        config = config['storages']
    client = StorageClient(config=config)

    if args.cmd == "list":
        listdir = client.listdir(args.storage, args.recursive)
        for k in sorted(listdir.keys()):
            if listdir[k].get("is_dir"):
                print("dir", k)
            else:
                date = datetime.fromtimestamp(listdir[k]["last_modified"])
                if "entries" in listdir[k]:
                    size = listdir[k]["entries"]
                else:
                    size = listdir[k]["size"]
                print(" ", "%10d" % size, date.strftime("%Y-%m-%dT%H:%M:%S"), k)
    elif args.cmd == "get":
        directory = args.storage.endswith('/')
        if directory:
            if os.path.isfile(args.local):
                raise ValueError("%s should be a directory" % args.local)
            client.get_directory(args.storage, args.local)
        else:
            client.get_file(args.storage, args.local)
    elif args.cmd == "push":
        client.push(args.local, args.storage)
    elif args.cmd == "delete":
        client.delete_corpus_manager(args.storage, args.corpusId)
    elif args.cmd == "stat":
        print(client.stat(args.storage))
    elif args.cmd == "stream_corpus_manager":
        byte_result = b''
        for chunk in client.stream_corpus_manager(args.storage, args.corpusId, args.format):
            if chunk:
                byte_result += chunk
        sys.stdout.write(byte_result.decode("utf-8"))
    elif args.cmd == "stream":
        byte_result = b''
        for chunk in client.stream(args.storage):
            if chunk:
                byte_result += chunk
        sys.stdout.write(byte_result.decode("utf-8"))
    elif args.cmd == "search":
        print(client.search(args.storage, args.id, args.search_query,
                            args.skip, args.limit))
    elif args.cmd == "seg_delete":
        print(client.seg_delete(args.storage, args.corpus_id, args.ids))
    elif args.cmd == "seg_add":
        print(client.seg_add(args.storage, args.corpus_id, args.ids))