def list(self):
    """List models in the store, filtered by a pattern or a regular expression."""
    om = get_omega(self.args)
    filter_value = self.args.get('<pattern>')
    use_regexp = self.args.get('--regexp') or self.args.get('-E')
    raw = self.args.get('--raw')
    # select the filter keyword the store API expects
    filter_key = 'regexp' if use_regexp else 'pattern'
    entries = om.models.list(raw=raw, **{filter_key: filter_value})
    self.logger.info(entries)
def shell(self):
    """Open an interactive shell with ``om`` preloaded.

    Prefers IPython if installed; otherwise falls back to the stdlib
    ``code.InteractiveConsole`` (with gnureadline if available).
    """
    om = get_omega(self.args)
    use_ipython = False
    try:
        import IPython
    except ImportError:
        # was a bare except: — only a missing package should trigger the fallback
        self.logger.warn("you should pip install ipython for convenience")
    else:
        use_ipython = True
    # ipython
    if use_ipython:
        IPython.embed(header='omegaml is available as the om variable',
                      colors='neutral')
        return
    # default console
    import code
    try:
        import gnureadline
    except ImportError:
        self.logger.warn(
            "you should pip install gnureadline for convenience")
    # expose om (and everything else local) inside the console namespace
    variables = {}
    variables.update(locals())
    shell = code.InteractiveConsole(locals=variables)
    shell.interact()
def get(self):
    """Retrieve a stored notebook and write it to a local file."""
    om = get_omega(self.args)
    target_path = self.args['<path>']
    job_name = self.args['<name>']
    nb = om.jobs.get(job_name)
    nbformat.write(nb, target_path)
    self.logger.debug(target_path)
def status(self):
    """Show runtime status: workers (default), queue labels, or worker stats."""
    om = get_omega(self.args)
    show_labels = self.args.get('labels')
    show_stats = self.args.get('stats')
    if not (show_labels or show_stats):
        # default view: the list of active workers
        pprint(om.runtime.workers())
        return
    if show_labels:
        # map each worker to its queue names, hiding celery-internal amq.* queues
        queues = om.runtime.queues()
        by_worker = {}
        for worker, details in queues.items():
            names = []
            for queue in details:
                queue_name = queue.get('name')
                if not queue_name.startswith('amq'):
                    names.append(queue_name)
            by_worker[worker] = names
        pprint(by_worker)
    elif show_stats:
        # summarize pool size and per-task counts for each worker
        stats = om.runtime.stats()
        summary = {}
        for worker, details in stats.items():
            summary[worker] = {
                'size': details['pool']['max-concurrency'],
                'tasks': dict(details['total'].items()),
            }
        pprint(summary)
def do_import(self):
    """Import objects from an mlops-export archive.

    Resolves the archive path (prompting when several candidate archives
    match), then either lists the archive members (--list) or imports all
    objects matching the given name prefixes, optionally promoting them.
    """
    om = get_omega(self.args)
    names = self.args.get('<prefix/name>')
    # docopt stores missing options as None, so dict.get's default never
    # applies; use `or` to fall back (consistent with do_export)
    archive = Path(self.args.get('--path') or './mlops-export')
    dolist = self.args.get('--list')
    promote = self.args.get('--promote')
    # a single regexp matching any of the requested names
    pattern = '|'.join(names)
    if (not archive.is_file()
            and not archive.exists()
            and archive.parent.exists()):
        # the exact path does not exist; look for archives with that prefix
        archives = list(archive.parent.glob(f'{archive.name}*'))
        if len(archives) > 1:
            archive = Path(
                self.ask("Select an archive", options=archives,
                         select=True, default=1))
    assert archive.exists(), f"No mlops-export {archive} exists"
    self.print(f"Processing {archive}")
    if dolist:
        arc = OmegaExporter.archive(archive)
        self.print(list(arc.members))
    else:
        exp = OmegaExporter(om)
        arcfile = exp.from_archive(archive,
                                   pattern=pattern,
                                   progressfn=print,
                                   promote=promote)
        self.print("Imported objects:")
        self.print(arcfile)
def result(self):
    """Fetch and log the result of a previously submitted runtime task."""
    from celery.result import AsyncResult
    om = get_omega(self.args)
    task_id = self.args.get('<taskid>')
    async_result = AsyncResult(task_id, app=om.runtime.celeryapp)
    self.logger.info(async_result.get())
def shell(self):
    """Open an interactive shell, optionally executing a given command.

    Prefers IPython (pre-loading ``om`` via exec_lines); otherwise falls
    back to the stdlib ``code.InteractiveConsole``.
    """
    use_ipython = False
    command = self.args.get('<command>') or ''
    try:
        import IPython
    except ImportError:
        # was a bare except: — only a missing package should trigger the fallback
        self.logger.warn("you should pip install ipython for convenience")
    else:
        use_ipython = True
    # ipython
    if use_ipython:
        c = Config()
        # bootstrap om inside the new IPython kernel from the cli args
        c.InteractiveShellApp.exec_lines = [
            'from omegaml.client.util import get_omega',
            'om = get_omega(shell_args)',
            'print("omegaml is available as the om variable")',
        ]
        c.TerminalIPythonApp.display_banner = False
        argv = f'-c {command}'.split(' ') if command else []
        IPython.start_ipython(argv, config=c,
                              user_ns=dict(shell_args=self.args))
        return
    # default console
    import code
    om = get_omega(self.args)
    try:
        import gnureadline
    except ImportError:
        self.logger.warn(
            "you should pip install gnureadline for convenience")
    # expose om (and everything else local) inside the console namespace
    variables = {}
    variables.update(locals())
    shell = code.InteractiveConsole(locals=variables)
    shell.interact()
def put(self):
    """Store a local file as a dataset (csv, image, or binary blob).

    The format is chosen by --format or guessed from the file extension.
    """
    om = get_omega(self.args)
    local = self.args['<path>']
    name = self.args['<name>']
    replace = self.args['--replace']
    csvkwargs = self.parse_kwargs('--csv')
    # TODO introduce a pluggable filetype processing backend to do this
    is_csv = self.args.get('--format') == 'csv' or anyext(local, '.csv')
    is_image = self.args.get('--format') == 'image' or anyext(
        local, '.png,.img,.bmp,.jpeg,.jpg,.tif,.tiff,.eps,.raw,.gif')
    if is_csv:
        # csv formats
        om.datasets.read_csv(local, name, append=not replace, **csvkwargs)
        meta = om.datasets.metadata(name)
    elif is_image:
        # images
        from imageio import imread
        with smart_open.open(local, 'rb') as fin:
            img = imread(fin)
            meta = om.datasets.put(img, name)
    else:
        # everything else is stored as a binary blob; the original trailing
        # else branch (om.datasets.put(local, ...)) was unreachable because
        # is_binary was always True whenever neither csv nor image applied
        with smart_open.open(local, 'rb') as fin:
            meta = om.datasets.put(fin, name, append=not replace)
    self.logger.info(meta)
def list(self):
    """List datasets, filtered by a pattern or a regular expression."""
    om = get_omega(self.args)
    raw = self.args.get('--raw', False)
    use_regexp = self.args.get('--regexp') or self.args.get('-E')
    filter_value = self.args.get('<pattern>')
    # select the filter keyword the store API expects
    filter_key = 'regexp' if use_regexp else 'pattern'
    entries = om.datasets.list(raw=raw, **{filter_key: filter_value})
    self.logger.info(entries)
def list(self):
    """List objects in the store named by self.command (e.g. models, jobs)."""
    om = get_omega(self.args)
    filter_value = self.args.get('<pattern>')
    use_regexp = self.args.get('--regexp') or self.args.get('-E')
    raw = self.args.get('--raw')
    hidden = self.args.get('--hidden')
    # select the filter keyword the store API expects
    filter_key = 'regexp' if use_regexp else 'pattern'
    # the store is the om attribute matching the cli command name
    store = getattr(om, self.command)
    entries = store.list(raw=raw, hidden=hidden, **{filter_key: filter_value})
    self.logger.info(entries)
def put(self):
    """Store a local notebook file under the given name in om.jobs."""
    from nbformat import read as nbread
    om = get_omega(self.args)
    source_path = self.args['<path>']
    job_name = self.args['<name>']
    with open(source_path, 'rb') as fin:
        notebook = nbread(fin, as_version=4)
    meta = om.jobs.put(notebook, job_name)
    self.logger.info(meta)
def job(self):
    """Run a stored job on the runtime, waiting for the result unless --async.

    Note: the original used ``async`` as a variable name, which is a
    SyntaxError on Python >= 3.7; renamed to ``is_async`` (consistent with
    the sibling implementation).
    """
    om = get_omega(self.args)
    name = self.args.get('<name>')
    is_async = self.args.get('--async')
    result = om.runtime.job(name).run()
    if not is_async:
        # block until the job completes and log its result
        self.logger.info(result.get())
    else:
        # fire-and-forget: log the task id for later lookup
        self.logger.info(result.task_id)
def job(self):
    """Run a stored job on the runtime, optionally on a labeled worker."""
    om = get_omega(self.args)
    job_name = self.args.get('<name>')
    run_async = self.args.get('--async')
    worker_label = self.args.get('--require')
    result = om.runtime.require(worker_label).job(job_name).run()
    if run_async:
        # fire-and-forget: log the task id for later lookup
        self.logger.info(result.task_id)
    else:
        # block until the job completes and log its result
        self.logger.info(result.get())
def script(self):
    """Run a stored script on the runtime, waiting for the result unless --async.

    Note: the original used ``async`` as a variable name, which is a
    SyntaxError on Python >= 3.7; renamed to ``is_async``.
    """
    om = get_omega(self.args)
    name = self.args.get('<name>')
    is_async = self.args.get('--async')
    kwargs = self.parse_kwargs('<kw=value>')
    result = om.runtime.script(name).run(**kwargs)
    if not is_async:
        # block until the script completes and log its result
        self.logger.info(result.get())
    else:
        # fire-and-forget: log the task id for later lookup
        self.logger.info(result.task_id)
def put(self):
    """Package a local script directory and store it in om.scripts."""
    om = get_omega(self.args)
    script_path = self.args.get('<path>')
    name = self.args.get('<name>')
    # guard clause: only local paths are accepted here
    if not os.path.exists(script_path):
        raise ValueError('{} is not a valid path'.format(script_path))
    name = name or os.path.basename(script_path)
    abs_path = os.path.abspath(script_path)
    meta = om.scripts.put('pkg://{}'.format(abs_path), name)
    self.logger.info(meta)
def log(self):
    """Print the stored log dataset, or tail it live with -f."""
    import pandas as pd
    tail = self.args.get('-f')
    om = get_omega(self.args)
    if not tail:
        df = om.logger.dataset.get()
        # max_colwidth=-1 is deprecated and rejected by modern pandas;
        # None is the documented way to disable column truncation
        with pd.option_context('display.max_rows', None,
                               'display.max_columns', None,
                               'display.max_colwidth', None):
            print(df[['text']])
    else:
        # follow mode: stream new log entries as they arrive
        om.logger.dataset.tail()
def put(self):
    """Instantiate a model from a module-level callable and store it.

    <module.callable> is split into module path and callable name; the
    callable is invoked with no arguments to produce the model object.
    """
    om = get_omega(self.args)
    modname = self.args.get('<module.callable>')
    name = self.args.get('<name>')
    modname, fnname = modname.rsplit('.', maxsplit=1)
    # import errors propagate unchanged; the original
    # `try: ... except: raise` was a no-op and has been removed
    mod = import_module(modname)
    modelfn = getattr(mod, fnname)
    model = modelfn()
    self.logger.info(om.models.put(model, name))
def status(self):
    """Show past runs and pending scheduled runs for a job."""
    om = get_omega(self.args)
    name = self.args.get('<name>')
    meta = om.jobs.metadata(name)
    runs = meta.attributes.get('job_runs', [])
    run_at, triggers = om.jobs.get_schedule(name, only_pending=True)
    self.logger.info("Runs:")
    for entry in runs:
        self.logger.info(" {ts} {status} ".format(**entry))
    self.logger.info("Next scheduled runs:")
    for trigger in triggers:
        # ensure a ts key exists so the format below never fails
        trigger.setdefault('ts', '')
        self.logger.info(" {ts} {status} {event-kind} {event}".format(**trigger))
def serve(self):
    """Serve the omegaml REST API through gunicorn, optionally filtered by rules."""
    om = get_omega(self.args, require_config=False)
    rules = self.args.get('<rule>')
    rules_file = self.args.get('--rules')
    if rules_file:
        # one rule per line; lines starting with '#' are comments
        with open(rules_file, 'r') as fin:
            rules = [line.replace('\n', '')
                     for line in fin.readlines()
                     if not line.startswith('#')]
    # expose the rules to the restapi process via the environment
    api_filter = ';'.join(rules) if rules else om.defaults.OMEGA_RESTAPI_FILTER
    os.environ['OMEGA_RESTAPI_FILTER'] = api_filter
    subprocess.run("gunicorn 'omegaml.restapi.app:serve_objects()'", shell=True)
def config(self):
    """Print the active configuration: config file, user id, REST and broker URLs."""
    om = get_omega(self.args)
    config_file = om.defaults.OMEGA_CONFIG_FILE
    if config_file is None:
        # bug fix: previously the return value of print() (always None) was
        # assigned to config_file, so 'Config file: None' was logged below
        print("No configuration file identified, assuming defaults")
        config_file = 'not configured (using defaults)'
    # print config
    restapi_url = getattr(om.defaults, 'OMEGA_RESTAPI_URL', 'not configured')
    runtime_url = om.runtime.celeryapp.conf['BROKER_URL']
    userid = getattr(om.defaults, 'OMEGA_USERID', 'not configured')
    self.logger.info('Config file: {config_file}'.format(**locals()))
    self.logger.info('User id: {userid}'.format(**locals()))
    self.logger.info('REST API URL: {restapi_url}'.format(**locals()))
    self.logger.info('Runtime broker: {runtime_url}'.format(**locals()))
def get(self):
    """Write a stored dataset to a local file (csv for DataFrames, bytes otherwise)."""
    om = get_omega(self.args)
    target_path = self.args['<path>']
    dataset_name = self.args['<name>']
    obj = om.datasets.get(dataset_name)
    csvkwargs = self.parse_kwargs('--csv', index=False)
    if isinstance(obj, pd.DataFrame):
        obj.to_csv(target_path, **csvkwargs)
    elif hasattr(obj, 'read'):
        # file-like object: stream it out in 10 KB chunks
        with open(target_path, 'wb') as fout:
            for chunk in iter(lambda: obj.read(1024 * 10), b''):
                if not chunk:
                    break
                fout.write(chunk)
    self.logger.debug(target_path)
def do_export(self):
    """Export named objects to an mlops-export archive, or list archive members."""
    om = get_omega(self.args)
    names = self.args.get('<prefix/name>')
    archive = self.args.get('--path') or './mlops-export'
    # NOTE(review): docopt includes all option keys, so the dict.get default
    # likely never applies and an omitted flag yields False — confirm intent
    compress = self.args.get('--compress', True)
    dolist = self.args.get('--list')
    if dolist:
        arc = OmegaExporter.archive(archive)
        self.print(list(arc.members))
        return
    exp = OmegaExporter(om)
    arcfile = exp.to_archive(archive, names,
                             compress=compress,
                             progressfn=print)
    self.print(arcfile)
def put(self):
    """Store a script from a local path, a pip-installable source, or pypi.

    Tries, in order: local path (packaged as pkg://), a pip-sourced spec
    as given, then the same spec prefixed with pypi://.
    """
    om = get_omega(self.args)
    script_path = self.args.get('<path>')
    name = self.args.get('<name>')

    def as_pypi(v):
        return 'pypi://{}'.format(v)

    if os.path.exists(script_path):
        # local path: package the directory/file
        name = name or os.path.basename(script_path)
        abs_path = os.path.abspath(script_path)
        meta = om.scripts.put('pkg://{}'.format(abs_path), name)
    elif PythonPipSourcedPackageData.supports(script_path, name):
        # already a valid pip-sourced spec
        meta = om.scripts.put(script_path, name)
    elif PythonPipSourcedPackageData.supports(as_pypi(script_path), name):
        # plain package name: treat as pypi package
        meta = om.scripts.put(as_pypi(script_path), name)
    else:
        raise ValueError('{} is not a valid path'.format(script_path))
    self.logger.info(meta)
def restart(self):
    """Restart a deployed app by calling the hub's stop and start endpoints.

    Uses HTTP basic auth with the runtime credentials; --insecure disables
    TLS certificate verification.
    """
    import requests
    om = get_omega(self.args, require_config=True)
    name = self.args.get('<name>')
    insecure = self.args.get('--insecure', False)
    user = om.runtime.auth.userid
    auth = requests.auth.HTTPBasicAuth(user, om.runtime.auth.apikey)
    # the apphub lives on the same host as the mongo service
    parsed = urlparse(om.defaults.OMEGA_MONGO_URL)
    url = f'https://{parsed.hostname}'
    # the f-strings are fully interpolated already; the original chained
    # .format(om.runtime.auth.userid) calls were no-ops and were removed
    stop = requests.get(f'{url}/apps/api/stop/{user}/{name}',
                        auth=auth, verify=not insecure)
    start = requests.get(f'{url}/apps/api/start/{user}/{name}',
                         auth=auth, verify=not insecure)
    self.logger.info(f'stop: {stop} start: {start}')
def env(self):
    """Manage the python environment on runtime workers.

    Uploads an optional requirements file, ensures the .system/envinstall
    script exists, then runs envinstall on every host queue (--every) or
    on the workers named by --require, waiting for all results.
    """
    om = get_omega(self.args)
    action = self.args.get('<action>')
    package = self.args.get('<package>')
    reqfile = self.args.get('--file')
    every = self.args.get('--every')
    require = self.args.get('--require') or ''
    if reqfile:
        # stage the requirements file where envinstall expects it
        with open(reqfile, 'rb') as fin:
            om.scripts.put(fin, '.system/requirements.txt')
    if not om.scripts.exists('.system/envinstall', hidden=True):
        # bootstrap the envinstall helper script from the installed package
        import omegaml as om_module
        envinstall_path = os.path.join(os.path.dirname(om_module.__file__),
                                       'runtimes', 'envinstall')
        om.scripts.put(f'pkg://{envinstall_path}', '.system/envinstall')
    if every:
        # target every worker host via per-host queues
        labels = om.runtime.enable_hostqueues()
    else:
        labels = require.split(',')
    results = []
    # fan out one envinstall run per worker label
    for label in labels:
        result = (
            om.runtime.require(label).script('.system/envinstall').run(
                action=action, package=package, file=reqfile,
                __format='python'))
        results.append((label, result))
    all_results = om.runtime.celeryapp.ResultSet([r[1] for r in results])
    from tqdm import tqdm
    # show a progress spinner while any worker is still installing
    with tqdm() as progress:
        while all_results.waiting():
            progress.update(1)
            sleep(1)
        all_results.get()
    for label, result in results:
        if label:
            print(f'** result of worker require={label}:')
        data = result.get()  # resolve AsyncResult => dict
        print(str(data.get('result', data)))  # get actual result object, pip stdout
def put(self):
    """Store a local file as a dataset (csv, image, or as-is by path)."""
    om = get_omega(self.args)
    local = self.args['<path>']
    name = self.args['<name>']
    replace = self.args['--replace']
    csvkwargs = self.parse_kwargs('--csv')
    # TODO introduce a pluggable filetype processing backend to do this
    if local.endswith('.csv'):
        # csv formats
        import pandas as pd
        data = pd.read_csv(local, **csvkwargs)
        meta = om.datasets.put(data, name, append=not replace)
    elif imghdr.what(local):
        # images
        from imageio import imread
        img = imread(local)
        meta = om.datasets.put(img, name)
    else:
        # bug fix: previously meta was assigned the return value of
        # self.logger.info() (always None), so the stored Metadata was
        # never logged below
        meta = om.datasets.put(local, name, append=not replace)
    self.logger.info(meta)
def model(self):
    """Run a model action (fit, predict, ...) on the runtime.

    Routes <X>/<Y>/--result/--param to the keyword arguments the chosen
    action expects, then dispatches it on the (optionally labeled) runtime.
    """
    om = get_omega(self.args)
    name = self.args.get('<name>')
    action = self.args.get('<model-action>')
    is_async = self.args.get('--async')
    kwargs_lst = self.args.get('--param')
    output = self.args.get('--result')
    label = self.args.get('--require')
    X = self._ensure_valid_XY(self.args.get('<X>'))
    Y = self._ensure_valid_XY(self.args.get('<Y>'))
    # parse the list of kw=value values
    # e.g. key1=val1 key2=val2 => kwargs_lst = ['key1=val1', 'key2=val2']
    # => kw_dct = { 'key1': eval('val1'), 'key2': eval('val2') }
    # SECURITY NOTE(review): eval() executes arbitrary user-supplied
    # expressions; acceptable for a local cli, but never feed it
    # untrusted input
    kv_dct = {}
    for kv in kwargs_lst:
        k, v = kv.split('=', 1)
        kv_dct[k] = eval(v)
    kwargs = {}
    if action in ('predict', 'predict_proba',
                  'decision_function', 'transform'):
        # actions that take rName, but no Y
        kwargs['rName'] = output
    else:
        # actions that take Y, but no rName
        kwargs['Yname'] = Y
    if action == 'gridsearch':
        # gridsearch additionally receives the parsed --param grid
        kwargs['parameters'] = kv_dct
    rt_model = om.runtime.require(label).model(name)
    # resolve the action dynamically; None means the proxy has no such method
    meth = getattr(rt_model, action, None)
    if meth is not None:
        result = meth(X, **kwargs)
        if not is_async:
            # block for the result; otherwise log the AsyncResult itself
            self.logger.info(result.get())
        else:
            self.logger.info(result)
        return
    raise ValueError(
        '{action} is not applicable to {name}'.format(**locals()))
def celery(self, action=None): om = get_omega(self.args) # giving om command here changes celery help output celery_cmds = ['om runtime celery'] if action: celery_cmds += action.split(' ') # convert omega terms into celery terms celery_opts = ( # omega term, celery term, value|flag ('--worker', '--destination', 'value'), ('--queue', '--queue', 'value'), ('--celery-help', '--help', 'flag'), ) is_r_worker = 'rworker' in self.args.get('<celery-command>') for opt, celery_opt, kind in celery_opts: if self.args.get(opt): celery_cmds += [celery_opt] if kind == 'value': celery_cmds += [self.args.get(opt)] # prepare celery command args, remove empty parts celery_cmds += self.args.get('<celery-command>') celery_cmds += (self.args.get('--flags') or '').split(' ') celery_cmds = [cmd for cmd in celery_cmds if cmd] if len(celery_cmds) == 1 + int(action is not None): celery_cmds += ['--help'] # start in-process for speed # -- disable command logging to avoid curses problems in celery events self.logger.setLevel(logging.CRITICAL + 1) if is_r_worker: # start r runtime from omegaml.runtimes import rsystem rworker = os.path.join(os.path.dirname(rsystem.__file__), 'omworker.R') rcmd = f'Rscript {rworker}'.split(' ') call(rcmd) else: # start python runtime om.runtime.celeryapp.start(celery_cmds)
def get(self):
    """Write a stored dataset to a local file as csv or raw bytes.

    The output format is chosen by --format or inferred from the object
    (to_csv-capable => csv, read-capable => binary stream).
    """
    om = get_omega(self.args)
    local = self.args['<path>']
    name = self.args['<name>']
    try:
        # try lazy get first to support large dataframes
        obj = om.datasets.get(name, lazy=True)
    except Exception:
        # was a bare except: — narrow so KeyboardInterrupt/SystemExit propagate
        obj = om.datasets.get(name)
    csvkwargs = self.parse_kwargs('--csv', index=False)
    is_csv = self.args.get('--format') == 'csv' or hasattr(
        obj, 'to_csv') or anyext(local, '.csv')
    is_binary = self.args.get('--format') == 'binary' or hasattr(
        obj, 'read')
    if is_csv and not is_binary:
        obj.to_csv(local, **csvkwargs)
    elif is_binary:
        # stream file-like objects out in 10 KB chunks
        with smart_open.open(local, 'wb') as fout:
            while True:
                data = obj.read(1024 * 10)
                if not data:
                    break
                fout.write(data)
    self.logger.debug(local)
def schedule(self):
    """Show, delete, or create the run schedule of a job.

    The schedule can be given as --at HH:MM, individual --hour/--minute/
    --weekday/--monthday/--month parts, a --cron spec, or a natural
    language <interval>. With no interval given, the current schedule is
    shown. Asks for confirmation before deleting or creating a schedule.
    """
    # FIXME this is a mess
    om = get_omega(self.args)
    name = self.args.get('<name>')
    at = self.args.get('--at')
    # get interval specs
    if at:
        # --at HH:MM shorthand overrides --hour/--minute
        hour, minute = at.split(':')
    else:
        hour = self.args.get('--hour')
        minute = self.args.get('--minute')
    weekday = self.args.get('--weekday')
    monthday = self.args.get('--monthday')
    month = self.args.get('--month')
    delete = self.args.get('delete')
    show = self.args.get('show')
    spec = self.args.get('--cron')
    next_n = self.args.get('--next')
    interval = self.args.get('<interval>')
    # by default we show if no interval is specified
    show = show or not any(s for s in (weekday, monthday, month, hour, minute, interval, spec))
    # print current schedule and triggers
    run_at, triggers = om.jobs.get_schedule(name, only_pending=True)
    if run_at:
        human_sched = get_description(run_at)
        self.logger.info("Currently {name} is scheduled at {human_sched}".format(**locals()))
        if next_n:
            # preview the next N occurrences of the existing schedule
            self.logger.info("Given this existing interval, next {next_n} times would be:".format(**locals()))
            for time in om.jobs.Schedule.from_cron(run_at).next_times(int(next_n)):
                self.logger.info(" {}".format(time))
    else:
        self.logger.info("Currently {name} is not scheduled".format(**locals()))
    # show current triggers
    if triggers:
        trigger = triggers[-1]
        if trigger['status'] == 'PENDING':
            event = trigger['event']
            self.logger.info("{name} is scheduled to run next at {event}".format(**locals()))
    # delete if currently scheduled
    if delete:
        if run_at or triggers:
            answer = self.ask("Do you want to delete this schedule?", options='Y/n', default='y')
            should_drop = answer.lower().startswith('y')
            return om.jobs.drop_schedule(name) if should_drop else None
    # create new schedule
    if not (show or delete):
        if interval:
            try:
                # nlp text-like
                spec = om.jobs.Schedule(interval).cron
            except Exception as e:
                self.logger.info(f"Cannot parse {interval}, error was {e}")
                raise
        if not spec:
            # NOTE(review): cron_repr appears unused below — looks like a
            # leftover; confirm before removing
            cron_repr = ('{0._orig_minute} {0._orig_hour} {0._orig_day_of_month} '
                         '{0._orig_month_of_year} {0._orig_day_of_week}')
            # build the schedule from individual parts, defaulting to '*'
            sched = om.jobs.Schedule(minute=minute or '*',
                                     hour=hour or '*',
                                     monthday=monthday or '*',
                                     weekday=weekday or '*',
                                     month=month or '*')
            cron_sched = sched.cron
        else:
            cron_sched = spec
        human_sched = get_description(cron_sched)
        if next_n:
            # preview the next N occurrences of the proposed schedule
            self.logger.info("Given this new interval, next {next_n} times would be:".format(**locals()))
            for time in om.jobs.Schedule.from_cron(cron_sched).next_times(int(next_n)):
                self.logger.info(" {}".format(time))
        text = "Do you want to schedule {name} at {human_sched}?".format(**locals())
        answer = self.ask(text, options="Y/n", default='y')
        if answer.lower().startswith('n'):
            self.logger.info('Ok, not scheduled. Try again.')
            return
        self.logger.info('{name} will be scheduled to run {human_sched}'.format(**locals()))
        om.jobs.schedule(name, run_at=cron_sched, last_run=datetime.datetime.now())