コード例 #1
0
def stream_job_log(job):
    """Stream the log of a single job through ``LogPrinter``.

    Builds the job's websocket stream URL, a one-entry mapping from the
    job id to its short id, and the stream metadata, then starts streaming.

    Args:
        job: Job object; its ``id`` and ``short_id`` attributes are read.
    """
    display_names = {job.id: job.short_id}
    endpoint = '%s/ws/jobs/%s/stream' % (get_stream_url(), job.id)

    stream_meta = {"job_id": job.short_id}
    if util.is_tensorboard_job(job):
        # The job object itself is attached under the "tensorboard_job" key.
        stream_meta["tensorboard_job"] = job

    printer = LogPrinter(endpoint, display_names, stream_meta=stream_meta)
    printer.stream()
コード例 #2
0
ファイル: stream.py プロジェクト: datar-ai/cli
def stream_experiment_log(experiment, filter_job=None):
    """Stream the logs of an experiment and its child experiments.

    Collects display names for the experiment, its children and all of
    their jobs, derives the Horovod process count, and starts a
    ``LogPrinter`` on the experiment's websocket stream URL.

    Args:
        experiment: Experiment object; ``id``, ``short_id``, ``jobs``,
            ``children`` and ``framework_config`` are read.
        filter_job: Passed through unchanged in the stream metadata under
            the ``"filter_job"`` key.
    """
    def add_experiment_to_log(experiment):
        # Experiments are shown by short id, jobs as "<short_id>.<job name>".
        ids_to_name[experiment.id] = experiment.short_id
        for job in experiment.jobs:
            ids_to_name[job.id] = '{}.{}'.format(experiment.short_id, job.name)

    url = '%s/ws/experiments/%s/stream' % (get_stream_url(), experiment.id)
    ids_to_name = {}
    # Work on a copy: extending the experiment's own list in place would
    # leak child jobs into experiment.jobs, which is still read below by
    # the tensorboard helpers and by any later caller.
    jobs = list(experiment.jobs)
    add_experiment_to_log(experiment)
    for child_experiment in experiment.children:
        jobs.extend(child_experiment.jobs)
        add_experiment_to_log(child_experiment)

    # Largest GPU count among Horovod master jobs; the trailing [0] keeps
    # max() defined when there is no such job.
    horovod_gpus = max(
        [job.gpus for job in jobs if job.role == 'tf-hrvd-master'] + [0])

    # Walk framework_config -> horovod -> workers -> processes, treating a
    # missing or falsy level as an empty mapping.
    framework_config = experiment.framework_config or {}
    horovod_config = framework_config.get('horovod', {}) or {}
    workers_config = horovod_config.get('workers', {}) or {}
    horovod_processes = workers_config.get('processes', None)

    meta = {
        "experiment_id": experiment.short_id,
        "filter_job": filter_job,
        # Configured process count wins, then the master GPU count, then 1.
        "horovod_processes": horovod_processes or horovod_gpus or 1
    }
    if util.has_tensorboard(experiment):
        meta["tensorboard_job"] = util.tensorboard_job(experiment)
    LogPrinter(url, ids_to_name, stream_meta=meta).stream()
コード例 #3
0
ファイル: monitor.py プロジェクト: datar-ai/cli
def monitor_experiment(experiment, detailed=False, stream_meta=None):
    """Monitor the jobs of an experiment via ``monitor_jobs``.

    Args:
        experiment: Experiment object; ``id``, ``short_id`` and ``project``
            are read, and its jobs are obtained with
            ``get_experiment_jobs``.
        detailed: Forwarded to ``monitor_jobs`` as ``detailed``.
        stream_meta: Optional mapping of extra stream metadata. It is
            copied, never mutated, and forwarded to ``monitor_jobs``; the
            ``"experiment_id"`` key is always set by this function and
            overrides any caller-supplied value. When omitted, the
            metadata is exactly ``{"experiment_id": experiment.short_id}``.
    """
    url = '%s/ws/experiments/%s/monitor' % (get_stream_url(), experiment.id)
    jobs = get_experiment_jobs(experiment)

    # None default instead of a shared mutable {}; copy so the caller's
    # mapping is left untouched.
    meta = dict(stream_meta) if stream_meta else {}
    meta["experiment_id"] = experiment.short_id

    monitor_jobs(url,
                 experiment.project,
                 jobs,
                 detailed=detailed,
                 stream_meta=meta)
コード例 #4
0
def main():
    """Command-line entry point: build the parser, parse argv, dispatch.

    Registers every subcommand parser, optionally prints the configured
    endpoint URLs when ``-v`` is given, and then invokes the selected
    subcommand's ``run`` callable. When no subcommand was selected, the
    usage line is printed instead.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-v',
                     help="show endpoints",
                     action='store_const',
                     const=True)
    cli.add_argument('--version',
                     '-V',
                     help="show version",
                     action='version',
                     version='RiseML CLI {}'.format(VERSION))
    commands = cli.add_subparsers()

    # Registration order is preserved from the original listing.
    registrars = (
        # user ops
        add_whoami_parser,
        add_user_parser,
        # system ops
        add_system_parser,
        add_account_parser,
        # data ops
        add_ls_parser,
        add_cp_parser,
        add_rm_parser,
        # workflow ops (add_exec_parser and add_deploy_parser are
        # currently disabled)
        add_init_parser,
        add_train_parser,
        add_monitor_parser,
        add_logs_parser,
        add_kill_parser,
        add_status_parser,
    )
    for register in registrars:
        register(commands)

    args = cli.parse_args(sys.argv[1:])

    if args.v:
        endpoints = (
            ('api_url: %s', get_api_url),
            ('sync_url: %s', get_sync_url),
            ('stream_url: %s', get_stream_url),
            ('git_url: %s', get_git_url),
        )
        for template, getter in endpoints:
            print(template % getter())

    # No subcommand selected: nothing to run, just show usage.
    if not hasattr(args, 'run'):
        cli.print_usage()
        return

    # Every command except login needs an existing client configuration.
    if not config_file_exists() and args.run.__name__ != 'run_login':
        handle_error('Client configuration file %s not found' %
                     get_config_file())

    try:
        args.run(args)
    except HTTPError as err:
        # Any HTTP error not handled by the command itself ends up here.
        handle_error(str(err))
    except KeyboardInterrupt:
        print('\nAborting...')
コード例 #5
0
def stream_experiment_log(experiment):
    """Stream the logs of an experiment and its child experiments.

    Maps the experiment, each child experiment and every one of their jobs
    to a display name, then starts a ``LogPrinter`` on the experiment's
    websocket stream URL.

    Args:
        experiment: Experiment object; ``id``, ``short_id``, ``jobs`` and
            ``children`` are read.
    """
    endpoint = '%s/ws/experiments/%s/stream' % (get_stream_url(),
                                                experiment.id)

    # Root experiment first, then its children, in their given order.
    display_names = {}
    for exp in [experiment] + list(experiment.children):
        display_names[exp.id] = exp.short_id
        for job in exp.jobs:
            # Jobs are labelled "<experiment short id>.<job name>".
            display_names[job.id] = '{}.{}'.format(exp.short_id, job.name)

    stream_meta = {"experiment_id": experiment.short_id}
    if util.has_tensorboard(experiment):
        stream_meta["tensorboard_job"] = util.tensorboard_job(experiment)

    printer = LogPrinter(endpoint, display_names, stream_meta=stream_meta)
    printer.stream()
コード例 #6
0
ファイル: monitor.py プロジェクト: datar-ai/cli
def monitor_job(job, detailed=False):
    """Monitor a single job via ``monitor_jobs``.

    Args:
        job: Job object; ``id``, ``short_id`` and ``project`` are read.
        detailed: Forwarded to ``monitor_jobs`` as ``detailed``.
    """
    endpoint = '%s/ws/jobs/%s/monitor' % (get_stream_url(), job.id)
    project = job.project
    stream_meta = {"job_id": job.short_id}
    # monitor_jobs takes a list of jobs, so the single job is wrapped.
    monitor_jobs(endpoint, project, [job],
                 detailed=detailed, stream_meta=stream_meta)