Ejemplo n.º 1
0
def run(ctx, args):
    """Run an experiment operation for the requested number of trials.

    The experiment name (and an optional operation name) is extracted from
    ``args``, the experiment's configuration file is looked up in the
    project configuration and loaded, and an ``oplib.Operation`` is built
    from it. Unless ``args.yes`` is set, the user is asked to confirm
    before any trial starts.

    Args:
        ctx: Command context. Not used by this function.
        args: Parsed command-line arguments. ``args.yes`` and
            ``args.trials`` are read here; the whole object is also
            forwarded to the helper functions.
    """
    # Strip potential operation from experiment name
    experiment, op_name = _strip_op_name_from_experiment(args)

    # Safe load of experiment file path
    try:
        # NOTE(review): ``.get()`` yields None for an unknown experiment
        # name (assuming a dict-like mapping), and that None is handed to
        # ``config.load`` below -- confirm ``config.load`` copes with it.
        experiment_config_file = \
            config.get_project_config()["experiments"].get(experiment)
    except KeyError:
        cli.error("No experiments found. "
                  "Are you sure you're in a Tracker project?")

    # Load configuration file
    experiment_config = config.load(experiment_config_file)

    # Create operation object
    #  - Here we scan through the sourcecode
    #    and extract the (hyper-)parameters
    op = oplib.Operation(
        op_name, _op_run_dir(args),
        _get_experiment_dict_by_name(experiment, experiment_config),
        _op_gpus(args), args.yes)

    # Prompt user to confirm run parameters
    if args.yes or _confirm_run(args, experiment, op):
        for n in range(args.trials):
            cli.out("Trial {}/{}".format(n + 1, args.trials))
            # Run the trial
            _run(args, op)
Ejemplo n.º 2
0
def run(ctx, args):
    """Run the requested experiment operation once per trial.

    When the experiment name carries no operation, ``DEFAULT_OP`` is used
    and that choice is logged at debug level. The experiment's
    configuration file is looked up in the project configuration, loaded,
    and used to build an ``oplib.Operation``. Trials only start when
    ``args.yes`` is set or the user confirms the run.

    Args:
        ctx: Command context. Not used by this function.
        args: Parsed command-line arguments. ``args.yes`` and
            ``args.trials`` are read here; the whole object is also
            forwarded to the helper functions.
    """
    # Separate the experiment name from an optional operation name.
    name, op_name = _strip_op_name_from_experiment(args)
    if op_name is None:
        op_name = DEFAULT_OP
        log.debug(
            "Running experiment: '{}' with default operation: '{}' "
            "as no op was provided by the user!".format(name, op_name))

    # Look up the path of the experiment's configuration file.
    try:
        conf_path = config.get_project_config()["experiments"].get(name)
    except KeyError:
        cli.error("No experiments found. "
                  "Are you sure you're in a Tracker project?")

    conf = config.load(conf_path)

    # Collect the operation's inputs in the same order the constructor
    # takes them, then build the operation.
    run_dir = _op_run_dir(args)
    experiment = _op_experiment(name, conf)
    remote = _op_remote(args)
    gpus = _op_gpus(args)
    op = oplib.Operation(op_name, run_dir, experiment, remote, gpus)

    # Bail out unless the run is pre-approved or confirmed interactively.
    if not (args.yes or _confirm_run(args, name, op)):
        return

    for index in range(args.trials):
        trial_no = index + 1
        cli.out("Trial {}/{}".format(trial_no, args.trials))
        _run(args, op)
Ejemplo n.º 3
0
def run_dir_for_id(run_id):
    """Return the path that ``_path_for_id`` resolves for ``run_id``.

    If the lookup raises ``NoSuchRun``, a hint on how to list trials is
    printed and ``cli.error`` is called with ``errno.ENOENT``.
    """
    try:
        path = _path_for_id(run_id)
    except NoSuchRun:
        hint = ("The trial with id: '{}' was not found\n"
                "Show trials by running 'tracker experiment NAME "
                "--list_trials'.".format(run_id))
        cli.out(hint)
        cli.error("No such directory", errno.ENOENT)
    else:
        return path
Ejemplo n.º 4
0
def _op_run_dir(args):
    """Return the absolute form of ``args.run_dir``, or None if it is unset.

    When a run directory is given, a notice is printed unless the
    ``NO_WARN_RUNDIR`` environment variable equals ``"1"``.
    """
    if not args.run_dir:
        return None

    resolved = os.path.abspath(args.run_dir)
    notice_enabled = os.getenv("NO_WARN_RUNDIR") != "1"
    if notice_enabled:
        cli.out("Run directory is '{}' (results will not be "
                "visible to Tracker)".format(resolved))
    return resolved
Ejemplo n.º 5
0
def remote_op(op, prompt, default_resp, args):
    """Call ``op`` after optional confirmation, reporting operation errors.

    Args:
        op: Zero-argument callable to invoke.
        prompt: Text printed before the confirmation question. Skipped
            when ``args.yes`` is set.
        default_resp: Default answer passed to ``cli.confirm``.
        args: Parsed arguments; only ``args.yes`` is read.
    """
    if not args.yes:
        cli.out(prompt)
        if not cli.confirm("Continue?", default_resp):
            return

    try:
        op()
    except (OperationNotSupported, OperationError) as err:
        # Both failure kinds are reported to the user the same way.
        cli.error(err)
Ejemplo n.º 6
0
def _maybe_apply_default_runs(args):
    """Check that exactly two runs were given for the `diff` command.

    Two runs are accepted silently. One run, or more than two, prints an
    explanation and calls ``cli.error()``. Applying default runs when none
    are given is not implemented and raises ``NotImplementedError``.
    """
    count = len(args.runs)
    if count == 2:
        return
    if count == 0:
        # Defaults for the no-argument case have not been implemented.
        raise NotImplementedError

    if count == 1:
        cli.out(
            "The `diff` command requires two runs.\n"
            "Try specifying a second run or 'tracker diff --help' "
            "for more information.")
    else:
        cli.out(
            "The `diff` command cannot compare more than two runs.\n"
            "Try specifying just two runs or 'tracker diff --help' "
            "for more information.")
    cli.error()
Ejemplo n.º 7
0
def _run_remote(op, args):
    """Start the operation on the remote selected by ``args``.

    Each failure raised by ``remote.run_op`` is routed to its own handler.
    On success with ``args.background`` set, a hint on how to watch the
    run is printed.

    Args:
        op: Not used by this function.
        args: Parsed arguments; ``args.background`` and ``args.remote``
            are read here, and ``args`` is passed on to ``_run_kw`` and
            ``remotelib.remote_for_args``.
    """
    remote = remotelib.remote_for_args(args)

    # The handler order is kept as-is: the remotelib exception hierarchy
    # is not visible here, so reordering could change which clause matches.
    try:
        run_id = remote.run_op(**_run_kw(args))
    except remotelib.RunFailed as err:
        _handle_remote_run_failed(err, remote)
    except remotelib.RemoteProcessError as err:
        _handle_remote_process_error(err)
    except remotelib.RemoteProcessDetached as err:
        _handle_remote_process_detached(err, args.remote)
    except remotelib.OperationError as err:
        _handle_remote_op_error(err, remote)
    except remotelib.OperationNotSupported:
        cli.error("{} does not support this operation".format(remote.name))
    else:
        if not args.background:
            return
        watch_hint = (
            "{run_id} is running remotely on {remote}\n"
            "To watch use 'tracker watch {run_id} -r {remote}'".format(
                run_id=run_id[:8], remote=args.remote))
        cli.out(watch_hint)
Ejemplo n.º 8
0
def _tail(run):
    """Stream a run's output file to stdout while its process is alive.

    A "Watching run" notice is written to stderr unless the
    ``NO_WATCHING_MSG`` environment variable equals ``"1"``. If the run
    has no pid, its output is printed via ``_print_output`` and the
    function returns. Otherwise the output file is polled for as long as
    the process is running.

    Args:
        run: Run object; ``run.id``, ``run.pid`` and
            ``run.tracker_path("output")`` are read.
    """
    if os.getenv("NO_WATCHING_MSG") != "1":
        cli.out("Watching run %s (pid: %s)" % (run.id, run.pid), err=True)
    if run.pid is None:
        _print_output(run)
        return
    # NOTE(review): psutil.Process raises NoSuchProcess when the pid is
    # already gone; that case is not handled here -- confirm callers cope.
    proc = psutil.Process(run.pid)
    output_path = run.tracker_path("output")
    f = None
    try:
        while proc.is_running():
            # Retry opening until it succeeds (presumably the output file
            # may not exist yet -- TODO confirm against _try_open).
            f = f or _try_open(output_path)
            if not f:
                time.sleep(1.0)
                continue
            line = f.readline()
            if not line:
                # Nothing new yet; back off briefly before polling again.
                time.sleep(0.1)
                continue
            sys.stdout.write(line)
            sys.stdout.flush()
    finally:
        # Release the handle on every exit path, including when the loop
        # is interrupted (e.g. KeyboardInterrupt while watching).
        if f:
            f.close()
Ejemplo n.º 9
0
def _handle_remote_process_detached(e, remote):
    """Tell the user the remote run is still going and how to re-attach.

    Args:
        e: Exception whose first argument is the run id.
        remote: Value substituted for ``-r`` in the re-attach hint.
    """
    short_id = e.args[0][:8]
    template = (
        "\nDetached from remote run {run_id} (still running)\n"
        "To re-attach use 'tracker watch {run_id} -r {remote}'")
    cli.out(template.format(run_id=short_id, remote=remote))
Ejemplo n.º 10
0
def _handle_remote_run_failed(e, remote):
    """Point the user at the failed remote run's output, then error out.

    Args:
        e: Exception carrying ``remote_run_dir``; its base name is used
            as the run id.
        remote: Remote object; ``remote.name`` is shown in the hint.
    """
    short_id = os.path.basename(e.remote_run_dir)[:8]
    hint = ("Try 'tracker runs info %s -O -r %s' to view its output." %
            (short_id, remote.name))
    cli.out(hint, err=True)
    cli.error()
Ejemplo n.º 11
0
def _print_run_status(run):
    """Write the run's short id and final status to stderr."""
    status_line = ("Run %s stopped with a status of '%s'" %
                   (run.short_id, run.status))
    cli.out(status_line, err=True)
Ejemplo n.º 12
0
def _stopped_msg(run):
    """Print a "stopped watching" notice for ``run``.

    The notice says the process is still running when ``run.pid`` is set
    and psutil reports that process as running.

    Args:
        run: Run object; ``run.short_id`` and ``run.pid`` are read.
    """
    msg = "\nStopped watching %s" % run.short_id
    still_running = False
    if run.pid:
        try:
            still_running = psutil.Process(run.pid).is_running()
        except psutil.NoSuchProcess:
            # The process has already exited, so constructing the handle
            # fails; treat that as "not running" instead of crashing.
            still_running = False
    if still_running:
        msg += " (%s still running)" % run.pid
    cli.out(msg)