Exemple #1
0
def _maybe_apply_strict_cols(args):
    """Expand the ``--strict-cols`` option into the settings it implies.

    When ``args.strict_cols`` is set, its value replaces ``args.cols`` and
    both ``args.skip_core`` and ``args.skip_op_cols`` are switched on.
    Combining it with ``--cols`` is reported through ``cli.error``.
    ``args`` is modified in place and nothing is returned.
    """
    strict = args.strict_cols
    if not strict:
        return
    if args.cols:
        cli.error("--strict-cols and --cols cannot both be specified")
    args.cols = strict
    args.skip_core = args.skip_op_cols = True
Exemple #2
0
 def get_gpu_summary(self):
     """Return one formatted entry per raw GPU record.

     Raw records come from ``self._read_raw_gpu_stats(self._stats_cmd)`` and
     each is passed through ``self._format_gpu_stats``. A missing stats
     command is reported through ``cli.error`` first.
     """
     if not self._stats_cmd:
         cli.error("nvidia-smi not available")
     return [
         self._format_gpu_stats(record)
         for record in self._read_raw_gpu_stats(self._stats_cmd)
     ]
def run(ctx, args):
    """Runs an experiment

    Resolves the experiment (and optional operation) named in ``args``,
    loads the experiment's configuration file, builds the operation and,
    once confirmed, runs it ``args.trials`` times.

    Arguments:
        ctx -- command context; not used by this function
        args -- parsed command line arguments; ``yes`` and ``trials`` are
            read here, the rest is consumed by the helper functions
    """
    # Strip potential operation from experiment name
    experiment, op_name = _strip_op_name_from_experiment(args)

    # Safe load of experiment file path
    # NOTE(review): like the original, this assumes cli.error() ends the
    # command instead of returning - confirm.
    try:
        experiments = config.get_project_config()["experiments"]
    except KeyError:
        cli.error("No experiments found. "
                  "Are you sure you're in a Tracker project?")

    # ``.get`` answers None for a name that is not registered; report that
    # here rather than handing None to ``config.load``.
    experiment_config_file = experiments.get(experiment)
    if experiment_config_file is None:
        cli.error("Experiment '{}' was not found. "
                  "Are you sure it is part of this Tracker project?".format(
                      experiment))

    # Load configuration file
    experiment_config = config.load(experiment_config_file)

    # Create operation object
    #  - Here we scan through the sourcecode
    #    and extract the (hyper-)parameters
    op = oplib.Operation(
        op_name, _op_run_dir(args),
        _get_experiment_dict_by_name(experiment, experiment_config),
        _op_gpus(args), args.yes)

    # Prompt user to confirm run parameters
    if args.yes or _confirm_run(args, experiment, op):
        for n in range(args.trials):
            cli.out("Trial {}/{}".format(n + 1, args.trials))
            # Run the trial
            _run(args, op)
Exemple #4
0
def run(ctx, args):
    """Runs an experiment

    Resolves the experiment and operation named in ``args`` (falling back
    to ``DEFAULT_OP`` when no operation was given), loads the experiment's
    configuration file, builds the operation and, once confirmed, runs it
    ``args.trials`` times.

    Arguments:
        ctx -- command context; not used by this function
        args -- parsed command line arguments; ``yes`` and ``trials`` are
            read here, the rest is consumed by the helper functions
    """
    # Strip potential operation from experiment name
    exp_name, op_name = _strip_op_name_from_experiment(args)
    if op_name is None:
        op_name = DEFAULT_OP
        log.debug("Running experiment: '{}' with default operation: '{}' "
                  "as no op was provided by the user!".format(
                      exp_name, op_name))

    # Safe load of experiment file path
    # NOTE(review): like the original, this assumes cli.error() ends the
    # command instead of returning - confirm.
    try:
        experiments = config.get_project_config()["experiments"]
    except KeyError:
        cli.error("No experiments found. "
                  "Are you sure you're in a Tracker project?")

    # ``.get`` answers None for a name that is not registered; report that
    # here rather than handing None to ``config.load``.
    exp_conf_file = experiments.get(exp_name)
    if exp_conf_file is None:
        cli.error("Experiment '{}' was not found. "
                  "Are you sure it is part of this Tracker project?".format(
                      exp_name))

    # Load configuration file
    exp_conf = config.load(exp_conf_file)

    # Create operation object
    op = oplib.Operation(op_name, _op_run_dir(args),
                         _op_experiment(exp_name, exp_conf), _op_remote(args),
                         _op_gpus(args))

    # Prompt user to confirm run parameters
    if args.yes or _confirm_run(args, exp_name, op):
        for n in range(args.trials):
            cli.out("Trial {}/{}".format(n + 1, args.trials))
            # Run the trial
            _run(args, op)
Exemple #5
0
def _diff(path1, path2, args):
    """Run the configured diff program on two paths.

    The command comes from ``_diff_cmd(args)``, split into arguments, with
    ``path1`` and ``path2`` appended. The program's return code is not
    inspected; only a failure to launch it (``OSError``) is reported
    through ``cli.error``.
    """
    cmd = command.shlex_split(_diff_cmd(args)) + [path1, path2]
    log.debug("diff cmd: %r", cmd)
    try:
        subprocess.call(cmd)
    except OSError as launch_error:
        message = "error running '%s': %s" % (" ".join(cmd), launch_error)
        cli.error(message)
Exemple #6
0
def run_dir_for_id(run_id):
    """Return the result of ``_path_for_id`` for the given run id.

    If the lookup raises ``NoSuchRun``, a hint on how to list trials is
    printed and the failure is reported through ``cli.error`` with
    ``errno.ENOENT``.
    """
    try:
        run_dir = _path_for_id(run_id)
    except NoSuchRun:
        hint = ("The trial with id: '{}' was not found\n"
                "Show trials by running 'tracker experiment NAME "
                "--list_trials'.")
        cli.out(hint.format(run_id))
        cli.error("No such directory", errno.ENOENT)
    else:
        return run_dir
Exemple #7
0
def remote_op(op, prompt, default_resp, args):
    """Run a remote operation after an optional confirmation.

    Arguments:
        op -- zero-argument callable performing the operation
        prompt -- text shown before asking for confirmation
        default_resp -- default answer handed to ``cli.confirm``
        args -- parsed arguments; ``args.yes`` skips prompt and confirmation

    ``OperationNotSupported`` and ``OperationError`` raised by ``op`` are
    reported through ``cli.error``.
    """
    confirmed = args.yes
    if not confirmed:
        cli.out(prompt)
        confirmed = cli.confirm("Continue?", default_resp)
    if not confirmed:
        return
    try:
        op()
    except (OperationNotSupported, OperationError) as e:
        cli.error(e)
def _handle_remote_op_error(e, remote):
    """Report a failed remote operation through ``cli.error``.

    Arguments:
        e -- the raised error; ``e.args[0]`` is either the marker
            "running" (then ``e.args[1]`` is the run id) or the message
            to display
        remote -- remote the operation targeted; only ``name`` is read
    """
    if e.args[0] == "running":
        assert len(e.args) == 2, e.args
        # Adjacent literals are concatenated verbatim, so the space between
        # "stop" and the run id has to be written explicitly.
        msg = ("{run_id} is still running\n"
               "Wait for it to stop or try 'tracker stop "
               "{run_id} -r {remote_name}' "
               "to stop it.".format(run_id=e.args[1], remote_name=remote.name))
    else:
        msg = e.args[0]
    cli.error(msg)
def remote_status(args):
    """Command to acquire the status of a specific remote.

    The remote is resolved with ``remotelib.remote_for_args`` and queried
    with ``args.verbose``. ``remotelib.Down`` is reported with exit status
    2; ``remotelib.OperationError`` is passed to ``cli.error`` unchanged.
    """
    remote = remotelib.remote_for_args(args)
    try:
        remote.status(args.verbose)
    except remotelib.Down as down:
        reason = "Remote %s is not available (%s)" % (remote.name, down)
        cli.error(reason, exit_status=2)
    except remotelib.OperationError as failure:
        cli.error(failure)
Exemple #10
0
def _run_for_pid(pid):
    """Find the run associated with a process id.

    Returns None when ``_try_int`` cannot turn ``pid`` into an integer.
    Otherwise every run directory below each entry of the "experiments"
    path is inspected and the first run is returned whose ``pid`` equals
    the requested one, or for which ``_parent_pid(run.pid)`` does. If no
    run matches, the failure is reported through ``cli.error``.

    NOTE(review): ``pathlib`` here provides ``path`` and ``iter_dirs``, so
    it is presumably a project module rather than the stdlib one - confirm.
    """
    wanted = _try_int(pid)
    if wanted is None:
        return None

    for exp_name in os.listdir(pathlib.path("experiments")):
        exp_dir = os.path.join(pathlib.path("experiments"), exp_name)
        for run_id, run_dir in pathlib.iter_dirs(exp_dir):
            candidate = runlib.Run(run_id, run_dir)
            if not candidate.pid:
                continue
            if (candidate.pid == wanted
                    or _parent_pid(candidate.pid) == wanted):
                return candidate
    cli.error("cannot find run for pid %i" % wanted)
Exemple #11
0
def get_project_names_and_dirs():
    """Return name and path of every project listed in the Tracker file.

    The "projects" entry is iterated as a sequence of mappings from project
    name to a details mapping. Each pair becomes a dict with the keys
    "name" and "path" ("path" defaults to an empty string). An empty or
    missing "projects" entry is reported through ``cli.error``.
    """
    registered = TrackerFile().get("projects", {})
    if not registered:
        cli.error("No projects specified in {}".format(
            config.get_user_config_path()))
        # Only reached if cli.error() returns.
        return None

    summary = []
    for entry in registered:
        for name, details in entry.items():
            summary.append({"name": name, "path": details.get("path", "")})
    return summary
Exemple #12
0
def _read_pid(path):
    """Read a process id from the first line of a pidfile.

    Arguments:
        path -- location of the pidfile

    Returns the pid as an int, or None when the file does not exist. Any
    other ``IOError`` from opening the file is re-raised. A first line that
    is not an integer is reported through ``cli.error``.
    """
    import errno

    try:
        f = open(path, "r")
    except IOError as e:
        # A missing pidfile is expected; anything else is a real error.
        if e.errno != errno.ENOENT:
            raise
        return None

    # Close the handle deterministically instead of leaving it to the
    # garbage collector.
    with f:
        raw = f.readline().strip()

    try:
        return int(raw)
    except ValueError:
        cli.error("pidfile %s does not contain a valid pid" % path)
Exemple #13
0
def get_remotes():
    """Return a summary of the remotes found in the user configuration.

    Each entry of the "remotes" mapping becomes a dict with the keys
    "name", "type", "host" and "desc" (missing values default to an empty
    string). Entries are ordered by sorting the mapping's items. An empty
    or missing "remotes" entry is reported through ``cli.error``.
    """
    configured = get_user_config().get("remotes", {})
    if not configured:
        cli.error("No remotes specified in {}".format(
            get_user_config_path()))
        # Only reached if cli.error() returns.
        return None

    listing = []
    for name, details in sorted(configured.items()):
        listing.append({
            "name": name,
            "type": details.get("type", ""),
            "host": details.get("host", ""),
            "desc": details.get("description", ""),
        })
    return listing
Exemple #14
0
def _maybe_apply_default_runs(args):
    """Check that exactly two runs were given to the `diff` command.

    Two runs pass silently. No runs at all raises ``NotImplementedError``
    (choosing default runs is not implemented). One run, or more than two,
    prints an explanation and reports failure through ``cli.error``.
    """
    count = len(args.runs)
    if count == 2:
        return
    if count == 0:
        raise NotImplementedError

    if count == 1:
        cli.out(
            "The `diff` command requires two runs.\n"
            "Try specifying a second run or 'tracker diff --help' "
            "for more information.")
    else:
        cli.out(
            "The `diff` command cannot compare more than two runs.\n"
            "Try specifying just two runs or 'tracker diff --help' "
            "for more information.")
    cli.error()
def _run_remote(op, args):
    """Start the run on the remote selected by ``args``.

    The remote is resolved with ``remotelib.remote_for_args`` and started
    via ``remote.run_op(**_run_kw(args))``; ``op`` is not used here. Each
    ``remotelib`` failure is handed to its dedicated handler. On success
    with ``args.background`` set, a note on how to watch the run is printed.
    """
    remote = remotelib.remote_for_args(args)

    try:
        run_id = remote.run_op(**_run_kw(args))
    except remotelib.RunFailed as e:
        _handle_remote_run_failed(e, remote)
    except remotelib.RemoteProcessError as e:
        _handle_remote_process_error(e)
    except remotelib.RemoteProcessDetached as e:
        _handle_remote_process_detached(e, args.remote)
    except remotelib.OperationError as e:
        _handle_remote_op_error(e, remote)
    except remotelib.OperationNotSupported:
        cli.error("{} does not support this operation".format(remote.name))
    else:
        if not args.background:
            return
        notice = ("{run_id} is running remotely on {remote}\n"
                  "To watch use 'tracker watch {run_id} -r {remote}'")
        cli.out(notice.format(run_id=run_id[:8], remote=args.remote))
Exemple #16
0
def cd(ctx, args):
    """Change directory into a project created by Tracker.

    The project directory is looked up by ``args.project_name``. If it
    exists, the process changes into it and starts ``/bin/bash`` there. A
    missing directory or an ``OSError`` is reported through ``cli.error``.
    ``ctx`` is not used.
    """
    log.debug("Searching for projects")

    # Retrieve project directory by its name
    target = projects.get_project_dir_by_name(args.project_name)
    if not os.path.isdir(target):
        cli.error("No such directory: {}".format(target))
        return

    try:
        os.chdir(target)
        # Changing directory here does not affect the invoking shell, so a
        # new /bin/bash is started inside the project directory.
        os.system("/bin/bash")
    except OSError as e:
        cli.error(e)
Exemple #17
0
    def add_project(self, project):
        """Adds project to the global Tracker file

        Arguments:
            project {<dict>} -- project dictionary; its first key is taken
                as the project name

        A name that already occurs in a stored project entry is reported
        through ``cli.error``. The project is then appended and the data
        written out with ``self._write()``.
        """
        if self._data is None:
            self._data = {}
        if self._data.get("projects") is None:
            self._data["projects"] = []
        registered = self._data["projects"]

        new_name = list(project)[0]
        for entry in registered:
            if new_name in entry:
                cli.error(
                    "A project of that name ('{}') already exists!".format(
                        new_name))

        registered.append(project)
        self._write()
def _run_local(op, args):
    """Run the operation locally and surface a failing return code.

    ``op.run`` receives the pidfile from ``_op_pidfile(args)``.
    ``resources.ResourceError`` and ``oplib.ProcessError`` are reported
    through ``cli.error``. A non-zero return code is passed to
    ``cli.error`` as the exit status.
    """
    try:
        exit_code = op.run(_op_pidfile(args))
    except resources.ResourceError as e:
        message = "Run failed as a resource could not be obtained: {}".format(e)
        cli.error(message)
    except oplib.ProcessError as e:
        message = "Run failed: {}".format(e)
        cli.error(message)
    else:
        log.debug("Exited with return code {}".format(exit_code))
        if exit_code != 0:
            cli.error(exit_status=exit_code)
Exemple #19
0
def remote_for_args(args):
    """Return the remote named by ``args.remote``.

    The lookup is delegated to ``for_name``. ``NoSuchRemote``,
    ``UnsupportedRemoteType`` and ``MissingRequiredConfig`` are each turned
    into an explanatory message that is reported through ``cli.error``.
    """
    assert args.remote, args
    name = args.remote
    try:
        return for_name(name)
    except NoSuchRemote:
        message = (
            "remote '%s' is not defined\n"
            "Show remotes by running 'tracker remotes' or "
            "'tracker remotes --help' for more information."
            % name)
    except UnsupportedRemoteType as e:
        message = (
            "remote '%s' in ~/.tracker/tracker.yaml has unsupported "
            "type: %s" % (name, e.args[0]))
    except MissingRequiredConfig as e:
        message = (
            "remote '%s' in ~/.tracker/tracker.yaml is missing required "
            "config: %s" % (name, e.args[0]))
    cli.error(message)
def _handle_remote_process_error(e):
    """Pass the error's ``exit_status`` on to ``cli.error``."""
    status = e.exit_status
    cli.error(exit_status=status)
def _handle_remote_run_failed(e, remote):
    """Point the user at the failed run's output, then report failure.

    The run id is the base name of ``e.remote_run_dir``; its first eight
    characters and ``remote.name`` are inserted into the hint, which is
    printed with ``err=True`` before ``cli.error`` is called.
    """
    short_id = os.path.basename(e.remote_run_dir)[:8]
    hint = "Try 'tracker runs info %s -O -r %s' to view its output." % (
        short_id, remote.name)
    cli.out(hint, err=True)
    cli.error()
Exemple #22
0
def _handle_no_run_for_pid_arg(pid_arg):
    """Report through ``cli.error`` that ``pid_arg`` does not exist."""
    # The argument matched no run, so it is treated as a pidfile path.
    message = "%s does not exist" % pid_arg
    cli.error(message)
Exemple #23
0
def _validate_args(args):
    """Reject a combination of the ``--csv`` and ``--table`` options.

    The conflict is reported through ``cli.error``; otherwise nothing
    happens and None is returned.
    """
    conflicting = args.csv and args.table
    if conflicting:
        cli.error("--table and --csv cannot both be specified")