コード例 #1
0
def unnest_checkpoints(checkpoints):
    """Return flattened copies of the given checkpoint dicts.

    Every checkpoint is deep-copied first, so the inputs are never modified.
    For each key in ``UNNEST_KEYS`` that the copy contains, the value stored
    under that key is removed, flattened and merged into the top level of
    the copy. The copy as a whole is then flattened.

    Args:
        checkpoints: Iterable of checkpoint dicts.

    Returns:
        A list with one flattened dict per input checkpoint.
    """
    flattened = []
    for original in checkpoints:
        entry = copy.deepcopy(original)
        for nested_key in UNNEST_KEYS:
            if nested_key in entry:
                try:
                    entry.update(flatten_dict(entry.pop(nested_key)))
                except Exception:
                    # Best effort: the value was already popped, so it is
                    # dropped from the entry if it cannot be flattened.
                    logger.debug("Failed to flatten dict.")
        flattened.append(flatten_dict(entry))
    return flattened
コード例 #2
0
ファイル: browser.py プロジェクト: vv111y/nupic.research
def _read_experiment(experiment_state, experiment_path):
    """Load the progress tables and parameters of an experiment's trials.

    Checkpoints without a "logdir" are skipped, as are trials whose
    "progress.csv" is missing or empty.

    Args:
        experiment_state: Mapping whose "checkpoints" entry holds one dict
            per trial.
        experiment_path: Directory containing the per-trial directories.

    Returns:
        A ``(progress, params)`` tuple of dicts keyed by experiment tag.
        ``progress`` maps to the DataFrame read from "progress.csv" and
        ``params`` to the decoded content of "params.json".
    """
    flat_checkpoints = [
        flatten_dict(checkpoint)
        for checkpoint in experiment_state["checkpoints"]
    ]
    progress, params = {}, {}
    # TODO: no real use for exp_directories outside this function, why get it?
    exp_directories = {}
    for exp in flat_checkpoints:
        logdir = exp.get("logdir", None)
        if logdir is None:
            continue
        exp_dir = os.path.basename(logdir)
        exp_tag = exp["experiment_tag"]
        trial_dir = os.path.join(experiment_path, exp_dir)
        progress_csv = os.path.join(trial_dir, "progress.csv")
        # Only proceed when the progress file exists and is non-empty.
        has_progress = (os.path.isfile(progress_csv)
                        and os.stat(progress_csv).st_size)
        if not has_progress:
            continue
        progress[exp_tag] = pd.read_csv(progress_csv)
        exp_directories[exp_tag] = os.path.abspath(trial_dir)

        # Read in the configs for this experiment
        with open(os.path.join(trial_dir, "params.json")) as f:
            params[exp_tag] = json.load(f)

    return progress, params
コード例 #3
0
ファイル: trial_runner.py プロジェクト: xiaming9880/ray
    def _process_trial(self, trial):
        """Fetch the latest result of ``trial`` and act on it.

        The result is flattened and passed to the scheduler and the search
        algorithm, the trial's last result and checkpoint are updated, and
        the trial is continued, paused or stopped according to the
        scheduling decision.

        Any exception raised along the way is logged and handed to
        ``_process_trial_failure`` together with the formatted traceback.

        Args:
            trial: The trial whose result should be processed.
        """
        try:
            result = self.trial_executor.fetch_result(trial)

            is_duplicate = RESULT_DUPLICATE in result
            # TrialScheduler and SearchAlgorithm still receive a
            # notification because there may be special handling for
            # the `on_trial_complete` hook.
            if is_duplicate:
                logger.debug("Trial finished without logging 'done'.")
                # Reuse the previous result and mark it as done. Note that
                # this updates `trial.last_result` in place.
                result = trial.last_result
                result.update(done=True)

            self._total_time += result.get(TIME_THIS_ITER_S, 0)

            # Scheduler and search algorithm receive the flattened result;
            # the trial itself keeps the nested one.
            flat_result = flatten_dict(result)
            if trial.should_stop(flat_result):
                # Hook into scheduler
                self._scheduler_alg.on_trial_complete(self, trial, flat_result)
                self._search_alg.on_trial_complete(
                    trial.trial_id, result=flat_result)
                decision = TrialScheduler.STOP
            else:
                with warn_if_slow("scheduler.on_trial_result"):
                    decision = self._scheduler_alg.on_trial_result(
                        self, trial, flat_result)
                with warn_if_slow("search_alg.on_trial_result"):
                    self._search_alg.on_trial_result(trial.trial_id,
                                                     flat_result)
                # The scheduler asked to stop the trial, so tell the search
                # algorithm that it was terminated early.
                if decision == TrialScheduler.STOP:
                    with warn_if_slow("search_alg.on_trial_complete"):
                        self._search_alg.on_trial_complete(
                            trial.trial_id,
                            result=flat_result,
                            early_terminated=True)

            # A duplicate result is the trial's own last result, so it is
            # not recorded a second time.
            if not is_duplicate:
                trial.update_last_result(
                    result, terminate=(decision == TrialScheduler.STOP))

            # Checkpoints to disk. This should be checked even if
            # the scheduler decision is STOP or PAUSE. Note that
            # PAUSE only checkpoints to memory and does not update
            # the global checkpoint state.
            self._checkpoint_trial_if_needed(
                trial, force=result.get(SHOULD_CHECKPOINT, False))

            if decision == TrialScheduler.CONTINUE:
                self.trial_executor.continue_training(trial)
            elif decision == TrialScheduler.PAUSE:
                self.trial_executor.pause_trial(trial)
            elif decision == TrialScheduler.STOP:
                self.trial_executor.export_trial_if_needed(trial)
                self.trial_executor.stop_trial(trial)
            else:
                # NOTE(review): asserts are stripped under `python -O`, in
                # which case an unknown decision would pass silently.
                assert False, "Invalid scheduling decision: {}".format(
                    decision)
        except Exception:
            logger.exception("Error processing event.")
            self._process_trial_failure(trial, traceback.format_exc())
コード例 #4
0
    def on_result(self, result):
        """Write the scalar entries of ``result`` as TensorFlow summaries.

        The summary file writer is created on the first call. Entries are
        flattened with "/" as delimiter and logged under the "ray/tune/"
        prefix; only values whose exact type is in ``VALID_SUMMARY_TYPES``
        are written. The step is ``TIMESTEPS_TOTAL`` when present and
        truthy, otherwise ``TRAINING_ITERATION``.

        Args:
            result: Result dict of the latest training iteration.
        """
        if self._file_writer is None:
            from tensorflow.python.eager import context
            self._context = context
            self._file_writer = tf.summary.create_file_writer(self.logdir)
        with tf.device("/CPU:0"), self._context.eager_mode():
            with tf.summary.record_if(True), self._file_writer.as_default():
                step = result.get(TIMESTEPS_TOTAL)
                if not step:
                    step = result[TRAINING_ITERATION]

                loggable = result.copy()
                # not useful to log these
                for skipped in ("config", "pid", "timestamp", TIME_TOTAL_S,
                                TRAINING_ITERATION):
                    loggable.pop(skipped, None)

                flat_result = flatten_dict(loggable, delimiter="/")
                for attr, value in flat_result.items():
                    if type(value) not in VALID_SUMMARY_TYPES:
                        continue
                    tag = "/".join(["ray", "tune", attr])
                    tf.summary.scalar(tag, value, step=step)
        self._file_writer.flush()
コード例 #5
0
ファイル: suggestion.py プロジェクト: zlpmichelle/ray
    def _generate_trials(self, experiment_spec, output_path=""):
        """Generate trials whose configurations come from `_suggest`.

        A fresh trial id is created for every sample and passed into
        `_suggest`. The suggested configuration is merged into a copy of the
        experiment's "config" before the trial is built.

        Args:
            experiment_spec: Experiment specification; must contain "run".
            output_path: Forwarded to `create_trial_from_spec`.

        Yields:
            Trial objects constructed according to `spec`, or None each
            time `_suggest` has no configuration to offer yet.

        Raises:
            TuneError: If `experiment_spec` has no "run" entry.
        """
        if "run" not in experiment_spec:
            raise TuneError("Must specify `run` in {}".format(experiment_spec))
        num_samples = experiment_spec.get("num_samples", 1)
        for _ in range(num_samples):
            trial_id = Trial.generate_id()
            # Keep yielding None until a suggestion is available.
            suggested_config = self._suggest(trial_id)
            while suggested_config is None:
                yield None
                suggested_config = self._suggest(trial_id)

            spec = copy.deepcopy(experiment_spec)
            merged_config = merge_dicts(spec["config"],
                                        copy.deepcopy(suggested_config))
            spec["config"] = merged_config
            flattened_config = resolve_nested_dict(merged_config)
            self._counter += 1
            counter_str = str(self._counter)
            tag = "{0}_{1}".format(counter_str, format_vars(flattened_config))
            evaluated = flatten_dict(suggested_config)
            yield create_trial_from_spec(
                spec,
                output_path,
                self._parser,
                evaluated_params=evaluated,
                experiment_tag=tag,
                trial_id=trial_id)
コード例 #6
0
ファイル: visual_utils.py プロジェクト: blockspacer/ray-2
def _parse_configs(cfg_path):
    """Load a JSON config file and return it as a flattened dict.

    Parsing is best effort: any failure (missing file, invalid JSON, ...)
    is logged with its traceback and an empty dict is returned.

    Args:
        cfg_path (str): Path of the JSON config file.

    Returns:
        dict: The flattened configuration, or ``{}`` if it could not be
        read.
    """
    # Bind the result up front so the error path has something to return
    # instead of failing with UnboundLocalError after logging.
    cfg_dict = {}
    try:
        with open(cfg_path) as f:
            cfg_dict = flatten_dict(json.load(f))
    except Exception:
        logger.exception("Config parsing failed.")
    return cfg_dict
コード例 #7
0
ファイル: trial.py プロジェクト: wwxFromTju/ray
 def update_last_result(self, result, terminate=False):
     """Record ``result`` as the trial's latest result.

     The result dict is updated in place with the trial id, the ``done``
     flag and, if set, the experiment tag. It is then stored, passed to
     the result logger, and the max/min/last statistics of every numeric
     (flattened) metric are refreshed.

     Args:
         result: Result dict reported by the trial; modified in place.
         terminate: Whether this is the trial's final result.
     """
     result.update(trial_id=self.trial_id, done=terminate)
     if self.experiment_tag:
         result.update(experiment_tag=self.experiment_tag)

     if self.verbose:
         # Print on termination, or when the debug interval has elapsed.
         if (terminate
                 or time.time() - self.last_debug > DEBUG_PRINT_INTERVAL):
             print("Result for {}:".format(self))
             print("  {}".format(pretty_print(result).replace("\n", "\n  ")))
             self.last_debug = time.time()

     self.set_location(Location(result.get("node_ip"), result.get("pid")))
     self.last_result = result
     self.last_update_time = time.time()
     self.result_logger.on_result(self.last_result)

     for metric, value in flatten_dict(result).items():
         if not isinstance(value, Number):
             continue
         if metric in self.metric_analysis:
             stats = self.metric_analysis[metric]
             stats["max"] = max(value, stats["max"])
             stats["min"] = min(value, stats["min"])
             stats["last"] = value
         else:
             # First observation of this metric.
             self.metric_analysis[metric] = {
                 "max": value,
                 "min": value,
                 "last": value
             }
コード例 #8
0
ファイル: logger.py プロジェクト: chuckwoody/ray
def to_tf_values(result, path):
    """Convert a result dict into a list of ``tf.Summary.Value`` objects.

    The result is flattened with "/" as delimiter. Only entries whose exact
    type is in ``VALID_SUMMARY_TYPES`` are converted.

    Args:
        result: Possibly nested result dict.
        path: List of tag components prepended to every attribute name.

    Returns:
        A list of ``tf.Summary.Value`` objects, one per accepted entry.
    """
    values = []
    for attr, value in flatten_dict(result, delimiter="/").items():
        if type(value) not in VALID_SUMMARY_TYPES:
            continue
        tag = "/".join(path + [attr])
        values.append(tf.Summary.Value(tag=tag, simple_value=value))
    return values
コード例 #9
0
ファイル: visual_utils.py プロジェクト: blockspacer/ray-2
def _parse_results(res_path):
    """Return the flattened JSON object stored on the last line of a file.

    Any failure (for example a missing or empty file) is logged with its
    traceback and an empty dict is returned.

    Args:
        res_path: Path of a file holding one JSON document per line.

    Returns:
        The flattened last result, or ``{}`` if it could not be read.
    """
    try:
        with open(res_path) as f:
            # Get last line in file
            for line in f:
                pass
        return flatten_dict(json.loads(line.strip()))
    except Exception:
        logger.exception("Importing %s failed...Perhaps empty?" % res_path)
    return {}
コード例 #10
0
def to_tf_values(result, path):
    """Convert a result dict into a list of ``tf.Summary.Value`` objects.

    The result is flattened with "/" as delimiter. Plain ``int`` and
    ``float`` values are always accepted; when ``use_tf150_api`` is set,
    ``np.float32``, ``np.float64`` and ``np.int32`` are accepted as well.
    Types are compared exactly, not via ``isinstance``.

    Args:
        result: Possibly nested result dict.
        path: List of tag components prepended to every attribute name.

    Returns:
        A list of ``tf.Summary.Value`` objects, one per accepted entry.
    """
    type_list = [int, float]
    if use_tf150_api:
        type_list += [np.float32, np.float64, np.int32]

    values = []
    for attr, value in flatten_dict(result, delimiter="/").items():
        if type(value) in type_list:
            tag = "/".join(path + [attr])
            values.append(tf.Summary.Value(tag=tag, simple_value=value))
    return values
コード例 #11
0
ファイル: logger.py プロジェクト: chuckwoody/ray
 def on_result(self, result):
     """Append ``result`` as one row of the CSV file.

     The "config" entry is dropped and the rest is flattened with "/" as
     delimiter. The CSV writer is created on the first call, using that
     result's keys as columns; the header is written unless the logger is
     continuing an existing file. Keys that are not columns are ignored.

     Args:
         result: Result dict of the latest training iteration.
     """
     without_config = result.copy()
     without_config.pop("config", None)
     flat = flatten_dict(without_config, delimiter="/")

     if self._csv_out is None:
         self._csv_out = csv.DictWriter(self._file, flat.keys())
         if not self._continuing:
             self._csv_out.writeheader()

     row = {}
     for key, value in flat.items():
         if key in self._csv_out.fieldnames:
             row[key] = value
     self._csv_out.writerow(row)
     self._file.flush()
コード例 #12
0
def _get_trial_info(trial, parameters, metrics):
    """Build one table row describing a trial.

    The row is laid out as:

    name | status | loc | params... | metrics...

    Parameter and metric values are looked up in the flattened last result
    of the trial; missing entries show up as None.

    Args:
        trial (Trial): Trial to get information for.
        parameters (List[str]): Names of trial parameters to include.
        metrics (List[str]): Names of metrics to include.

    Returns:
        list: The row values in the order shown above.
    """
    result = flatten_dict(trial.last_result)
    header = [str(trial), trial.status, str(trial.location)]
    param_values = [result.get(CONFIG_PREFIX + name) for name in parameters]
    metric_values = [result.get(name) for name in metrics]
    return header + param_values + metric_values
コード例 #13
0
ファイル: commands.py プロジェクト: Deegue/ray
def list_trials(experiment_path,
                sort=None,
                info_keys=DEFAULT_EXPERIMENT_INFO_KEYS,
                result_keys=DEFAULT_RESULT_KEYS):
    """Lists trials in the directory subtree starting at the given path.

    Args:
        experiment_path (str): Directory where trials are located.
            Corresponds to Experiment.local_dir/Experiment.name.
        sort (str): Key to sort by.
        info_keys (list): Keys that are displayed.
        result_keys (list): Keys of last result that are displayed.

    Raises:
        KeyError: If `sort` is given but is not one of the displayed
            columns.
    """
    _check_tabulate()
    experiment_state = _get_experiment_state(experiment_path,
                                             exit_on_fail=True)

    checkpoint_dicts = experiment_state["checkpoints"]
    checkpoint_dicts = [flatten_dict(g) for g in checkpoint_dicts]
    checkpoints_df = pd.DataFrame(checkpoint_dicts)

    # Result columns are looked up under the "last_result:" prefix; only
    # the requested columns that actually exist are kept.
    result_keys = ["last_result:{}".format(k) for k in result_keys]
    col_keys = [
        k for k in list(info_keys) + result_keys if k in checkpoints_df
    ]
    checkpoints_df = checkpoints_df[col_keys]

    if "last_update_time" in checkpoints_df:
        # NOTE(review): newer pandas releases spell this option
        # 'mode.use_inf_as_na' - confirm against the supported version.
        with pd.option_context('mode.use_inf_as_null', True):
            datetime_series = checkpoints_df["last_update_time"].dropna()

        datetime_series = datetime_series.apply(
            lambda t: datetime.fromtimestamp(t).strftime(TIMESTAMP_FORMAT))
        checkpoints_df["last_update_time"] = datetime_series

    if "logdir" in checkpoints_df:
        # logdir often too verbose to view in table, so drop experiment_path.
        # The path is matched literally: interpreted as a regular
        # expression, characters such as ".", "+", "(" or a backslash would
        # match the wrong text or fail to compile.
        checkpoints_df["logdir"] = checkpoints_df["logdir"].str.replace(
            experiment_path, '', regex=False)

    if sort:
        if sort not in checkpoints_df:
            raise KeyError("Sort Index '{}' not in: {}".format(
                sort, list(checkpoints_df)))
        checkpoints_df = checkpoints_df.sort_values(by=sort)

    print_format_output(checkpoints_df)
コード例 #14
0
ファイル: __init__.py プロジェクト: tokestermw/client
 def on_result(self, result):
     """Forward the numeric metrics of ``result`` to wandb.

     The first non-empty "config" seen is copied into ``wandb.config``
     (without overriding entries that are already set) and remembered. The
     "done", "config", "pid" and "timestamp" entries are dropped, the rest
     is flattened with "/" as delimiter and every numeric value is logged.

     Args:
         result: Result dict of the latest training iteration.
     """
     config = result.get("config")
     if config and self._config is None:
         for name, setting in config.items():
             if wandb.config.get(name) is None:
                 wandb.config[name] = setting
         self._config = config

     remaining = result.copy()
     for dropped in ("done", "config", "pid", "timestamp"):
         remaining.pop(dropped, None)

     metrics = {
         key: value
         for key, value in flatten_dict(remaining, delimiter="/").items()
         if isinstance(value, numbers.Number)
     }
     wandb.log(metrics)
コード例 #15
0
    def testNestedResults(self):
        # Checks that nested results reported by a trainable reach the
        # scheduler and the search algorithm with flattened keys, and that
        # a flattened key ("test/1/2/3") can be used as a stopping criterion.
        def create_result(i):
            return {"test": {"1": {"2": {"3": i, "4": False}}}}

        # Keys every flattened result is expected to contain.
        flattened_keys = list(flatten_dict(create_result(0)))

        class _MockScheduler(FIFOScheduler):
            # Results are accumulated in this list so the test can inspect
            # what the scheduler was given.
            results = []

            def on_trial_result(self, trial_runner, trial, result):
                self.results += [result]
                return TrialScheduler.CONTINUE

            def on_trial_complete(self, trial_runner, trial, result):
                self.complete_result = result

        def train(config, reporter):
            for i in range(100):
                reporter(**create_result(i))

        algo = _MockSuggestionAlgorithm()
        scheduler = _MockScheduler()
        [trial] = tune.run(train,
                           scheduler=scheduler,
                           search_alg=algo,
                           stop={
                               "test/1/2/3": 20
                           }).trials
        # The trial must have terminated on the result whose nested value
        # is 20, i.e. after 21 iterations; the nested structure of the
        # last result is preserved on the trial itself.
        self.assertEqual(trial.status, Trial.TERMINATED)
        self.assertEqual(trial.last_result["test"]["1"]["2"]["3"], 20)
        self.assertEqual(trial.last_result["test"]["1"]["2"]["4"], False)
        self.assertEqual(trial.last_result[TRAINING_ITERATION], 21)
        # 20 intermediate results went through on_trial_result; the final
        # one is delivered via on_trial_complete.
        self.assertEqual(len(scheduler.results), 20)
        self.assertTrue(
            all(
                set(result) >= set(flattened_keys)
                for result in scheduler.results))
        self.assertTrue(set(scheduler.complete_result) >= set(flattened_keys))
        self.assertEqual(len(algo.results), 20)
        self.assertTrue(
            all(set(result) >= set(flattened_keys) for result in algo.results))
        # Stopping criteria that do not name a reported leaf value are
        # expected to raise TuneError.
        with self.assertRaises(TuneError):
            [trial] = tune.run(train, stop={"1/2/3": 20})
        with self.assertRaises(TuneError):
            [trial] = tune.run(train, stop={"test": 1}).trials
コード例 #16
0
    def on_result(self, result):
        """Write ``result`` to the CSV file, one row per unrolled element.

        ``result['unroll_columns']`` names the entries whose values are
        sequences. These sequences are walked in lockstep and each step
        becomes one CSV row. All remaining entries (except "config") are
        added to the last row only.

        The CSV writer is created on the first call, using the keys of the
        flattened result as columns; the header is written unless the
        logger is continuing an existing file.

        Args:
            result: Result dict containing an 'unroll_columns' entry.
        """
        tmp = result.copy()
        if "config" in tmp:
            del tmp["config"]

        result = flatten_dict(tmp, delimiter="/")
        if self._csv_out is None:
            self._csv_out = csv.DictWriter(self._file, result.keys())
            if not self._continuing:
                self._csv_out.writeheader()

        unroll_columns = tmp['unroll_columns']
        # Materialise the rows so that "last row" refers to the rows that
        # are really produced: zip() stops at the shortest column, which is
        # not necessarily as long as the first one.
        rows = list(zip(*[tmp[col] for col in unroll_columns]))
        last_index = len(rows) - 1
        for i, values in enumerate(rows):
            row = dict(zip(unroll_columns, values))
            if i == last_index:
                # Writing the additional information in the last row
                # NOTE(review): these keys come from the unflattened dict
                # while the header uses flattened keys - confirm that
                # results are never nested here.
                row.update({
                    k: v
                    for k, v in tmp.items() if k not in unroll_columns
                })
            self._csv_out.writerow(row)
        self._file.flush()
コード例 #17
0
def _get_trial_info(trial, parameters, metrics, include_error_data=False):
    """Build one table row describing a trial.

    The row is laid out as:

    name | ID | status | loc | # failures | error_file | params... | metrics...

    The two error columns are only present when `include_error_data` is
    set. Location, parameter and metric values are looked up in the
    flattened last result of the trial.

    Args:
        trial (Trial): Trial to get information for.
        parameters (List[str]): Names of trial parameters to include.
        metrics (List[str]): Names of metrics to include.
        include_error_data (bool): Include error file and # of failures.

    Returns:
        list: The row values in the order shown above.
    """
    result = flatten_dict(trial.last_result)
    trial_info = [
        str(trial),
        trial.trial_id,
        trial.status,
        _location_str(result.get(HOSTNAME), result.get(PID)),
    ]
    if include_error_data:
        # TODO(ujvl): File path is too long to display in a single row.
        trial_info.extend([trial.num_failures, trial.error_file])
    trial_info.extend(
        [result.get(CONFIG_PREFIX + name) for name in parameters])
    trial_info.extend([result.get(name) for name in metrics])
    return trial_info
コード例 #18
0
    def on_result(self, result):
        """Append ``result`` as one row of the CSV file.

        The "config" entry is dropped and the rest is flattened with "/" as
        delimiter. The CSV writer is created on the first call, using that
        result's keys as columns; the header is written unless the logger
        is continuing an existing file. Keys that are not columns are
        ignored. Values that are instances of ``self.pickle_types`` are
        pickled and stored as base64 text.

        Args:
            result: Result dict of the latest training iteration.
        """
        without_config = result.copy()
        without_config.pop("config", None)
        flat = flatten_dict(without_config, delimiter="/")

        if self._csv_out is None:
            self._csv_out = csv.DictWriter(self._file, flat.keys())
            if not self._continuing:
                self._csv_out.writeheader()

        encode_results = {}
        for key, value in flat.items():
            if key in self._csv_out.fieldnames:
                if isinstance(value, self.pickle_types):
                    pickled = pickle.dumps(value)
                    value = codecs.encode(pickled, "base64").decode()
                encode_results[key] = value

        self._csv_out.writerow(encode_results)
        self._file.flush()
コード例 #19
0
ファイル: logger.py プロジェクト: zenghsh3/ray
    def on_result(self, result):
        """Write the scalar entries of ``result`` as TensorFlow summaries.

        Entries are flattened with "/" as delimiter and logged under the
        "ray/tune/" prefix; only values whose exact type is in
        ``VALID_SUMMARY_TYPES`` are written. The step is ``TIMESTEPS_TOTAL``
        when present and truthy, otherwise ``TRAINING_ITERATION``.

        Args:
            result: Result dict of the latest training iteration.
        """
        with tf.device("/CPU:0"), self._file_writer.as_default():
            step = result.get(TIMESTEPS_TOTAL)
            if not step:
                step = result[TRAINING_ITERATION]

            loggable = result.copy()
            # not useful to log these
            for skipped in ("config", "pid", "timestamp", TIME_TOTAL_S,
                            TRAINING_ITERATION):
                loggable.pop(skipped, None)

            flat_result = flatten_dict(loggable, delimiter="/")
            for attr, value in flat_result.items():
                if type(value) not in VALID_SUMMARY_TYPES:
                    continue
                tag = "/".join(["ray", "tune", attr])
                tf.summary.scalar(tag, value, step=step)
        self._file_writer.flush()
コード例 #20
0
    def on_result(self, result):
        """Write the scalar entries of ``result`` through the file writer.

        Entries are flattened with "/" as delimiter and tagged with the
        "ray/tune/" prefix; only values whose exact type is in
        ``VALID_SUMMARY_TYPES`` are kept. The kept entries are written with
        ``add_scalar`` and remembered in ``self.last_result``. The step is
        ``TIMESTEPS_TOTAL`` when present and truthy, otherwise
        ``TRAINING_ITERATION``.

        Args:
            result: Result dict of the latest training iteration.
        """
        step = result.get(TIMESTEPS_TOTAL)
        if not step:
            step = result[TRAINING_ITERATION]

        loggable = result.copy()
        # not useful to log these
        for skipped in ("config", "pid", "timestamp", TIME_TOTAL_S,
                        TRAINING_ITERATION):
            loggable.pop(skipped, None)

        path = ["ray", "tune"]
        valid_result = {}
        for attr, value in flatten_dict(loggable, delimiter="/").items():
            if type(value) in VALID_SUMMARY_TYPES:
                valid_result["/".join(path + [attr])] = value

        # All tags are built before anything is written.
        for tag, value in valid_result.items():
            self._file_writer.add_scalar(tag, value, global_step=step)
        self.last_result = valid_result
        self._file_writer.flush()
コード例 #21
0
ファイル: wandb_logger.py プロジェクト: ziyuwan/pipeline-psro
    def on_result(self, result):
        """Forward the numeric metrics of ``result`` to wandb.

        The first non-empty "config" seen is passed through
        ``make_dict_items_yaml_representable``, copied into ``wandb.config``
        (without overriding entries that are already set) and remembered.
        The "done", "config", "pid" and "timestamp" entries are dropped, the
        rest is flattened with "/" as delimiter and every numeric value is
        logged, using "timesteps_total" as the step when present.

        Args:
            result: Result dict of the latest training iteration.
        """
        remaining = result.copy()

        config = remaining.get("config")
        if config and self._config is None:
            # NOTE(review): `remaining` is a shallow copy, so `config` is
            # the same object as in the caller's result - confirm that the
            # helper changing it in place is intended.
            make_dict_items_yaml_representable(config)

            for name in config.keys():
                if wandb.config.get(name) is None:
                    wandb.config[name] = config[name]

            self._config = config

        for dropped in ("done", "config", "pid", "timestamp"):
            remaining.pop(dropped, None)

        metrics = {
            key: value
            for key, value in flatten_dict(remaining, delimiter="/").items()
            if isinstance(value, numbers.Number)
        }
        wandb.log(metrics, step=remaining.get("timesteps_total", None))
コード例 #22
0
    def _read_experiment(self, experiment_state):
        """Load progress, directories, checkpoints and params of all trials.

        For every checkpoint that has a "logdir", the following attributes
        are filled in, keyed by the trial's experiment tag:

        * ``self.progress``: DataFrame read from the trial's "progress.csv".
        * ``self.exp_directories``: absolute path of the trial directory.
        * ``self.checkpoint_directories``: path of a ".pt"/".pth" file in
          the trial's highest-sorting "checkpoint*" directory, or "" when
          there is none.
        * ``self.params``: decoded content of the trial's "params.json".

        The process working directory is left untouched.

        Args:
            experiment_state: Mapping whose "checkpoints" entry holds one
                dict per trial.
        """
        import json

        checkpoint_dicts = experiment_state["checkpoints"]
        checkpoint_dicts = [flatten_dict(g) for g in checkpoint_dicts]

        for exp in checkpoint_dicts:
            if exp.get("logdir", None) is None:
                continue
            exp_tag = exp["experiment_tag"]
            exp_dir = os.path.basename(exp["logdir"])
            # Resolve the trial directory once and use the absolute path
            # everywhere, so that nothing depends on the current working
            # directory.
            ed = os.path.abspath(os.path.join(self.experiment_path, exp_dir))

            self.progress[exp_tag] = pd.read_csv(
                os.path.join(ed, "progress.csv"))
            self.exp_directories[exp_tag] = ed

            # Figure out checkpoint file (.pt or .pth) if it exists. The
            # directory part is escaped because trial directory names may
            # contain glob metacharacters such as "[" or "*".
            checkpoint_file = ""
            cds = glob.glob(os.path.join(glob.escape(ed), "checkpoint*"))
            if cds:
                cd = max(cds)
                cf = glob.glob(os.path.join(glob.escape(cd), "*.pt"))
                cf += glob.glob(os.path.join(glob.escape(cd), "*.pth"))
                if cf:
                    checkpoint_file = cf[0]
            self.checkpoint_directories[exp_tag] = checkpoint_file

            # Read in the configs for this experiment
            params_file = os.path.join(ed, "params.json")
            with open(params_file) as f:
                self.params[exp_tag] = json.load(f)
コード例 #23
0
ファイル: logger.py プロジェクト: yannbouteiller/ray
    def on_result(self, result):
        """Write the scalar entries of ``result`` as TensorFlow summaries.

        The summary file writer is created on the first call. On the first
        result, the trial's evaluated parameters (if any) are additionally
        logged as hyperparameters; a failure to do so is logged and
        otherwise ignored. Entries are flattened with "/" as delimiter and
        logged under the "ray/tune/" prefix; only values whose exact type
        is in ``VALID_SUMMARY_TYPES`` are written. The step is
        ``TIMESTEPS_TOTAL`` when present and truthy, otherwise
        ``TRAINING_ITERATION``.

        Args:
            result: Result dict of the latest training iteration.
        """
        if self._file_writer is None:
            from tensorflow.python.eager import context
            from tensorboard.plugins.hparams import api as hp
            self._context = context
            self._file_writer = tf.summary.create_file_writer(self.logdir)
        with tf.device("/CPU:0"):
            with tf.summary.record_if(True), self._file_writer.as_default():
                step = result.get(TIMESTEPS_TOTAL)
                if not step:
                    step = result[TRAINING_ITERATION]

                loggable = result.copy()
                if not self._hp_logged:
                    if self.trial and self.trial.evaluated_params:
                        # NOTE(review): `hp` is only bound when the writer
                        # was created in this very call - confirm the
                        # writer is never set up elsewhere beforehand.
                        try:
                            hp.hparams(self.trial.evaluated_params,
                                       trial_id=self.trial.trial_id)
                        except Exception as exc:
                            logger.error("HParams failed with %s", exc)
                    self._hp_logged = True

                # not useful to log these
                for skipped in ("config", "pid", "timestamp", TIME_TOTAL_S,
                                TRAINING_ITERATION):
                    loggable.pop(skipped, None)

                flat_result = flatten_dict(loggable, delimiter="/")
                for attr, value in flat_result.items():
                    if type(value) not in VALID_SUMMARY_TYPES:
                        continue
                    tag = "/".join(["ray", "tune", attr])
                    tf.summary.scalar(tag, value, step=step)
        self._file_writer.flush()
コード例 #24
0
ファイル: commands.py プロジェクト: gravitywp/ray
def list_trials(experiment_path,
                sort=None,
                output=None,
                filter_op=None,
                info_keys=DEFAULT_EXPERIMENT_INFO_KEYS,
                result_keys=DEFAULT_RESULT_KEYS):
    """Lists trials in the directory subtree starting at the given path.

    Args:
        experiment_path (str): Directory where trials are located.
            Corresponds to Experiment.local_dir/Experiment.name.
        sort (str): Key to sort by.
        output (str): Name of file where output is saved.
        filter_op (str): Filter operation in the format
            "<column> <operator> <value>".
        info_keys (list): Keys that are displayed.
        result_keys (list): Keys of last result that are displayed.

    Raises:
        KeyError: If `sort` is given but is not one of the displayed
            columns.
        ValueError: If the filtered column has an unsupported dtype, or if
            `output` does not end in a supported file extension.
    """
    _check_tabulate()
    experiment_state = _get_experiment_state(experiment_path,
                                             exit_on_fail=True)

    checkpoint_dicts = experiment_state["checkpoints"]
    checkpoint_dicts = [flatten_dict(g) for g in checkpoint_dicts]
    checkpoints_df = pd.DataFrame(checkpoint_dicts)

    # Result columns are looked up under the "last_result:" prefix; only
    # the requested columns that actually exist are kept.
    result_keys = ["last_result:{}".format(k) for k in result_keys]
    col_keys = [
        k for k in list(info_keys) + result_keys if k in checkpoints_df
    ]
    checkpoints_df = checkpoints_df[col_keys]

    if "last_update_time" in checkpoints_df:
        # NOTE(review): newer pandas releases spell this option
        # "mode.use_inf_as_na" - confirm against the supported version.
        with pd.option_context("mode.use_inf_as_null", True):
            datetime_series = checkpoints_df["last_update_time"].dropna()

        datetime_series = datetime_series.apply(
            lambda t: datetime.fromtimestamp(t).strftime(TIMESTAMP_FORMAT))
        checkpoints_df["last_update_time"] = datetime_series

    if "logdir" in checkpoints_df:
        # logdir often too verbose to view in table, so drop experiment_path.
        # The path is matched literally: interpreted as a regular
        # expression, characters such as ".", "+", "(" or a backslash would
        # match the wrong text or fail to compile.
        checkpoints_df["logdir"] = checkpoints_df["logdir"].str.replace(
            experiment_path, '', regex=False)

    if filter_op:
        col, op, val = filter_op.split(' ')
        # Convert the textual value to the type of the column it is
        # compared against.
        col_type = checkpoints_df[col].dtype
        if is_numeric_dtype(col_type):
            val = float(val)
        elif is_string_dtype(col_type):
            val = str(val)
        # TODO(Andrew): add support for datetime and boolean
        else:
            raise ValueError("Unsupported dtype for '{}': {}".format(
                val, col_type))
        op = OPERATORS[op]
        filtered_index = op(checkpoints_df[col], val)
        checkpoints_df = checkpoints_df[filtered_index]

    if sort:
        if sort not in checkpoints_df:
            raise KeyError("Sort Index '{}' not in: {}".format(
                sort, list(checkpoints_df)))
        checkpoints_df = checkpoints_df.sort_values(by=sort)

    print_format_output(checkpoints_df)

    if output:
        # The output format is chosen from the file extension.
        file_extension = os.path.splitext(output)[1].lower()
        if file_extension in (".p", ".pkl", ".pickle"):
            checkpoints_df.to_pickle(output)
        elif file_extension == ".csv":
            checkpoints_df.to_csv(output, index=False)
        else:
            raise ValueError("Unsupported filetype: {}".format(output))
        print("Output saved at:", output)