def test_validation_circular_ref() -> None:
    """Validation must reject a parent chain that loops back on itself.

    Datapoint 1 descends from 2, and 2 -> 3 -> 4 -> 2 forms a cycle.
    """
    # uid -> from_uid, in the order the datapoints are created
    parent_of = {"1": "2", "2": "3", "3": "4", "4": "2"}
    with pytest.raises(hip.ExperimentValidationCircularRef):
        cyclic_points = [
            hip.Datapoint(uid=uid, from_uid=parent, values={})
            for uid, parent in parent_of.items()
        ]
        xp = hip.Experiment(datapoints=cyclic_points)
        xp.validate()
def test_merge() -> None:
    """Merging two experiments that both hold a datapoint with uid "1".

    The merged experiment is expected to keep both datapoints and to pass
    validation.
    """
    xp1 = hip.Experiment(datapoints=[hip.Datapoint(uid="1", values={"a": "b"})])
    xp2 = hip.Experiment(datapoints=[hip.Datapoint(uid="1", values={"a": "c"})])
    merged = hip.Experiment.merge({"xp1": xp1, "xp2": xp2})

    num_points = len(merged.datapoints)
    assert num_points == 2, merged
    merged.validate()
def test_validation_missing_parent() -> None:
    """A datapoint pointing at an absent parent fails validation until repaired.

    After ``remove_missing_parents()`` the dangling ``from_uid`` is expected to
    be reset to ``None`` and the experiment to validate cleanly.
    """
    orphan = hip.Datapoint(uid="1", from_uid="2", values={})
    xp = hip.Experiment(datapoints=[orphan])

    # uid "2" does not exist in the experiment
    with pytest.raises(hip.ExperimentValidationMissingParent):
        xp.validate()

    xp.remove_missing_parents()
    first_point = xp.datapoints[0]
    assert first_point.from_uid is None
    xp.validate()
def to_hiplot_experiment(self, max_list_elements: int = 24) -> tp.Any:  # no typing here since Hiplot is not a hard requirement
    """Converts the logs into an hiplot experiment for display.

    Parameters
    ----------
    max_list_elements: int
        forwarded to :code:`load_flattened`; caps how many elements of
        list/array entries are exported

    Raises
    ------
    ImportError
        if hiplot is not installed (it is an optional dependency)

    Example
    -------
    .. code-block:: python

        exp = logs.to_hiplot_experiment()
        exp.display(force_full_width=True)

    Note
    ----
    - You can easily change the axes of the XY plot:
      :code:`exp.display_data(hip.Displays.XY).update({'axis_x': '0#0', 'axis_y': '0#1'})`
    - For more context about hiplot, check:

      - blogpost: https://ai.facebook.com/blog/hiplot-high-dimensional-interactive-plots-made-easy/
      - github repo: https://github.com/facebookresearch/hiplot
      - documentation: https://facebookresearch.github.io/hiplot/
    """
    # hiplot is optional, so it is imported lazily and a missing install is
    # reported with an actionable message instead of a bare import failure
    # pylint: disable=import-outside-toplevel
    try:
        import hiplot as hip
    except ImportError as e:
        raise ImportError(
            f"{self.__class__.__name__} requires hiplot which is not installed by default "
            "(pip install hiplot)"
        ) from e

    exp = hip.Experiment()
    for xp in self.load_flattened(max_list_elements=max_list_elements):
        dp = hip.Datapoint(
            # only the first parent (if any) is used to link datapoints
            from_uid=xp.get("#parents_uids#0"),
            uid=xp["#uid"],
            # drop "#"-prefixed bookkeeping fields that mention "uid" or "ask"
            values={
                x: y
                for x, y in xp.items()
                if not (x.startswith("#") and ("uid" in x or "ask" in x))
            },
        )
        exp.datapoints.append(dp)
    exp.display_data(hip.Displays.XY).update({"axis_x": "#num-tell", "axis_y": "#loss"})
    # for the record, some more options:
    exp.display_data(hip.Displays.XY).update({"lines_thickness": 1.0, "lines_opacity": 1.0})
    return exp
def to_hiplot_experiment(
    self, max_list_elements: int = 24
) -> tp.Any:  # no typing here since Hiplot is not a hard requirement
    """Builds an hiplot experiment out of the flattened logs, ready for display.

    Parameters
    ----------
    max_list_elements: int
        maximum number of elements of list/arrays to export (only the first
        elements are extracted)

    Example
    -------
    .. code-block:: python

        exp = logs.to_hiplot_experiment()
        exp.display(force_full_width=True)

    Note
    ----
    - The axes of the XY plot can be changed afterwards:
      :code:`exp.display_data(hip.Displays.XY).update({'axis_x': '0#0', 'axis_y': '0#1'})`
    - More about hiplot:

      - blogpost: https://ai.facebook.com/blog/hiplot-high-dimensional-interactive-plots-made-easy/
      - github repo: https://github.com/facebookresearch/hiplot
      - documentation: https://facebookresearch.github.io/hiplot/
    """
    # pylint: disable=import-outside-toplevel
    try:
        import hiplot as hip
    except ImportError as e:
        owner = self.__class__.__name__
        raise ImportError(
            f"{owner} requires hiplot which is not installed by default (pip install hiplot)"
        ) from e

    def _is_exported(key: str) -> bool:
        # "#"-prefixed fields mentioning "uid" or "ask" are left out of the values
        return not key.startswith("#") or ("uid" not in key and "ask" not in key)

    exp = hip.Experiment()
    for record in self.load_flattened(max_list_elements=max_list_elements):
        parent_uid = record.get("#parents_uids#0")
        own_uid = record["#uid"]
        exported = {key: val for key, val in record.items() if _is_exported(key)}
        exp.datapoints.append(hip.Datapoint(from_uid=parent_uid, uid=own_uid, values=exported))

    xy_axes = {"axis_x": "#num-tell", "axis_y": "#loss"}
    exp.display_data(hip.Displays.XY).update(xy_axes)
    # for the record, some more options:
    xy_lines = {"lines_thickness": 1.0, "lines_opacity": 1.0}
    exp.display_data(hip.Displays.XY).update(xy_lines)
    return exp
def gey_hiplot(
    self, query: str = None, sort: str = None, limit: int = None, offset: int = None
):
    """Build a hiplot experiment from the runs returned by ``get_runs_io``.

    All arguments are forwarded unchanged to ``self.get_runs_io``. Each
    returned record must provide a ``"uid"`` and a ``"values"`` entry, which
    become the uid and values of one hiplot datapoint.

    Returns
    -------
    hiplot.Experiment
        experiment holding one datapoint per returned run, in the same order
    """
    # hiplot is imported lazily so that it is only needed when this is called
    import hiplot

    runs = self.get_runs_io(query=query, sort=sort, limit=limit, offset=offset)
    exp = hiplot.Experiment()
    for run in runs:
        # read from the current run, not from the whole collection
        dp = hiplot.Datapoint(
            uid=run["uid"],
            values=run["values"],
        )
        exp.datapoints.append(dp)
    return exp
def _create_experiment_from_dataframe(df: DataFrame, include_tags: bool) -> hiplot.Experiment:
    """Turn a dataframe of MLFlow runs into a HiPlot experiment.

    Parameters
    ----------
    df: pandas.DataFrame
        One row per run (normally the result of ``mlflow.search_runs``). It
        must have a ``run_id`` column; columns prefixed with ``params.``,
        ``metrics.`` and ``tags.`` are the ones exported.
    include_tags: bool
        Whether the ``tags.*`` columns are exported along with the
        parameters and metrics.

    Returns
    -------
    hiplot.Experiment
        Experiment with one datapoint per row. Non-finite metric values are
        left out of the datapoint they belong to.
    """

    def _columns_with(prefix: str) -> list:
        return [name for name in df.columns if name.startswith(prefix)]

    param_cols = _columns_with("params.")
    metric_cols = _columns_with("metrics.")
    tag_cols = _columns_with("tags.") if include_tags else []

    exp = hiplot.Experiment()
    for _, run in df.iterrows():
        values = {name: run[name] for name in param_cols}
        # metrics that are NaN or infinite are skipped for this run only
        values.update((name, run[name]) for name in metric_cols if isfinite(run[name]))
        values.update((name, run[name]) for name in tag_cols)
        # going through uuid.UUID both checks and normalizes the run id
        run_uid = str(uuid.UUID(run["run_id"]))
        exp.datapoints.append(hiplot.Datapoint(uid=run_uid, values=values))
    return exp
def test_validation() -> None:
    """A datapoint with uid "x" whose values carry a "uid" entry of "y" must not validate."""
    with pytest.raises(hip.ExperimentValidationError):
        inconsistent = hip.Datapoint(uid="x", values={"uid": "y"})
        inconsistent.validate()
RUN_ID_1 = str(uuid.uuid4()) RUN_ID_2 = str(uuid.uuid4()) SEARCH_RUNS_RESULT = DataFrame({ "run_id": [RUN_ID_1, RUN_ID_2], "params.numeric": [0, 1], "params.category": ["value1", "value2"], "metrics.third": [2, 4], "tags.test": ["yes", "no"], }) EXPERIMENT = hiplot.Experiment([ hiplot.Datapoint( uid=RUN_ID_1, values={ "params.numeric": 0, "params.category": "value1", "metrics.third": 2, }, ), hiplot.Datapoint( uid=RUN_ID_2, values={ "params.numeric": 1, "params.category": "value2", "metrics.third": 4, }, ), ]) EXPERIMENT_WITH_TAGS = hiplot.Experiment([ hiplot.Datapoint( uid=RUN_ID_1,
def fetcher(uri):
    """Prepare param sweep output for hiplot.

    Collects the sweep results and simplifies them for easy display using
    hiplot. Each run contributes a chain of sub-sampled
    ``(step, cumulative_reward)`` datapoints, followed by one summary
    datapoint that also carries the hyperparameters taking more than one
    value across the sweep.

    :param uri: root dir containing all the param sweep results.
    :returns: hiplot Experiment object for display
    :raises ValueError: if no logs are found under ``uri``.
    """
    print("got request for %s, collecting logs" % uri)
    exp = hip.Experiment()
    exp.display_data(hip.Displays.XY).update({
        "axis_x": "step",
        "axis_y": "cumulative_reward"
    })
    dfs = collect_logs(Path(uri))  # sequence of (name, dataframe) pairs
    if not dfs:
        # fail with a clear message rather than a ZeroDivisionError below
        raise ValueError("no logs found under %s" % uri)

    cfg_variants = {}  # config key -> set of values seen across all runs
    cfgs = {}  # run name -> flattened config
    for name, _df in dfs:
        # first collect each config
        print("loading config from %s" % name)
        configpath = Path(name) / "config.yaml"
        cfg = flatten(OmegaConf.load(str(configpath)))
        cfgs[name] = cfg
        for k, v in cfg.items():
            cfg_variants.setdefault(k, set()).add(v)
    print("Read in %d logs successfully" % len(cfgs))

    # only hyperparams with more than one value are worth plotting
    varying = [key for key, vals in cfg_variants.items() if len(vals) > 1]
    order = ["mean_final_reward", *varying, "cumulative_reward"]
    print("headers found to plot: ", order)
    exp.display_data(hip.Displays.PARALLEL_PLOT).update(
        hide=["step", "uid", "from_uid"], order=order)

    ave_points = sum(len(df["step"]) for _name, df in dfs) // len(dfs)
    step_size = ave_points // 100 + 1  # I want an average of 100 points per experiment
    print("ave_points:", ave_points, "step_size:", step_size)

    for name, df in dfs:
        # now go through each dataframe
        steps = df["step"]
        if len(steps) == 0:
            # nothing to plot, and the summary below needs at least one row
            print("skipping %s: log is empty" % name)
            continue

        cfg = cfgs[name]
        hyperparams = {}
        for key, val in cfg.items():
            if len(cfg_variants[key]) > 1:
                try:
                    hyperparams[key] = float(val)
                except (TypeError, ValueError):
                    # non-numeric values (including None) are kept as text
                    hyperparams[key] = str(val)

        prev_name = None
        cum_sum = df["mean_episode_return"].cumsum()
        for idx in range(0, len(cum_sum), step_size):
            # idx is a position, so use .iloc: label lookup breaks when the
            # dataframe index is not 0..n-1
            step = int(steps.iloc[idx])
            cumulative_reward = cum_sum.iloc[idx]
            curr_name = "{},step{}".format(name, step)
            sp = hip.Datapoint(
                uid=curr_name,
                values=dict(step=step, cumulative_reward=cumulative_reward),
            )
            if prev_name is not None:
                sp.from_uid = prev_name
            exp.datapoints.append(sp)
            prev_name = curr_name

        mean_final_reward = float(df["mean_episode_return"][-10000:].mean())
        peak_performance = float(
            df["mean_episode_return"].rolling(window=1000).mean().max())
        # hyperparams is rebuilt for every run, so it can be extended in place
        end_vals = dict(
            hyperparams,
            step=int(steps.iloc[-1]),
            cumulative_reward=cum_sum.iloc[-1],
            mean_final_reward=mean_final_reward,
            peak_performance=peak_performance,
        )
        # the summary datapoint closes the chain of sub-sampled points
        dp = hip.Datapoint(uid=name, from_uid=prev_name, values=end_vals)
        exp.datapoints.append(dp)
    return exp