def test_trial_to_tuple(space, trial, fixed_suggestion):
    """Check if trial is correctly created from a sample/tuple."""
    data = trial_to_tuple(trial, space)
    assert data == fixed_suggestion

    trial.params[0].name = 'lalala'
    with pytest.raises(AssertionError):
        trial_to_tuple(trial, space)

    trial.params.pop(0)
    with pytest.raises(AssertionError):
        trial_to_tuple(trial, space)
def test_trial_to_tuple(space, trial, fixed_suggestion):
    """Check if trial is correctly created from a sample/tuple."""
    data = trial_to_tuple(trial, space)
    assert data == fixed_suggestion

    trial._params[0].name = 'lalala'

    with pytest.raises(ValueError) as exc:
        trial_to_tuple(trial, space)

    assert "Trial params: ['lalala', 'yolo2', 'yolo3']" in str(exc.value)

    trial._params.pop(0)

    with pytest.raises(ValueError) as exc:
        trial_to_tuple(trial, space)

    assert "Trial params: ['yolo2', 'yolo3']" in str(exc.value)
def test_trial_to_tuple(space, fixed_suggestion, params_tuple):
    """Check if trial is correctly created from a sample/tuple."""
    data = trial_to_tuple(fixed_suggestion, space)
    assert data == params_tuple

    fixed_suggestion._params[0].name = "lalala"

    with pytest.raises(ValueError) as exc:
        trial_to_tuple(fixed_suggestion, space)

    assert "Trial params: ['lalala', 'yolo2', 'yolo3']" in str(exc.value)

    fixed_suggestion._params.pop(0)

    with pytest.raises(ValueError) as exc:
        trial_to_tuple(fixed_suggestion, space)

    assert "Trial params: ['yolo2', 'yolo3']" in str(exc.value)
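For reference, a minimal runnable sketch of the `trial_to_tuple` behavior the three test versions above exercise, assuming only that a trial carries a `_params` list of objects with `name` and `value` attributes. The real helper lives in `orion.core.utils.format_trials` and differs in details; this is an illustration, not the library code.

from types import SimpleNamespace


def trial_to_tuple(trial, space):
    """Return the trial's parameter values ordered as in the space.

    Raises ValueError, listing the trial's param names, when they do not
    match the space's dimensions (sketch of the behavior asserted above).
    """
    names = [param.name for param in trial._params]
    if names != list(space.keys()):
        raise ValueError(
            f"Trial params: {names} does not match space: {list(space.keys())}")
    return tuple(param.value for param in trial._params)


# Hypothetical usage, with a plain dict standing in for a Space object:
space = {"yolo": None, "yolo2": None, "yolo3": None}
trial = SimpleNamespace(
    _params=[SimpleNamespace(name=n, value=v)
             for n, v in [("yolo", 1), ("yolo2", 2), ("yolo3", 3)]])
assert trial_to_tuple(trial, space) == (1, 2, 3)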
def test_change_trial_params(space, rspace):
    working_dir = "/new/working/dir"
    status = "interrupted"

    rtrial = rspace.sample()[0]
    # Sampling a new point in original space instead of using reserve()
    trial = space.sample()[0]
    point = format_trials.trial_to_tuple(trial, space)

    rtrial.exp_working_dir = working_dir
    rtrial.status = status

    restored_trial = change_trial_params(rtrial, point, space)

    # Test that attributes are conserved
    assert restored_trial.exp_working_dir == working_dir
    assert restored_trial.status == status

    # Test params are updated
    assert restored_trial.params != rtrial.params
    assert restored_trial.params == trial.params

    # Test that id is based on current params
    assert restored_trial.id != rtrial.id
    assert restored_trial.id == trial.id
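A plausible sketch of `change_trial_params` consistent with these assertions: copy the trial, swap in params built from the point, and let the id (which is derived from the params) change accordingly. It reuses the library's `tuple_to_trial`; this is an assumption about the shape of the implementation, not the actual one.

import copy


def change_trial_params(trial, point, space):
    """Return a copy of `trial` with params replaced by `point`.

    Status and exp_working_dir are preserved because the deep copy keeps
    every other attribute; the id changes because it is computed from the
    params (a sketch under the assumptions stated above).
    """
    new_trial = copy.deepcopy(trial)
    new_trial._params = tuple_to_trial(point, space)._params
    return new_trial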
def _update_algorithm(self, completed_trials):
    """Pull newest completed trials to update local model."""
    log.debug("### Fetch completed trials to observe:")

    new_completed_trials = []
    for trial in completed_trials:
        if trial not in self.trials_history:
            new_completed_trials.append(trial)

    log.debug("### %s", new_completed_trials)

    if new_completed_trials:
        log.debug("### Convert them to list of points and their results.")
        points = list(
            map(lambda trial: format_trials.trial_to_tuple(trial, self.space),
                new_completed_trials))
        results = list(
            map(format_trials.get_trial_results, new_completed_trials))

        log.debug("### Observe them.")
        self.trials_history.update(new_completed_trials)
        self.algorithm.observe(points, results)
        self.strategy.observe(points, results)
        self._update_params_hashes(new_completed_trials)
def test_tuple_to_trial_to_tuple(space, trial, fixed_suggestion):
    """The two functions should be inverses of each other."""
    data = trial_to_tuple(tuple_to_trial(fixed_suggestion, space), space)
    assert data == fixed_suggestion

    t = tuple_to_trial(trial_to_tuple(trial, space), space)
    assert t.experiment is None
    assert t.status == 'new'
    assert t.worker is None
    assert t.submit_time is None
    assert t.start_time is None
    assert t.end_time is None
    assert t.results == []
    assert len(t._params) == len(trial._params)
    for i in range(len(t._params)):
        assert t._params[i].to_dict() == trial._params[i].to_dict()
def reshape(self, trial):
    """Reshape the trial's point to fit this space."""
    point = format_trials.trial_to_tuple(trial, self._original_space)
    reshaped_point = []
    for dim in self.values():
        reshaped_point.append(dim.transform(point[dim.index]))

    return change_trial_params(trial, reshaped_point, self)
def reserve_trial(self, score_handle=None):
    """Find *new* trials that exist currently in database and select one
    of them based on the highest score returned by the `score_handle` callable.

    :param score_handle: A way to decide which trial out of the *new* ones
       to pick as *reserved*; defaults to a random choice.
    :type score_handle: callable

    :return: selected `Trial` object, None if could not find any.
    """
    if score_handle is not None and not callable(score_handle):
        raise ValueError(
            "Argument `score_handle` must be callable with a `Trial`.")

    query = dict(experiment=self._id,
                 status={'$in': ['new', 'suspended', 'interrupted']})
    new_trials = Trial.build(self._db.read('trials', query))

    if not new_trials:
        return None

    if score_handle is not None and self.space:
        scores = list(
            map(score_handle,
                map(lambda x: trial_to_tuple(x, self.space), new_trials)))
        scored_trials = zip(scores, new_trials)
        best_trials = filter(lambda st: st[0] == max(scores), scored_trials)
        new_trials = list(zip(*best_trials))[1]
    elif score_handle is not None:
        log.warning("While reserving trial: `score_handle` was provided, but "
                    "parameter space has not been defined yet.")

    selected_trial = random.sample(new_trials, 1)[0]

    # Query on status to ensure atomicity. If another process changes the
    # status meanwhile, read_and_write will fail, because the query will fail.
    query = {'_id': selected_trial.id, 'status': selected_trial.status}

    update = dict(status='reserved')

    if selected_trial.status == 'new':
        update["start_time"] = datetime.datetime.utcnow()

    selected_trial_dict = self._db.read_and_write('trials', query=query,
                                                  data=update)

    if selected_trial_dict is None:
        selected_trial = self.reserve_trial(score_handle=score_handle)
    else:
        selected_trial = Trial(**selected_trial_dict)

    return selected_trial
def restore_shape(self, transformed_trial):
    """Restore the original shape of the trial's point."""
    transformed_point = format_trials.trial_to_tuple(transformed_trial, self)
    original_keys = self._original_space.keys()
    point = [None for _ in original_keys]
    for index, dim in enumerate(self.values()):
        if dim.first:
            point_index = original_keys.index(dim.original_dimension.name)
            point[point_index] = dim.reverse(transformed_point, index)

    return change_trial_params(transformed_trial, point, self._original_space)
def test_suggest_initial_points(self, tpe: TPE, monkeypatch):
    """Test that initial points can be sampled correctly"""
    _points = [(i, i - 6, "c") for i in range(1, 12)]
    _trials = [
        format_trials.tuple_to_trial(point, space=tpe.space) for point in _points
    ]
    index = 0

    def sample(num: int = 1, seed=None) -> list[Trial]:
        nonlocal index
        result = _trials[index:index + num]
        index += num
        return result

    monkeypatch.setattr(tpe.space, "sample", sample)

    tpe.n_initial_points = 10
    results = numpy.random.random(10)
    for i in range(1, 11):
        trials = tpe.suggest(1)
        assert trials is not None
        trial = trials[0]
        # Compare params to params: suggest() returns _trials[i - 1] here.
        assert trial.params == _trials[i - 1].params
        point = format_trials.trial_to_tuple(trial, space=tpe.space)
        assert point == (i, i - 6, "c")
        trial.results = [
            Trial.Result(name="objective", type="objective", value=results[i - 1])
        ]
        tpe.observe([trial])

    trials = tpe.suggest(1)
    assert trials is not None
    trial = trials[0]
    assert trial == _trials[-1]
    # BUG: This is failing. We expect this trial to be sampled from the model,
    # not from the search space.
    assert format_trials.trial_to_tuple(trial, space=tpe.space) != (11, 5, "c")
def flatten_numpy(trials_array, flattened_space):
    """Flatten dimensions"""
    flattened_points = numpy.array([
        format_trials.trial_to_tuple(
            flattened_space.transform(
                format_trials.tuple_to_trial(point[:-1],
                                             flattened_space.original)),
            flattened_space,
        ) for point in trials_array
    ])

    return numpy.concatenate((flattened_points, trials_array[:, -1:]), axis=1)
def observe(self, trials):
    """Observe evaluation `results` corresponding to list of `points` in space.

    Save current point and gradient corresponding to this point.
    """
    if trials[-1].status != "completed":
        return

    self.current_point = numpy.asarray(
        format_trials.trial_to_tuple(trials[-1], self.space))
    self.gradient = numpy.asarray(trials[-1].gradient.value)
    self.has_observed_once = True
def update(self):
    """Pull newest completed trials to update local model."""
    log.debug("### Fetch trials to observe:")
    completed_trials = self.experiment.fetch_completed_trials()
    log.debug("### %s", completed_trials)

    if completed_trials:
        log.debug("### Convert them to list of points and their results.")
        points = list(
            map(lambda trial: format_trials.trial_to_tuple(trial, self.space),
                completed_trials))
        results = list(map(format_trials.get_trial_results, completed_trials))

        log.debug("### Observe them.")
        self.algorithm.observe(points, results)
def _update_naive_algorithm(self, incomplete_trials):
    """Pull all non-completed trials to update naive model."""
    self.naive_algorithm = copy.deepcopy(self.algorithm)
    self.naive_trials_history = copy.deepcopy(self.trials_history)
    log.debug("### Create fake trials to observe:")
    lying_trials = self._produce_lies(incomplete_trials)
    log.debug("### %s", lying_trials)

    if lying_trials:
        log.debug("### Convert them to list of points and their results.")
        points = list(
            map(lambda trial: format_trials.trial_to_tuple(trial, self.space),
                lying_trials))
        results = list(map(format_trials.get_trial_results, lying_trials))

        log.debug("### Observe them.")
        self.naive_trials_history.update(lying_trials)
        self.naive_algorithm.observe(points, results)
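The update methods above repeat the same convert-then-observe sequence; it could be factored into a small helper. A sketch, assuming `format_trials` is imported as in the surrounding code (the helper name is hypothetical):

def _trials_to_observations(trials, space):
    """Convert trials to the (points, results) pair that observe() expects."""
    points = [format_trials.trial_to_tuple(trial, space) for trial in trials]
    results = [format_trials.get_trial_results(trial) for trial in trials]
    return points, results

With this helper, `update` reduces to `points, results = _trials_to_observations(completed_trials, self.space)` followed by `self.algorithm.observe(points, results)`.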
def test_suggest_ei_candidates(self, tpe: TPE):
    """Test suggest with no shape dimensions"""
    tpe.n_initial_points = 2
    tpe.n_ei_candidates = 0

    results = numpy.random.random(2)
    for i in range(2):
        trials = tpe.suggest(1)
        assert trials is not None
        assert len(trials) == 1
        points = [format_trials.trial_to_tuple(trials[0], space=tpe.space)]
        assert len(points[0]) == 3
        assert not isinstance(points[0][0], tuple)
        trials[0] = _add_result(trials[0], results[i])
        tpe.observe(trials)

    # With zero EI candidates the model cannot propose anything...
    trials = tpe.suggest(1)
    assert not trials

    # ...but a positive candidate count makes suggestions work again.
    tpe.n_ei_candidates = 24
    trials = tpe.suggest(1)
    assert trials is not None
    assert len(trials) > 0
def _trial_to_array(trial: Trial, space: Space) -> np.ndarray:
    return np.array(format_trials.trial_to_tuple(trial, space=space))
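Batching the conversion follows directly; a short sketch building on `_trial_to_array` above (`_trials_to_matrix` is a hypothetical name):

def _trials_to_matrix(trials: list[Trial], space: Space) -> np.ndarray:
    """Stack trials into an (n_trials, n_dims) array."""
    return np.stack([_trial_to_array(trial, space=space) for trial in trials])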
def suggest(pool_size):
    return [trial_to_tuple(experiment.fetch_trials()[-1], experiment.space)]
def test_partial_dependency_grid(hspace):
    """Test the computation of the averages and stds"""
    flattened_space = flatten_space(hspace)
    n_points = 5
    n_samples = 20
    samples = [
        format_trials.trial_to_tuple(trial, flattened_space)
        for trial in flattened_space.sample(n_samples)
    ]
    samples = pd.DataFrame(samples, columns=flattened_space.keys())

    params = ["x", "y[0]", "y[2]", "z"]

    # Test for 1 param
    grid, averages, stds = partial_dependency_grid(
        flattened_space, mock_model(), ["x"], samples, n_points=n_points)
    assert list(grid.keys()) == ["x"]
    assert list(grid["x"]) == [0, 1.5, 3, 4.5, 6]
    assert averages.shape == (n_points,)
    assert stds.shape == (n_points,)
    assert averages[0] == numpy.arange(n_samples).mean()
    assert (averages[4] == numpy.arange(n_samples * (n_points - 1),
                                        n_samples * n_points).mean())
    assert stds[0] == numpy.arange(n_samples).std()

    # Test for 2 params
    grid, averages, stds = partial_dependency_grid(
        flattened_space, mock_model(), ["x", "y[0]"], samples,
        n_points=n_points)
    assert list(grid.keys()) == ["x", "y[0]"]
    assert list(grid["x"]) == [0, 1.5, 3, 4.5, 6]
    # assert list(grid["y[0]"]) == [0, 0.75, 1.5, 2.25, 3]
    numpy.testing.assert_almost_equal(
        grid["y[0]"],
        numpy.linspace(numpy.log(numpy.e), numpy.log(numpy.e ** 4),
                       num=n_points),
        decimal=4,
    )
    assert averages.shape == (n_points, n_points)
    assert stds.shape == (n_points, n_points)
    assert averages[0, 0] == numpy.arange(n_samples).mean()
    assert (averages[4, 4] == numpy.arange(
        n_samples * n_points * n_points - n_samples,
        n_samples * n_points * n_points).mean())
    assert stds[0, 0] == numpy.arange(n_samples).std()
    assert stds[4, 4] == numpy.arange(n_samples).std()

    # Test for 2 params with one categorical, with fewer categories than n_points
    grid, averages, stds = partial_dependency_grid(
        flattened_space, mock_model(), ["x", "z"], samples, n_points=n_points)
    assert list(grid.keys()) == ["x", "z"]
    assert list(grid["x"]) == [0, 1.5, 3, 4.5, 6]
    assert list(grid["z"]) == [0, 1, 2]
    assert averages.shape == (3, n_points)
    assert stds.shape == (3, n_points)
    assert averages[0, 0] == numpy.arange(n_samples).mean()
    assert (averages[2, 4] == numpy.arange(n_samples * 3 * n_points - n_samples,
                                           n_samples * 3 * n_points).mean())
    assert stds[0, 0] == numpy.arange(n_samples).std()
    assert stds[2, 4] == numpy.arange(n_samples).std()
def reserve_trial(self, score_handle=None, _depth=1):
    """Find *new* trials that exist currently in database and select one
    of them based on the highest score returned by the `score_handle` callable.

    :param score_handle: A way to decide which trial out of the *new* ones
       to pick as *reserved*; defaults to a random choice.
    :type score_handle: callable

    :param _depth: recursion depth, only used for logging purposes; can be
       ignored

    :return: selected `Trial` object, None if could not find any.
    """
    log.debug('%s reserving trial with (score: %s)', '<' * _depth, score_handle)

    if score_handle is not None and not callable(score_handle):
        raise ValueError(
            "Argument `score_handle` must be callable with a `Trial`.")

    self.fix_lost_trials()

    query = dict(experiment=self._id,
                 status={'$in': ['new', 'suspended', 'interrupted']})
    new_trials = self.fetch_trials(query)
    log.debug('%s Fetched (trials: %s)', '<' * _depth, len(new_trials))

    if not new_trials:
        log.debug('%s no new trials found', '<' * _depth)
        return None

    if score_handle is not None and self.space:
        scores = list(
            map(score_handle,
                map(lambda x: trial_to_tuple(x, self.space), new_trials)))
        scored_trials = zip(scores, new_trials)
        best_trials = filter(lambda st: st[0] == max(scores), scored_trials)
        new_trials = list(zip(*best_trials))[1]
    elif score_handle is not None:
        log.warning("While reserving trial: `score_handle` was provided, but "
                    "parameter space has not been defined yet.")

    selected_trial = random.sample(new_trials, 1)[0]
    log.debug('%s selected (trial: %s)', '<' * _depth, selected_trial)

    update = dict(status='reserved', heartbeat=datetime.datetime.utcnow())
    if selected_trial.status == 'new':
        update["start_time"] = datetime.datetime.utcnow()

    # Query on status to ensure atomicity. If another process changes the
    # status meanwhile, the update will fail, because the query will fail.
    # This relies on the atomicity of document updates.
    log.debug('%s trying to reserve trial', '<' * _depth)
    reserved = self._storage.update_trial(
        selected_trial, **update, where={'status': selected_trial.status})

    if not reserved:
        selected_trial = self.reserve_trial(score_handle=score_handle,
                                            _depth=_depth + 1)
    else:
        log.debug('%s found suitable trial', '<' * _depth)
        selected_trial = self.fetch_trials({'_id': selected_trial.id})[0]

    log.debug('%s reserved trial (trial: %s)', '<' * _depth, selected_trial)
    return selected_trial
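The reservation above hinges on an atomic compare-and-swap over the trial's status. Isolated, the pattern looks like this; a sketch with hypothetical names, mirroring the `update_trial(..., where={'status': ...})` call above:

def try_reserve(storage, trial):
    """Attempt to flip `trial` to 'reserved' atomically.

    The `where` clause makes the update a compare-and-swap: it only
    succeeds if no other worker changed the status since we read it.
    """
    return storage.update_trial(
        trial, status='reserved', where={'status': trial.status})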
def partial_dependency(trials,
                       space,
                       params=None,
                       model="RandomForestRegressor",
                       n_grid_points=10,
                       n_samples=50,
                       **kwargs):
    """
    Calculates the partial dependency of parameters in a collection of
    :class:`orion.core.worker.trial.Trial`.

    Parameters
    ----------
    trials: DataFrame or dict
        A dataframe of trials containing, at least, the columns 'objective'
        and 'id'. Or a dict equivalent.
    space: Space object
        A space object from an experiment.
    params: list of str, optional
        The parameters to include in the computation. All parameters are
        included by default.
    model: str
        Name of the regression model to use. Can be one of
        - AdaBoostRegressor
        - BaggingRegressor
        - ExtraTreesRegressor
        - GradientBoostingRegressor
        - RandomForestRegressor (Default)
    n_grid_points: int
        Number of points in the grid to compute partial dependency.
        Default is 10.
    n_samples: int
        Number of samples drawn randomly to generate the grid used to compute
        the partial dependency. Default is 50.
    **kwargs
        Arguments for the regressor model.

    Returns
    -------
    dict
        Dictionary of DataFrames. Each combination of parameters as keys
        (dim1.name, dim2.name) and each parameter individually (dim1.name).
        Columns are (dim1.name, dim2.name, objective) or (dim1.name, objective).

    """
    params = flatten_params(space, params)

    flattened_space = build_required_space(
        space,
        dist_requirement="linear",
        type_requirement="numerical",
        shape_requirement="flattened",
    )

    if trials.empty or trials.shape[0] == 0:
        return {}

    data = to_numpy(trials, space)
    data = flatten_numpy(data, flattened_space)
    model = train_regressor(model, data, **kwargs)

    data = [
        format_trials.trial_to_tuple(trial, flattened_space)
        for trial in flattened_space.sample(n_samples)
    ]
    data = pandas.DataFrame(data, columns=flattened_space.keys())

    partial_dependencies = dict()
    for x_i, x_name in enumerate(params):
        grid, averages, stds = partial_dependency_grid(
            flattened_space, model, [x_name], data, n_grid_points)
        grid = reverse(flattened_space, grid)
        partial_dependencies[x_name] = (grid, averages, stds)
        for y_i in range(x_i + 1, len(params)):
            y_name = params[y_i]
            grid, averages, stds = partial_dependency_grid(
                flattened_space, model, [x_name, y_name], data, n_grid_points)
            grid = reverse(flattened_space, grid)
            partial_dependencies[(x_name, y_name)] = (grid, averages, stds)

    return partial_dependencies
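A hedged usage sketch for `partial_dependency`, assuming trials were fetched as a pandas DataFrame with at least 'id' and 'objective' columns plus one column per parameter, and that `space` comes from an experiment; the parameter names are illustrative:

import pandas

trials = pandas.DataFrame({
    "id": ["a", "b", "c"],
    "objective": [0.10, 0.30, 0.20],
    "lr": [0.01, 0.10, 0.50],
    "momentum": [0.80, 0.90, 0.99],
})

dependencies = partial_dependency(trials, space, n_grid_points=5, n_samples=20)

# One entry per parameter and one per parameter pair.
grid, averages, stds = dependencies["lr"]
grid_2d, averages_2d, stds_2d = dependencies[("lr", "momentum")]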
def test_hierarchical_trial_to_tuple(hierarchical_space, hierarchical_trial,
                                     params_tuple):
    """Check if hierarchical trial is correctly created from a sample/tuple."""
    data = trial_to_tuple(hierarchical_trial, hierarchical_space)
    assert data == params_tuple
def test_hierarchical_trial_to_tuple(hierarchical_space, hierarchical_trial,
                                     fixed_suggestion):
    """Check if hierarchical trial is correctly created from a sample/tuple."""
    data = trial_to_tuple(hierarchical_trial, hierarchical_space)
    assert data == fixed_suggestion