def _setup_studies(
    storage: BaseStorage,
    n_study: int,
    n_trial: int,
    seed: int,
    direction: Optional[StudyDirection] = None,
) -> Tuple[Dict[int, StudySummary], Dict[int, Dict[int, FrozenTrial]]]:
    """Fill ``storage`` with generated studies/trials and return what is expected back.

    Args:
        storage: Storage that receives the studies and trials.
        n_study: Number of studies to create.
        n_trial: Number of trials to create in each study.
        seed: Seed of the private ``random.Random`` used for all random choices.
        direction: Direction applied to the studies. When ``None`` a direction
            is drawn at random.

    Returns:
        A pair ``(study_id -> expected StudySummary, study_id -> {trial_id -> trial})``.
    """
    rng = random.Random(seed)
    expected_summaries: Dict[int, StudySummary] = {}
    expected_trials: Dict[int, Dict[int, FrozenTrial]] = {}

    for study_index in range(n_study):
        name = "test-study-name-{}".format(study_index)
        study_id = storage.create_new_study(study_name=name)

        # NOTE: this rebinds the ``direction`` parameter, so a randomly drawn
        # direction is drawn only once and then reused by every later study.
        if direction is None:
            direction = rng.choice([StudyDirection.MINIMIZE, StudyDirection.MAXIMIZE])
        storage.set_study_directions(study_id, (direction,))

        stored: Dict[int, FrozenTrial] = {}
        earliest_start = None
        best = None
        for number in range(n_trial):
            candidate = _generate_trial(rng)
            candidate.number = number
            candidate._trial_id = storage.create_new_trial(study_id, candidate)
            stored[candidate._trial_id] = candidate

            # Track the smallest start time seen so far.
            started = candidate.datetime_start
            earliest_start = started if earliest_start is None else min(earliest_start, started)

            # Only completed trials that carry a value compete for "best trial".
            has_result = candidate.state == TrialState.COMPLETE and candidate.value is not None
            if not has_result:
                continue
            if best is None:
                best = candidate
            elif direction == StudyDirection.MINIMIZE and candidate.value < best.value:
                best = candidate
            elif direction == StudyDirection.MAXIMIZE and best.value < candidate.value:
                best = candidate

        expected_trials[study_id] = stored
        expected_summaries[study_id] = StudySummary(
            study_name=name,
            direction=direction,
            best_trial=best,
            user_attrs={},
            system_attrs={},
            n_trials=len(stored),
            datetime_start=earliest_start,
            study_id=study_id,
        )

    return expected_summaries, expected_trials
def _build_study_summary(self, study_id: int) -> StudySummary:
    """Assemble a ``StudySummary`` snapshot for the study ``study_id``.

    The best trial and the user/system attributes are deep-copied, so the
    returned summary does not share those objects with ``self._studies``.

    Args:
        study_id: Index/key of the study in ``self._studies``.

    Returns:
        Summary holding the study's name, direction, best trial (``None`` when
        ``best_trial_id`` is unset), attributes, trial count and the earliest
        non-``None`` trial start time (``None`` when there is none).

    Raises:
        Whatever ``self._studies[study_id]`` raises for an unknown id
        (presumably ``KeyError`` -- confirm against the container type).
    """
    study = self._studies[study_id]

    best_trial = None
    if study.best_trial_id is not None:
        best_trial = copy.deepcopy(self._get_trial(study.best_trial_id))

    datetime_start = None
    if study.trials:
        # Skip trials without a start time: comparing ``None`` with a datetime
        # inside ``min`` raises ``TypeError``.  ``default=None`` covers the case
        # where no trial has a start time at all.  This matches how an SQL
        # ``MIN`` aggregate ignores NULL values.
        datetime_start = min(
            (
                trial.datetime_start
                for trial in self.get_all_trials(study_id, deepcopy=False)
                if trial.datetime_start is not None
            ),
            default=None,
        )

    return StudySummary(
        study_name=study.name,
        direction=study.direction,
        best_trial=best_trial,
        user_attrs=copy.deepcopy(study.user_attrs),
        system_attrs=copy.deepcopy(study.system_attrs),
        n_trials=len(study.trials),
        datetime_start=datetime_start,
        study_id=study_id,
    )
def create_new_study(self, study_name: Optional[str] = None) -> int:
    """Create a study record in Redis and return its newly allocated id.

    Args:
        study_name: Name to register.  When ``None``, a name is built from
            ``DEFAULT_STUDY_NAME_PREFIX`` and the zero-padded study id.

    Returns:
        The id obtained by incrementing the ``study_counter`` key.

    Raises:
        exceptions.DuplicatedStudyError: If the key for ``study_name`` already
            exists in Redis.
    """
    name_is_taken = study_name is not None and self._redis.exists(
        self._key_study_name(study_name)
    )
    if name_is_taken:
        raise exceptions.DuplicatedStudyError

    # We need the counter to start with 0.
    if not self._redis.exists("study_counter"):
        self._redis.set("study_counter", -1)
    study_id = self._redis.incr("study_counter", 1)

    # We need the trial_number counter to start with 0.
    self._redis.set("study_id:{:010d}:trial_number".format(study_id), -1)

    if study_name is None:
        study_name = "{}{:010d}".format(DEFAULT_STUDY_NAME_PREFIX, study_id)

    # A new study starts without direction, trials or attributes.
    initial_summary = StudySummary(
        study_name=study_name,
        direction=StudyDirection.NOT_SET,
        best_trial=None,
        user_attrs={},
        system_attrs={},
        n_trials=0,
        datetime_start=None,
        study_id=study_id,
    )
    pickled_id = pickle.dumps(study_id)

    # Queue every write on the pipeline after multi() and submit them with a
    # single execute().
    with self._redis.pipeline() as txn:
        txn.multi()
        txn.set(self._key_study_name(study_name), pickled_id)
        txn.set(
            "study_id:{:010d}:study_name".format(study_id),
            pickle.dumps(study_name),
        )
        txn.set(
            "study_id:{:010d}:directions".format(study_id),
            pickle.dumps([StudyDirection.NOT_SET]),
        )
        txn.rpush("study_list", pickled_id)
        txn.set(self._key_study_summary(study_id), pickle.dumps(initial_summary))
        txn.execute()

    _logger.info("A new study created in Redis with name: {}".format(study_name))

    return study_id
def get_all_study_summaries(self) -> List[StudySummary]:
    """Return a ``StudySummary`` for every study stored in the database.

    For each study the summary contains its name, direction, user/system
    attributes, number of trials, earliest trial start time and -- when one
    can be found -- a ``FrozenTrial`` describing the best trial.

    Returns:
        One ``StudySummary`` per row of ``models.StudyModel``, including
        studies that have no trials (their ``n_trials`` is ``0``).
    """
    session = self.scoped_session()

    # Per-study aggregate over trials: earliest start time and trial count.
    summarized_trial = (
        session.query(
            models.TrialModel.study_id,
            functions.min(models.TrialModel.datetime_start).label("datetime_start"),
            functions.count(models.TrialModel.trial_id).label("n_trial"),
        )
        .group_by(models.TrialModel.study_id)
        .with_labels()
        .subquery()
    )
    # Outer join keeps studies without any trial; their count is coalesced to 0.
    study_summary_stmt = session.query(
        models.StudyModel.study_id,
        models.StudyModel.study_name,
        models.StudyModel.direction,
        summarized_trial.c.datetime_start,
        functions.coalesce(summarized_trial.c.n_trial, 0).label("n_trial"),
    ).select_from(orm.outerjoin(models.StudyModel, summarized_trial))
    study_summary = study_summary_stmt.all()

    study_summaries = []
    for study in study_summary:
        # Reset both names for every study.  Previously ``best_trial_frozen``
        # was only bound in the ``except`` branch or when a best trial existed,
        # so a lookup returning a falsy value without raising would have left
        # it unbound (first study) or pointing at the previous study's trial.
        best_trial: Optional[models.TrialModel] = None
        best_trial_frozen: Optional[FrozenTrial] = None
        try:
            if study.direction == StudyDirection.MAXIMIZE:
                best_trial = models.TrialModel.find_max_value_trial(study.study_id, session)
            else:
                best_trial = models.TrialModel.find_min_value_trial(study.study_id, session)
        except ValueError:
            # Presumably raised when the study has no trial with a value
            # (confirm against the finder helpers); report no best trial.
            pass

        if best_trial:
            params = (
                session.query(
                    models.TrialParamModel.param_name,
                    models.TrialParamModel.param_value,
                    models.TrialParamModel.distribution_json,
                )
                .filter(models.TrialParamModel.trial_id == best_trial.trial_id)
                .all()
            )
            param_dict = {}
            param_distributions = {}
            for param in params:
                distribution = distributions.json_to_distribution(param.distribution_json)
                # Stored values are converted to their external representation.
                param_dict[param.param_name] = distribution.to_external_repr(param.param_value)
                param_distributions[param.param_name] = distribution

            trial_user_attrs = session.query(models.TrialUserAttributeModel).filter(
                models.TrialUserAttributeModel.trial_id == best_trial.trial_id
            )
            trial_system_attrs = session.query(models.TrialSystemAttributeModel).filter(
                models.TrialSystemAttributeModel.trial_id == best_trial.trial_id
            )
            intermediate = session.query(models.TrialValueModel).filter(
                models.TrialValueModel.trial_id == best_trial.trial_id
            )
            best_trial_frozen = FrozenTrial(
                best_trial.number,
                TrialState.COMPLETE,
                best_trial.value,
                best_trial.datetime_start,
                best_trial.datetime_complete,
                param_dict,
                param_distributions,
                {i.key: json.loads(i.value_json) for i in trial_user_attrs},
                {i.key: json.loads(i.value_json) for i in trial_system_attrs},
                {value.step: value.value for value in intermediate},
                best_trial.trial_id,
            )

        user_attrs = session.query(models.StudyUserAttributeModel).filter(
            models.StudyUserAttributeModel.study_id == study.study_id
        )
        system_attrs = session.query(models.StudySystemAttributeModel).filter(
            models.StudySystemAttributeModel.study_id == study.study_id
        )
        study_summaries.append(
            StudySummary(
                study_name=study.study_name,
                direction=study.direction,
                best_trial=best_trial_frozen,
                user_attrs={i.key: json.loads(i.value_json) for i in user_attrs},
                system_attrs={i.key: json.loads(i.value_json) for i in system_attrs},
                n_trials=study.n_trial,
                datetime_start=study.datetime_start,
                study_id=study.study_id,
            )
        )

    # Terminate transaction explicitly to avoid connection timeout during transaction.
    self._commit(session)

    return study_summaries