def handle_report_metric_data(self, data):
    """Receive the metric data and update Bayesian optimization with final result.

    Parameters
    ----------
    data:
        it is an object which has keys 'parameter_id', 'value', 'trial_job_id', 'type', 'sequence'.

    Raises
    ------
    ValueError
        Data type not supported
    """
    logger.debug('handle report metric data = %s', data)
    assert 'value' in data
    value = extract_scalar_reward(data['value'])
    # the config generator minimizes a loss, so negate the reward when maximizing
    if self.optimize_mode is OptimizeMode.Maximize:
        reward = -value
    else:
        reward = value
    assert 'parameter_id' in data
    # parameter_id encodes '<bracket>_<round>_<index>'
    s, i, _ = data['parameter_id'].split('_')
    logger.debug('bracket id = %s, metrics value = %s, type = %s', s, value, data['type'])
    s = int(s)
    assert 'type' in data
    if data['type'] == 'FINAL':
        # sys.maxsize indicates this value is from FINAL metric data, because data['sequence'] from FINAL metric
        # and PERIODICAL metric are independent, thus, not comparable.
        assert 'sequence' in data
        self.brackets[s].set_config_perf(int(i), data['parameter_id'], sys.maxsize, value)
        self.completed_hyper_configs.append(data)
        _parameters = self.parameters[data['parameter_id']]
        # strip the budget key so only real hyperparameters reach the model
        _parameters.pop(_KEY)
        # update BO with loss, max_s budget, hyperparameters
        self.cg.new_result(loss=reward, budget=data['sequence'], parameters=_parameters, update_model=True)
    elif data['type'] == 'PERIODICAL':
        self.brackets[s].set_config_perf(int(i), data['parameter_id'], data['sequence'], value)
    else:
        raise ValueError('Data type not supported: {}'.format(data['type']))
def handle_report_metric_data(self, data):
    """Handle a reported metric: serve a parameter request or record a trial result.

    Parameters
    ----------
    data:
        it is an object which has keys 'parameter_id', 'value', 'trial_job_id', 'type', 'sequence'.

    Raises
    ------
    ValueError
        Data type not supported
    """
    if 'value' in data:
        data['value'] = json_tricks.loads(data['value'])
    if data['type'] == MetricType.REQUEST_PARAMETER:
        # a running trial asks for its next configuration (multi-phase mode)
        assert multi_phase_enabled()
        assert data['trial_job_id'] is not None
        assert data['parameter_index'] is not None
        assert data['trial_job_id'] in self.job_id_para_id_map
        self._handle_trial_end(self.job_id_para_id_map[data['trial_job_id']])
        ret = self._get_one_trial_job()
        if data['trial_job_id'] is not None:
            ret['trial_job_id'] = data['trial_job_id']
        if data['parameter_index'] is not None:
            ret['parameter_index'] = data['parameter_index']
        # remember which parameter set this job is now running
        self.job_id_para_id_map[data['trial_job_id']] = ret['parameter_id']
        send(CommandType.SendTrialJobParameter, json_tricks.dumps(ret))
    else:
        value = extract_scalar_reward(data['value'])
        # parameter_id encodes '<bracket>_<round>_<index>'
        bracket_id, i, _ = data['parameter_id'].split('_')
        bracket_id = int(bracket_id)
        # add <trial_job_id, parameter_id> to self.job_id_para_id_map here,
        # because when the first parameter_id is created, trial_job_id is not known yet.
        if data['trial_job_id'] in self.job_id_para_id_map:
            assert self.job_id_para_id_map[data['trial_job_id']] == data['parameter_id']
        else:
            self.job_id_para_id_map[data['trial_job_id']] = data['parameter_id']
        if data['type'] == MetricType.FINAL:
            # sys.maxsize indicates this value is from FINAL metric data, because data['sequence'] from FINAL metric
            # and PERIODICAL metric are independent, thus, not comparable.
            self.brackets[bracket_id].set_config_perf(int(i), data['parameter_id'], sys.maxsize, value)
            self.completed_hyper_configs.append(data)
        elif data['type'] == MetricType.PERIODICAL:
            self.brackets[bracket_id].set_config_perf(int(i), data['parameter_id'], data['sequence'], value)
        else:
            raise ValueError('Data type not supported: {}'.format(data['type']))
def import_data(self, data):
    """
    Import additional data for tuning.

    Parameters
    ----------
    data : list of dict
        Each of which has at least two keys, ``parameter`` and ``value``.
    """
    _completed_num = 0
    for trial_info in data:
        self.logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data))
        # simply validate data format
        assert "parameter" in trial_info
        _params = trial_info["parameter"]
        assert "value" in trial_info
        _value = trial_info['value']
        # skip failed/empty trials — they carry no usable observation
        if not _value:
            self.logger.info("Useless trial data, value is %s, skip this trial data.", _value)
            continue
        _value = extract_scalar_reward(_value)
        # convert the keys in loguniform and categorical types
        valid_entry = True
        for key, value in _params.items():
            if key in self.loguniform_key:
                # the solver works in log space for loguniform parameters
                _params[key] = np.log(value)
            elif key in self.categorical_dict:
                if value in self.categorical_dict[key]:
                    # categorical values are stored as their index in the choice list
                    _params[key] = self.categorical_dict[key].index(value)
                else:
                    self.logger.info("The value %s of key %s is not in search space.", str(value), key)
                    valid_entry = False
                    break
        if not valid_entry:
            continue
        # start import this data entry
        _completed_num += 1
        config = Configuration(self.cs, values=_params)
        # the solver minimizes, so flip the sign when the user maximizes
        if self.optimize_mode is OptimizeMode.Maximize:
            _value = -_value
        if self.first_one:
            self.smbo_solver.nni_smac_receive_first_run(config, _value)
            self.first_one = False
        else:
            self.smbo_solver.nni_smac_receive_runs(config, _value)
    self.logger.info("Successfully import data to smac tuner, total data: %d, imported data: %d.", len(data), _completed_num)
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Record a finished trial's reward.
    Once ``self.inf_batch_size`` trials have finished, start the next
    update round to train the model.
    """
    idx = self.running_trials.pop(parameter_id, None)
    assert idx is not None
    reward = extract_scalar_reward(value)
    # use one internal direction: flip the sign when the user minimizes
    reward = -reward if self.optimize_mode == OptimizeMode.Minimize else reward
    self.trials_result[idx] = reward
    self.finished_trials += 1
    if self.finished_trials == self.inf_batch_size:
        self._next_round_inference()
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """Record an observation of the objective function.

    parameter_id : int
    parameters : dict of parameters
    value: final metrics of the trial, including reward
    """
    score = extract_scalar_reward(value)
    # the population ranks by result, larger is better — negate when minimizing
    if self.optimize_mode is OptimizeMode.Minimize:
        score = -score
    logger.debug('receive trial result is:\n')
    logger.debug(str(parameters))
    logger.debug(str(score))
    self.population.append(Individual(graph_loads(parameters), result=score))
    return
def receive_trial_result(self, parameter_id, parameters, value):
    """Tuner receive result from trial.

    Parameters
    ----------
    parameter_id : int
    parameters : dict
    value : dict/float
        if value is dict, it should have "default" key.
    """
    reward = extract_scalar_reward(value)
    # keep a single optimization direction internally
    if self.optimize_mode == OptimizeMode.Minimize:
        reward = -reward
    logger.info("Received trial result.")
    logger.info("value :%s", reward)
    logger.info("parameter : %s", parameters)
    self._space.register(parameters, reward)
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Tuner receive result from trial.

    Parameters
    ----------
    parameter_id : int
        The id of parameters, generated by nni manager.
    parameters : dict
        A group of parameters that trial has tried.
    value : dict/float
        if value is dict, it should have "default" key.
    """
    value = extract_scalar_reward(value)
    # internal bookkeeping treats smaller as better, so negate when maximizing
    if self.optimize_mode == OptimizeMode.Maximize:
        value = -value
    logger.info("Received trial result.")
    logger.info("value is : %s", str(value))
    logger.info("parameter is : %s", str(parameters))
    # parse parameter dict into a feature vector ordered by self.key_order
    sample_x = [0] * len(self.key_order)
    for key, param_value in parameters.items():
        sample_x[self.key_order.index(key)] = param_value
    if sample_x in self.samples_x:
        # repeated configuration: append the observation and refresh the aggregation
        idx = self.samples_x.index(sample_x)
        observations = self.samples_y[idx]
        observations.append(value)
        self.samples_y[idx] = observations
        # calculate y aggregation
        self.samples_y_aggregation[idx] = [get_median(observations)]
    else:
        # first observation for this configuration
        self.samples_x.append(sample_x)
        self.samples_y.append([value])
        # calculate y aggregation
        self.samples_y_aggregation.append([value])
def handle_import_data(self, data):
    """Import additional data for tuning.

    Parameters
    ----------
    data:
        a list of dictionarys, each of which has at least two keys, 'parameter' and 'value'

    Raises
    ------
    AssertionError
        data doesn't have required key 'parameter' and 'value'
    """
    # deserialize every reported value before processing
    for entry in data:
        entry['value'] = nni.load(entry['value'])
    _completed_num = 0
    for trial_info in data:
        logger.info("Importing data, current processing progress %s / %s", _completed_num, len(data))
        _completed_num += 1
        assert "parameter" in trial_info
        _params = trial_info["parameter"]
        assert "value" in trial_info
        _value = trial_info['value']
        # skip failed/empty trials — they carry no usable observation
        if not _value:
            logger.info("Useless trial data, value is %s, skip this trial data.", _value)
            continue
        _value = extract_scalar_reward(_value)
        # split the budget key out of the hyperparameters
        budget_exist_flag = False
        barely_params = dict()
        for keys in _params:
            if keys == _KEY:
                _budget = _params[keys]
                budget_exist_flag = True
            else:
                barely_params[keys] = _params[keys]
        # records without a budget are assumed to have used the maximum budget
        if not budget_exist_flag:
            _budget = self.max_budget
            logger.info("Set \"TRIAL_BUDGET\" value to %s (max budget)", self.max_budget)
        # the config generator minimizes a loss, so negate when maximizing
        if self.optimize_mode is OptimizeMode.Maximize:
            reward = -_value
        else:
            reward = _value
        self.cg.new_result(loss=reward, budget=_budget, parameters=barely_params, update_model=True)
    logger.info("Successfully import tuning data to BOHB advisor.")
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """Receive trial's final result.

    Parameters
    ----------
    parameter_id : int
    parameters : dict
        object created by 'generate_parameters()'
    value : dict/float
        final metrics of the trial, including default metric
    """
    result = {}
    # BUGFIX: the original loop was `for key, value in parameters:` which
    # (a) iterated dict keys instead of items, and (b) shadowed the `value`
    # argument, so the reward/cost lookups below read a parameter value
    # instead of the trial result.
    for key, param_value in parameters.items():
        result['config/' + key] = param_value
    reward = extract_scalar_reward(value)
    result[self._metric] = reward
    # if nni does not report training cost,
    # using sequence as an approximation.
    # if no sequence, using a constant 1
    result[self.cost_attr] = 1 if isinstance(value, float) else value.get(
        self.cost_attr, value.get('sequence', 1))
    self.on_trial_complete(str(parameter_id), result)
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """Receive trial's final result.

    Args:
        parameter_id: int.
        parameters: object created by `generate_parameters()`.
        value: final metrics of the trial, including default metric.
    """
    if isinstance(value, float):
        cost = 1
    else:
        # if nni does not report training cost,
        # using sequence as an approximation.
        # if no sequence, using a constant 1
        cost = value.get(self.cost_attr, value.get("sequence", 1))
    result = {"config": parameters, self._metric: extract_scalar_reward(value)}
    result[self.cost_attr] = cost
    self.on_trial_complete(str(parameter_id), result)
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Record an observation of the objective function.

    Parameters
    ----------
    parameter_id : int
    parameters : dict
    value : dict/float
        if value is dict, it should have "default" key.
    """
    reward = extract_scalar_reward(value)
    if parameter_id not in self.total_data:
        raise RuntimeError("Received parameter_id not in total_data.")
    # look up the lineage recorded when the parameters were generated
    _, father_id, model_id = self.total_data[parameter_id]
    model_graph = self.bo.searcher.load_model_by_id(model_id)
    # feed the observation back into the search
    self.add_model(reward, model_id)
    self.update(father_id, model_graph, reward, model_id)
def receive_trial_result(self, parameter_id, parameters, value):
    """Record the result from a trial.

    Parameters
    ----------
    parameters: dict
    value : dict/float
        if value is dict, it should have "default" key.
        value is final metrics of the trial.
    """
    score = extract_scalar_reward(value)
    if parameter_id not in self.total_data:
        raise RuntimeError('Received parameter_id not in total_data.')
    # restore the paramsters contains "_index"
    stored_params = self.total_data[parameter_id]
    # population ranks by result, larger is better — negate when minimizing
    if self.optimize_mode == OptimizeMode.Minimize:
        score = -score
    self.population.append(Individual(config=stored_params, result=score))
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Receive trial's result. if the number of finished trials equals ``self.population_size``,
    start the next epoch to train the model.

    Parameters
    ----------
    parameter_id : int
        Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
    parameters : dict
        Hyper-parameters generated by :meth:`generate_parameters`.
    value : dict
        Result from trial (the return value of :func:`nni.report_final_result`).
    """
    logger.info('Get one trial result, id = %d, value = %s', parameter_id, value)
    value = extract_scalar_reward(value)
    # NOTE(review): pop() defaults to None, so an unknown parameter_id would
    # raise AttributeError on the next line — presumably every reported id is
    # in self.running; confirm with the caller.
    trial_info = self.running.pop(parameter_id, None)
    trial_info.score = value
    self.finished.append(trial_info)
    self.finished_trials += 1
    if self.finished_trials == self.population_size:
        self._proceed_next_epoch()
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Record the result from a trial.

    Parameters
    ----------
    parameter_id : int
    parameters : dict
    value : dict/float
        if value is dict, it should have "default" key.
        value is final metrics of the trial.
    """
    score = extract_scalar_reward(value)
    if parameter_id not in self.history:
        raise RuntimeError('Received parameter_id not in total_data.')
    stored_params = self.history[parameter_id]
    # keep one internal direction: larger is better
    if self.optimize_mode == OptimizeMode.Minimize:
        score = -score
    self.population.append(FinishedIndividual(parameter_id, stored_params, score))
    # bound the population: evict the oldest individual
    if len(self.population) > self.population_size:
        self.population.popleft()
def handle_report_metric_data(self, data):
    """
    Parameters
    ----------
    data:
        it is an object which has keys 'parameter_id', 'value', 'trial_job_id', 'type', 'sequence'.

    Raises
    ------
    ValueError
        Data type not supported
    """
    value = extract_scalar_reward(data['value'])
    # parameter_id encodes '<bracket>_<round>_<index>'
    bracket_id, i, _ = data['parameter_id'].split('_')
    bracket_id = int(bracket_id)
    metric_type = data['type']
    if metric_type == 'FINAL':
        # sys.maxsize indicates this value is from FINAL metric data, because data['sequence'] from FINAL metric
        # and PERIODICAL metric are independent, thus, not comparable.
        self.brackets[bracket_id].set_config_perf(int(i), data['parameter_id'], sys.maxsize, value)
        self.completed_hyper_configs.append(data)
    elif metric_type == 'PERIODICAL':
        self.brackets[bracket_id].set_config_perf(int(i), data['parameter_id'], data['sequence'], value)
    else:
        raise ValueError('Data type not supported: {}'.format(data['type']))
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """Record an observation of the objective function.

    parameter_id : int
    parameters : dict of parameters
    value: final metrics of the trial
    """
    # first result seeds the search space with the default feature importance;
    # later results accumulate per-epoch importances
    if self.search_space is None:
        self.search_space = value['feature_importance']
        self.estimate_sample_prob = self.estimate_candidate_probility()
    else:
        self.epoch_importance.append(value['feature_importance'])
        # TODO
        self.update_candidate_probility()
    score = extract_scalar_reward(value)
    if self.optimize_mode is OptimizeMode.Minimize:
        score = -score
    logger.info('receive trial result is:\n')
    logger.info(str(parameters))
    logger.info(str(score))
    return
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Receive trial's result. if the number of finished trials equals ``self.population_size``,
    start the next epoch to train the model.

    Parameters
    ----------
    parameter_id : int
        Unique identifier of used hyper-parameters, same with :meth:`generate_parameters`.
    parameters : dict
        Hyper-parameters generated by :meth:`generate_parameters`.
    value : dict
        Result from trial (the return value of :func:`nni.report_final_result`).
    """
    logger.info('Get one trial result, id = %d, value = %s', parameter_id, value)
    value = extract_scalar_reward(value)
    # larger score is better internally — negate when minimizing
    if self.optimize_mode == OptimizeMode.Minimize:
        value = -value
    # NOTE(review): pop() defaults to None; an unknown parameter_id would raise
    # AttributeError on the next line — presumably ids are always tracked; confirm.
    trial_info = self.running.pop(parameter_id, None)
    trial_info.score = value
    self.finished.append(trial_info)
    self.finished_trials += 1
    if self.finished_trials == self.population_size:
        # whole population finished: run one PBT exploit/explore step and restart
        logger.info('Proceeding to next epoch')
        self.epoch += 1
        self.population = []
        self.pos = -1
        self.running = {}
        #exploit and explore
        # rank by score, best first
        self.finished = sorted(self.finished, key=lambda x: x.score, reverse=True)
        cutoff = int(np.ceil(self.fraction * len(self.finished)))
        tops = self.finished[:cutoff]
        bottoms = self.finished[self.finished_trials - cutoff:]
        # each bottom performer copies (and perturbs) a random top performer
        for bottom in bottoms:
            top = np.random.choice(tops)
            exploit_and_explore(bottom, top, self.factor, self.resample_probability, self.epoch, self.searchspace_json)
        for trial in self.finished:
            if trial not in bottoms:
                trial.clean_id()
                # survivors resume from their own previous checkpoint
                trial.hyper_parameters['load_checkpoint_dir'] = trial.hyper_parameters['save_checkpoint_dir']
                trial.hyper_parameters['save_checkpoint_dir'] = os.path.join(trial.checkpoint_dir, str(self.epoch))
        self.finished_trials = 0
        # move everyone back into the population for the next epoch
        for _ in range(self.population_size):
            trial_info = self.finished.pop()
            self.population.append(trial_info)
        # hand out new parameters to as many queued trial slots as credit allows
        while self.credit > 0 and self.pos + 1 < len(self.population):
            self.credit -= 1
            self.pos += 1
            parameter_id = self.param_ids.pop()
            trial_info = self.population[self.pos]
            trial_info.parameter_id = parameter_id
            self.running[parameter_id] = trial_info
            self.send_trial_callback(parameter_id, split_index(trial_info.hyper_parameters))
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """Store a finished trial as (id, param, reward, customized-flag)."""
    reward = extract_scalar_reward(value)
    record = (parameter_id, parameters['param'], reward, kwargs.get("customized"))
    self.trial_results.append(record)
def handle_report_metric_data(self, data):
    """Receive the metric data and update Bayesian optimization with final result.

    Parameters
    ----------
    data:
        it is an object which has keys 'parameter_id', 'value', 'trial_job_id', 'type', 'sequence'.

    Raises
    ------
    ValueError
        Data type not supported
    """
    logger.debug('handle report metric data = %s', data)
    if data['type'] == MetricType.REQUEST_PARAMETER:
        # a running trial asks for its next configuration (multi-phase mode)
        assert multi_phase_enabled()
        assert data['trial_job_id'] is not None
        assert data['parameter_index'] is not None
        assert data['trial_job_id'] in self.job_id_para_id_map
        self._handle_trial_end(
            self.job_id_para_id_map[data['trial_job_id']])
        ret = self._get_one_trial_job()
        if ret is None:
            # no configuration available right now — park the request
            self.unsatisfied_jobs.append({
                'trial_job_id': data['trial_job_id'],
                'parameter_index': data['parameter_index']
            })
        else:
            ret['trial_job_id'] = data['trial_job_id']
            ret['parameter_index'] = data['parameter_index']
            # update parameter_id in self.job_id_para_id_map
            self.job_id_para_id_map[
                data['trial_job_id']] = ret['parameter_id']
            send(CommandType.SendTrialJobParameter, json_tricks.dumps(ret))
    else:
        assert 'value' in data
        value = extract_scalar_reward(data['value'])
        # the config generator minimizes a loss, so negate when maximizing
        if self.optimize_mode is OptimizeMode.Maximize:
            reward = -value
        else:
            reward = value
        assert 'parameter_id' in data
        # parameter_id encodes '<bracket>_<round>_<index>'
        s, i, _ = data['parameter_id'].split('_')
        logger.debug('bracket id = %s, metrics value = %s, type = %s', s, value, data['type'])
        s = int(s)
        # add <trial_job_id, parameter_id> to self.job_id_para_id_map here,
        # because when the first parameter_id is created, trial_job_id is not known yet.
        if data['trial_job_id'] in self.job_id_para_id_map:
            assert self.job_id_para_id_map[
                data['trial_job_id']] == data['parameter_id']
        else:
            self.job_id_para_id_map[
                data['trial_job_id']] = data['parameter_id']
        assert 'type' in data
        if data['type'] == MetricType.FINAL:
            # sys.maxsize indicates this value is from FINAL metric data, because data['sequence'] from FINAL metric
            # and PERIODICAL metric are independent, thus, not comparable.
            assert 'sequence' in data
            self.brackets[s].set_config_perf(int(i), data['parameter_id'], sys.maxsize, value)
            self.completed_hyper_configs.append(data)
            _parameters = self.parameters[data['parameter_id']]
            # strip the budget key so only real hyperparameters reach the model
            _parameters.pop(_KEY)
            # update BO with loss, max_s budget, hyperparameters
            self.cg.new_result(loss=reward, budget=data['sequence'], parameters=_parameters, update_model=True)
        elif data['type'] == MetricType.PERIODICAL:
            self.brackets[s].set_config_perf(int(i), data['parameter_id'], data['sequence'], value)
        else:
            raise ValueError('Data type not supported: {}'.format(
                data['type']))
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """
    Record an observation of the objective function.

    Parameters
    ----------
    parameter_id : int
    parameters : dict
    value : dict/float
        if value is dict, it should have "default" key.
    """
    reward = extract_scalar_reward(value)
    # restore the paramsters contains '_index'
    if parameter_id not in self.total_data:
        raise RuntimeError('Received parameter_id not in total_data.')
    params = self.total_data[parameter_id]

    # code for parallel
    if self.parallel:
        constant_liar = kwargs.get('constant_liar', False)
        if constant_liar:
            # constant-liar results go into the separate CL trials object
            rval = self.CL_rval
        else:
            rval = self.rval
            # ignore duplicated reported final result (due to aware of intermedate result)
            if parameter_id not in self.running_data:
                logger.info("Received duplicated final result with parameter id: %s", parameter_id)
                return
            self.running_data.remove(parameter_id)

            # update the reward of optimal_y
            # optimal_y tracks the statistic used to fabricate "lies":
            # [sum, count] for 'mean', a scalar for 'min'/'max'
            if self.optimal_y is None:
                if self.constant_liar_type == 'mean':
                    self.optimal_y = [reward, 1]
                else:
                    self.optimal_y = reward
            else:
                if self.constant_liar_type == 'mean':
                    _sum = self.optimal_y[0] + reward
                    _number = self.optimal_y[1] + 1
                    self.optimal_y = [_sum, _number]
                elif self.constant_liar_type == 'min':
                    self.optimal_y = min(self.optimal_y, reward)
                elif self.constant_liar_type == 'max':
                    self.optimal_y = max(self.optimal_y, reward)
            logger.debug("Update optimal_y with reward, optimal_y = %s", self.optimal_y)
    else:
        rval = self.rval

    # hyperopt minimizes a loss, so negate the reward when maximizing
    if self.optimize_mode is OptimizeMode.Maximize:
        reward = -reward

    # build a finished hyperopt trial document and insert it
    domain = rval.domain
    trials = rval.trials
    new_id = len(trials)
    rval_specs = [None]
    rval_results = [domain.new_result()]
    rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]
    vals = params
    idxs = dict()
    out_y = dict()
    json2vals(self.json, vals, out_y)
    vals = out_y
    for key in domain.params:
        if key in [NodeType.VALUE, NodeType.INDEX]:
            continue
        if key not in vals or vals[key] is None or vals[key] == []:
            # parameter not active in this sample
            idxs[key] = vals[key] = []
        else:
            idxs[key] = [new_id]
            vals[key] = [vals[key]]
    self.miscs_update_idxs_vals(rval_miscs, idxs, vals, idxs_map={new_id: new_id}, assert_all_vals_used=False)
    trial = trials.new_trial_docs([new_id], rval_specs, rval_results, rval_miscs)[0]
    trial['result'] = {'loss': reward, 'status': 'ok'}
    trial['state'] = hp.JOB_STATE_DONE
    trials.insert_trial_docs([trial])
    trials.refresh()
def import_data(self, data):
    """
    Import additional data for tuning (used for experiment resume).

    Parameters
    ----------
    data : json obj
        imported data records

    Returns
    -------
    int
        the start epoch number after data imported, only used for unittest
    """
    if self.running:
        logger.warning("Do not support importing data in the middle of experiment")
        return
    # the following is for experiment resume
    _completed_num = 0
    epoch_data_dict = {}
    for trial_info in data:
        logger.info("Process data record %s / %s", _completed_num, len(data))
        _completed_num += 1
        # simply validate data format
        _params = trial_info["parameter"]
        _value = trial_info['value']
        # assign fake value for failed trials
        if not _value:
            logger.info("Useless trial data, value is %s, skip this trial data.", _value)
            _value = float('inf') if self.optimize_mode == OptimizeMode.Minimize else float('-inf')
        _value = extract_scalar_reward(_value)
        if 'save_checkpoint_dir' not in _params:
            logger.warning("Invalid data record: save_checkpoint_dir is missing, abandon data import.")
            return
        # the checkpoint directory's basename encodes the epoch number
        epoch_num = int(os.path.basename(_params['save_checkpoint_dir']))
        if epoch_num not in epoch_data_dict:
            epoch_data_dict[epoch_num] = []
        epoch_data_dict[epoch_num].append((_params, _value))
    if not epoch_data_dict:
        logger.warning("No valid epochs, abandon data import.")
        return
    # figure out start epoch for resume
    max_epoch_num = max(epoch_data_dict, key=int)
    # an incomplete last epoch cannot be resumed — fall back one epoch
    if len(epoch_data_dict[max_epoch_num]) < self.population_size:
        max_epoch_num -= 1
    # If there is no a single complete round, no data to import, start from scratch
    if max_epoch_num < 0:
        logger.warning("No completed epoch, abandon data import.")
        return
    assert len(epoch_data_dict[max_epoch_num]) == self.population_size
    # check existence of trial save checkpoint dir
    for params, _ in epoch_data_dict[max_epoch_num]:
        if not os.path.isdir(params['save_checkpoint_dir']):
            logger.warning("save_checkpoint_dir %s does not exist, data will not be resumed", params['save_checkpoint_dir'])
            return
    # resume data
    self.epoch = max_epoch_num
    self.finished_trials = self.population_size
    for params, value in epoch_data_dict[max_epoch_num]:
        checkpoint_dir = os.path.dirname(params['save_checkpoint_dir'])
        self.finished.append(TrialInfo(checkpoint_dir=checkpoint_dir, hyper_parameters=params, score=value))
    self._proceed_next_epoch()
    logger.info("Successfully import data to PBT tuner, total data: %d, imported data: %d.", len(data), self.population_size)
    logger.info("Start from epoch %d ...", self.epoch)
    return self.epoch  # return for test
def receive_customized_trial_result(self, parameter_id, parameters, value):
    """Store a customized trial's result; the customized flag is always True."""
    reward = extract_scalar_reward(value)
    record = (parameter_id, parameters['param'], reward, True)
    self.trial_results.append(record)
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """Log the received reward and append '<x> <reward>' to the result file."""
    reward = extract_scalar_reward(value)
    _logger.info('receive trial result: %s, %s, %s', parameter_id, parameters, reward)
    line = '%d %d\n' % (parameters['x'], reward)
    _result.write(line)
    _result.flush()
def receive_trial_result(self, parameter_id, parameters, value, **kwargs):
    """Store a normal trial's result; the customized flag is always False."""
    reward = extract_scalar_reward(value)
    record = (parameter_id, parameters['param'], reward, False)
    self.trial_results.append(record)