def GetSuggestions(self, request, context):
    """Main function to provide suggestions."""
    experiment = self._get_experiment(request.experiment_name)
    parameter_config = parsing_util.parse_parameter_configs(
        experiment.spec.parameter_specs.parameters)

    trial_specs = []
    for _ in range(request.request_number):
        # Draw one random point from the search space and convert it back
        # into named parameter assignments.
        sample = parameter_config.random_sample()
        suggestion = parsing_util.parse_x_next_vector(
            sample,
            parameter_config.parameter_types,
            parameter_config.names,
            parameter_config.discrete_info,
            parameter_config.categorical_info)

        trial_spec = api_pb2.TrialSpec()
        trial_spec.experiment_name = request.experiment_name
        for param in suggestion:
            trial_spec.parameter_assignments.assignments.add(
                name=param['name'], value=str(param['value']))
        trial_specs.append(trial_spec)

    reply = api_pb2.GetSuggestionsReply()
    for trial_spec in trial_specs:
        reply.trials.add(spec=trial_spec)
    return reply
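
# Below is a minimal standalone sketch of the random-sampling idea above. The
# search-space encoding and the helpers (sample_parameter, random_suggestion)
# are hypothetical, not Katib's API; they only illustrate what
# parameter_config.random_sample() plus parse_x_next_vector() produce
# together: one {"name": ..., "value": ...} dict per parameter.
import random

SEARCH_SPACE = {
    "lr":        ("double", (1e-4, 1e-1)),
    "batch":     ("int", (16, 256)),
    "optimizer": ("categorical", ["sgd", "adam", "ftrl"]),
}

def sample_parameter(ptype, domain):
    # Draw one value according to the parameter type.
    if ptype == "double":
        return random.uniform(*domain)
    if ptype == "int":
        return random.randint(*domain)
    return random.choice(domain)  # categorical

def random_suggestion(space):
    return [{"name": name, "value": sample_parameter(ptype, domain)}
            for name, (ptype, domain) in space.items()]

if __name__ == "__main__":
    for _ in range(3):
        print(random_suggestion(SEARCH_SPACE))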
def _copy_trials(self, trials, r_i, resourceName):
    trialSpecs = []
    for t in trials:
        trial_spec = api_pb2.TrialSpec()
        for assignment in t.spec.parameter_assignments.assignments:
            if assignment.name == resourceName:
                value = str(r_i)
            else:
                value = assignment.value
            trial_spec.parameter_assignments.assignments.add(
                name=assignment.name, value=value)
        trialSpecs.append(trial_spec)
    return trialSpecs
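
# A plain-dict sketch of what _copy_trials does, assuming each trial is just a
# list of {"name": ..., "value": ...} assignments instead of an api_pb2
# message (copy_trials_dict is a hypothetical helper): every assignment is
# copied verbatim except the resource parameter, which gets the new budget r_i.
def copy_trials_dict(trials, r_i, resource_name):
    copied = []
    for assignments in trials:
        copied.append([
            {"name": a["name"],
             "value": str(r_i) if a["name"] == resource_name else a["value"]}
            for a in assignments
        ])
    return copied

if __name__ == "__main__":
    trials = [[{"name": "lr", "value": "0.01"}, {"name": "epochs", "value": "1"}]]
    print(copy_trials_dict(trials, r_i=9, resource_name="epochs"))
    # [[{'name': 'lr', 'value': '0.01'}, {'name': 'epochs', 'value': '9'}]]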
def GetSuggestions(self, request, context):
    """Main function to provide suggestions."""
    experiment_name = request.experiment_name
    request_number = request.request_number
    experiment = self._get_experiment(experiment_name)
    parameters = experiment.spec.parameter_specs.parameters
    alg_settings = self._get_algorithm_settings(experiment_name)

    combinations, parameter_config = self._create_all_combinations(
        parameters, alg_settings)
    total_combinations = len(combinations)

    # Hand out the next window of the grid: continue where the already
    # allocated trials left off, wrap to the front once the grid is
    # exhausted, and clamp the window so it never runs past the end.
    allocated_trials = self._get_trials(experiment_name)
    total_allocated_trials = len(allocated_trials)
    return_start_index = total_allocated_trials
    return_end_index = return_start_index + request_number
    if return_start_index > total_combinations:
        return_start_index = 0
        return_end_index = return_start_index + request_number
    elif return_start_index + request_number > total_combinations:
        return_start_index = total_combinations - request_number
        return_end_index = total_combinations
    if return_start_index < 0:
        return_start_index = 0

    trial_specs = []
    for elem in combinations[return_start_index:return_end_index]:
        suggestion = parsing_util.parse_x_next_tuple(
            elem, parameter_config.parameter_types, parameter_config.names)
        trial_spec = api_pb2.TrialSpec()
        trial_spec.experiment_name = experiment_name
        for param in suggestion:
            trial_spec.parameter_assignments.assignments.add(
                name=param['name'], value=str(param['value']))
        trial_specs.append(trial_spec)

    reply = api_pb2.GetSuggestionsReply()
    for trial_spec in trial_specs:
        reply.trials.add(spec=trial_spec)
    return reply
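
# The window arithmetic above is easy to get wrong, so here it is isolated as
# a pure function (suggestion_window is a hypothetical name, but the clamping
# mirrors the method): continue where allocated trials left off, wrap to the
# front once the grid is exhausted, and pin a partial window to the tail.
def suggestion_window(total_combinations, total_allocated, request_number):
    start = total_allocated
    end = start + request_number
    if start > total_combinations:      # grid exhausted: wrap around
        start, end = 0, request_number
    elif end > total_combinations:      # partial window: pin to the tail
        start = max(total_combinations - request_number, 0)
        end = total_combinations
    return start, end

if __name__ == "__main__":
    print(suggestion_window(10, 0, 4))    # (0, 4)
    print(suggestion_window(10, 8, 4))    # (6, 10) -- clipped to the tail
    print(suggestion_window(10, 12, 4))   # (0, 4)  -- wrapped around
    print(suggestion_window(3, 2, 5))     # (0, 3)  -- request bigger than grid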
def _make_master_bracket(self, experiment, sParams):
    n = sParams["n"]
    r = int(sParams["r"])
    parameter_config = parsing_util.parse_parameter_configs(
        experiment.spec.parameter_specs.parameters)

    trial_specs = []
    for _ in range(n):
        sample = parameter_config.random_sample()
        suggestion = parsing_util.parse_x_next_vector(
            sample,
            parameter_config.parameter_types,
            parameter_config.names,
            parameter_config.discrete_info,
            parameter_config.categorical_info)

        trial_spec = api_pb2.TrialSpec()
        trial_spec.experiment_name = experiment.name
        for param in suggestion:
            # Pin the resource parameter to the bracket budget r; all other
            # parameters keep their sampled values.
            if param['name'] == sParams["resourceName"]:
                param['value'] = str(r)
            trial_spec.parameter_assignments.assignments.add(
                name=param['name'], value=str(param['value']))
        trial_specs.append(trial_spec)

    self.logger.info("Generated %d trials for the master bracket.", n,
                     extra={"experiment_name": experiment.name})
    return trial_specs
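
# A minimal sketch of the resource pinning done inside the loop above,
# operating on a plain suggestion list as produced by parse_x_next_vector
# (pin_resource is a hypothetical helper): whatever value was sampled for the
# resource parameter is replaced by the bracket budget r.
def pin_resource(suggestion, resource_name, r):
    return [{"name": p["name"],
             "value": str(r) if p["name"] == resource_name else str(p["value"])}
            for p in suggestion]

if __name__ == "__main__":
    s = [{"name": "lr", "value": 0.03}, {"name": "epochs", "value": 42}]
    print(pin_resource(s, resource_name="epochs", r=1))
    # [{'name': 'lr', 'value': '0.03'}, {'name': 'epochs', 'value': '1'}]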
def GetSuggestions(self, request, context):
    if request.experiment_name not in self.registered_experiments:
        self.registered_experiments[request.experiment_name] = \
            NAS_RL_Experiment(request, self.logger)
    experiment = self.registered_experiments[request.experiment_name]

    self.logger.info("-" * 100 + "\nSuggestion Step {} for Experiment {}\n".format(
        experiment.ctrl_step, experiment.experiment_name) + "-" * 100)

    with experiment.tf_graph.as_default():
        saver = tf.train.Saver()
        ctrl = experiment.controller

        controller_ops = {
            "train_step": ctrl.train_step,
            "loss": ctrl.loss,
            "train_op": ctrl.train_op,
            "lr": ctrl.lr,
            "grad_norm": ctrl.grad_norm,
            "optimizer": ctrl.optimizer,
            "baseline": ctrl.baseline,
            "entropy": ctrl.sample_entropy,
            "sample_arc": ctrl.sample_arc,
            "skip_rate": ctrl.skip_rate,
        }

        run_ops = [
            controller_ops["loss"],
            controller_ops["entropy"],
            controller_ops["lr"],
            controller_ops["grad_norm"],
            controller_ops["baseline"],
            controller_ops["skip_rate"],
            controller_ops["train_op"],
        ]

        if experiment.is_first_run:
            self.logger.info(">>> First time running suggestion for {}. "
                             "Random architectures will be given.".format(
                                 experiment.experiment_name))
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                candidates = list()
                for _ in range(experiment.num_trials):
                    candidates.append(sess.run(controller_ops["sample_arc"]))

                # TODO: use a PVC to store the checkpoint to protect against
                # unexpected suggestion pod restarts.
                saver.save(sess, experiment.ctrl_cache_file)

            experiment.is_first_run = False
        else:
            with tf.Session() as sess:
                saver.restore(sess, experiment.ctrl_cache_file)

                valid_acc = ctrl.reward
                result = self.GetEvaluationResult(experiment)

                # TODO: (andreyvelich) The respawn logic was removed here;
                # should it be handled by the controller instead? Previously,
                # when the training container failed and GetEvaluationResult()
                # returned None, the suggestion would:
                # 1. Try to respawn the previous trials after waiting
                #    RESPAWN_SLEEP seconds.
                # 2. Fail the task if respawning RESPAWN_LIMIT times still
                #    produced no valid result, since that usually indicates
                #    an error in the training container.
                if result is None:
                    self.logger.warning(">>> Suggestion has spawned trials, but they all failed.")
                    self.logger.warning(">>> Please check whether the training container is correctly implemented.")
                    self.logger.info(">>> Experiment {} failed".format(experiment.experiment_name))
                    # A gRPC handler must return its declared response type,
                    # so return an empty reply rather than a bare list.
                    return api_pb2.GetSuggestionsReply(trials=[])

                # The LSTM controller maximizes the metric. If the user wants
                # to minimize it, feed the controller the negated result.
                if experiment.opt_direction == api_pb2.MINIMIZE:
                    result = -result

                loss, entropy, lr, gn, bl, skip, _ = sess.run(
                    fetches=run_ops,
                    feed_dict={valid_acc: result})
                self.logger.info(">>> Suggestion updated. LSTM Controller Loss: {}".format(loss))

                candidates = list()
                for _ in range(experiment.num_trials):
                    candidates.append(sess.run(controller_ops["sample_arc"]))

                saver.save(sess, experiment.ctrl_cache_file)

    organized_candidates = list()
    trials = list()

    for i in range(experiment.num_trials):
        # Slice the flat architecture vector into one sub-list per layer:
        # layer l owns the next l + 1 entries.
        arc = candidates[i].tolist()
        organized_arc = [0 for _ in range(experiment.num_layers)]
        record = 0
        for l in range(experiment.num_layers):
            organized_arc[l] = arc[record: record + l + 1]
            record += l + 1
        organized_candidates.append(organized_arc)

        nn_config = dict()
        nn_config['num_layers'] = experiment.num_layers
        nn_config['input_sizes'] = experiment.input_sizes
        nn_config['output_sizes'] = experiment.output_sizes
        nn_config['embedding'] = dict()
        for l in range(experiment.num_layers):
            opt = organized_arc[l][0]
            nn_config['embedding'][opt] = experiment.search_space[opt].get_dict()

        organized_arc_json = json.dumps(organized_arc)
        nn_config_json = json.dumps(nn_config)
        organized_arc_str = str(organized_arc_json).replace('\"', '\'')
        nn_config_str = str(nn_config_json).replace('\"', '\'')

        self.logger.info("\n>>> New Neural Network Architecture Candidate #{} (internal representation):".format(i))
        self.logger.info(organized_arc_json)
        self.logger.info("\n>>> Corresponding Search Space Description:")
        self.logger.info(nn_config_str)

        trials.append(api_pb2.Trial(
            spec=api_pb2.TrialSpec(
                experiment_name=request.experiment_name,
                parameter_assignments=api_pb2.TrialSpec.ParameterAssignments(
                    assignments=[
                        api_pb2.ParameterAssignment(
                            name="architecture",
                            value=organized_arc_str),
                        api_pb2.ParameterAssignment(
                            name="nn_config",
                            value=nn_config_str),
                    ]))))

    self.logger.info("")
    self.logger.info(">>> {} Trials were created for Experiment {}".format(
        experiment.num_trials, experiment.experiment_name))
    self.logger.info("")

    experiment.ctrl_step += 1
    return api_pb2.GetSuggestionsReply(trials=trials)
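
# A standalone sketch of the flat-to-nested arc decoding used above. The
# controller emits one flat vector per candidate; layer l owns the next l + 1
# entries (in this code the first entry indexes experiment.search_space, and
# the remaining l entries are presumably ENAS-style skip decisions), so the
# vector length is num_layers * (num_layers + 1) // 2.
def organize_arc(arc, num_layers):
    organized, record = [], 0
    for l in range(num_layers):
        organized.append(arc[record: record + l + 1])
        record += l + 1
    return organized

if __name__ == "__main__":
    flat = [3, 1, 0, 4, 1, 1]        # 3 layers -> 1 + 2 + 3 entries
    print(organize_arc(flat, 3))
    # [[3], [1, 0], [4, 1, 1]]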
def GetSuggestions(self, request, context):
    """Main function to provide suggestions."""
    service_params = self.parseParameters(request.experiment_name)
    experiment = self._get_experiment(request.experiment_name)
    X_train, y_train = self.getEvalHistory(
        request.experiment_name,
        experiment.spec.objective.objective_metric_name,
        service_params["burn_in"])

    parameter_config = parsing_util.parse_parameter_configs(
        experiment.spec.parameter_specs.parameters)
    algo_manager = AlgorithmManager(
        experiment_name=request.experiment_name,
        experiment=experiment,
        parameter_config=parameter_config,
        X_train=X_train,
        y_train=y_train,
        logger=self.logger,
    )

    lowerbound = np.array(algo_manager.lower_bound)
    upperbound = np.array(algo_manager.upper_bound)
    self.logger.debug("lowerbound: %r", lowerbound,
                      extra={"experiment_name": request.experiment_name})
    self.logger.debug("upperbound: %r", upperbound,
                      extra={"experiment_name": request.experiment_name})

    alg = BOAlgorithm(
        experiment_name=request.experiment_name,
        dim=algo_manager.dim,
        N=int(service_params["N"]),
        lowerbound=lowerbound,
        upperbound=upperbound,
        X_train=algo_manager.X_train,
        y_train=algo_manager.y_train,
        mode=service_params["mode"],
        trade_off=service_params["trade_off"],
        # TODO: support length_scale with array type
        length_scale=service_params["length_scale"],
        noise=service_params["noise"],
        nu=service_params["nu"],
        kernel_type=service_params["kernel_type"],
        n_estimators=service_params["n_estimators"],
        max_features=service_params["max_features"],
        model_type=service_params["model_type"],
        logger=self.logger,
    )
    self.logger.debug("alg: %r", alg,
                      extra={"experiment_name": request.experiment_name})

    trials = []
    x_next_list = alg.get_suggestion(request.request_number)
    self.logger.debug("x_next_list: %r", x_next_list,
                      extra={"experiment_name": request.experiment_name})
    for x_next in x_next_list:
        x_next = x_next.squeeze()
        self.logger.debug("x_next: %r", x_next,
                          extra={"experiment_name": request.experiment_name})
        x_next = algo_manager.parse_x_next(x_next)
        x_next = algo_manager.convert_to_dict(x_next)
        trials.append(api_pb2.Trial(
            spec=api_pb2.TrialSpec(
                experiment_name=request.experiment_name,
                parameter_assignments=api_pb2.TrialSpec.ParameterAssignments(
                    assignments=[
                        api_pb2.ParameterAssignment(
                            name=x["name"],
                            value=str(x["value"]),
                        ) for x in x_next
                    ]))))
    return api_pb2.GetSuggestionsReply(trials=trials)
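
# BOAlgorithm's knobs (mode, trade_off, kernel settings, model_type) suggest a
# surrogate-plus-acquisition loop. The sketch below is a generic upper-
# confidence-bound version with scikit-learn, not Katib's BOAlgorithm: fit a
# GP on the observed history, score N random candidates inside the bounds by
# mean + trade_off * std, and suggest the best one.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

def ucb_suggest(X_train, y_train, lowerbound, upperbound,
                N=1000, trade_off=0.01, length_scale=0.5, nu=1.5, seed=0):
    gp = GaussianProcessRegressor(kernel=Matern(length_scale=length_scale, nu=nu))
    gp.fit(X_train, y_train)
    rng = np.random.default_rng(seed)
    candidates = rng.uniform(lowerbound, upperbound,
                             size=(N, len(lowerbound)))
    mean, std = gp.predict(candidates, return_std=True)
    return candidates[np.argmax(mean + trade_off * std)]

if __name__ == "__main__":
    X = np.array([[0.1], [0.4], [0.9]])
    y = np.array([0.2, 0.8, 0.3])
    print(ucb_suggest(X, y, lowerbound=[0.0], upperbound=[1.0]))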