def registerTrials(self, trials):
    """Create every trial in ``trials`` through the manager and record its id.

    A channel to ``self.manager_addr:self.manager_port`` is opened, one
    ``CreateTrial`` request is issued per trial, and the ``trial_id`` from
    each reply is written back onto the corresponding element of ``trials``.

    Returns the same ``trials`` object that was passed in.
    """
    manager_channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(manager_channel) as manager:
        for position, trial in enumerate(trials):
            create_request = api_pb2.CreateTrialRequest(trial=trial)
            # 10 is the second positional argument of the stub call
            # (presumably the timeout; confirm against the gRPC beta API).
            reply = manager.CreateTrial(create_request, 10)
            trials[position].trial_id = reply.trial_id
    return trials
def GetSuggestions(self, request, context):
    """Return one trial describing the architecture for the current iteration.

    When the request is for a study other than ``self.current_study_id`` the
    generator is (re)built via ``self.generate_arch``. On iteration 0 the
    initial architecture is used; up to and including ``self.restruct_itr``
    the architecture is derived from the previous trial's evaluation result;
    after that it is left unchanged. The trial is created through the manager,
    its id is remembered in ``self.prev_trial_id`` and ``self.current_itr``
    is incremented.
    """

    def categorical(name, value):
        # All three parameters are shipped as CATEGORICAL strings.
        return api_pb2.Parameter(
            name=name, value=value, parameter_type=api_pb2.CATEGORICAL)

    if request.study_id != self.current_study_id:
        self.generate_arch(request)

    if self.current_itr == 0:
        self.arch = self.generator.get_init_arch()
    elif self.current_itr <= self.restruct_itr:
        previous_result = self.GetEvaluationResult(
            request.study_id, self.prev_trial_id)
        self.arch = self.generator.get_arch(self.arch, previous_result)

    self.logger.info("Architecture at itr={}".format(self.current_itr))
    self.logger.info(self.arch)

    # The JSON payloads are sent with single quotes in place of double quotes.
    arch_text = str(json.dumps(self.arch)).replace('"', "'")
    config_text = str(json.dumps(self.suggestion_config)).replace('"', "'")

    trials = [
        api_pb2.Trial(
            study_id=request.study_id,
            parameter_set=[
                categorical("architecture", arch_text),
                categorical("parameters", config_text),
                categorical("current_itr", str(self.current_itr)),
            ],
        )
    ]

    manager_channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(manager_channel) as manager:
        for trial in trials:
            reply = manager.CreateTrial(
                api_pb2.CreateTrialRequest(trial=trial), 10)
            trial.trial_id = reply.trial_id
        # Only one trial is created, so this is that trial's id.
        self.prev_trial_id = reply.trial_id

    self.current_itr += 1
    return api_pb2.GetSuggestionsReply(trials=trials)
def GetSuggestions(self, request, context):
    """Sample one architecture from the controller and register it as a trial.

    The first call for a study sets up the controller, samples from freshly
    initialised variables and saves a checkpoint. Later calls restore the
    checkpoint, run one training step fed with the latest evaluation result
    (negated when the study minimises its objective), sample again and save.
    The flat sample is regrouped per layer, described in an ``nn_config``
    dict, and sent to the manager as a single trial.
    """
    study_id = request.study_id
    if study_id not in self.registered_studies:
        self.setup_controller(request)
        self.is_first_run = True
        self.registered_studies.append(study_id)

    rule = "-" * 80
    self.logger.info(
        rule
        + "\nSuggestion Step {} for Study {}\n".format(self.ctrl_step, study_id)
        + rule)

    with self.tf_graph.as_default():
        saver = tf.train.Saver()
        ctrl = self.controllers

        controller_ops = dict(
            train_step=ctrl.train_step,
            loss=ctrl.loss,
            train_op=ctrl.train_op,
            lr=ctrl.lr,
            grad_norm=ctrl.grad_norm,
            optimizer=ctrl.optimizer,
            baseline=ctrl.baseline,
            entropy=ctrl.sample_entropy,
            sample_arc=ctrl.sample_arc,
            skip_rate=ctrl.skip_rate,
        )
        fetch_order = ("loss", "entropy", "lr", "grad_norm", "baseline",
                       "skip_rate", "train_op")
        run_ops = [controller_ops[key] for key in fetch_order]

        if not self.is_first_run:
            with tf.Session() as sess:
                saver.restore(sess, self.ctrl_cache_file)

                reward_input = ctrl.reward
                result = self.GetEvaluationResult(study_id)

                # The controller maximises its reward, so a metric that
                # should be minimised is fed in with its sign flipped.
                if self.opt_direction == api_pb2.MINIMIZE:
                    result = -result

                loss, _entropy, _lr, _grad_norm, _baseline, _skip, _ = sess.run(
                    fetches=run_ops, feed_dict={reward_input: result})
                self.logger.info(
                    "Suggetion updated. LSTM Controller Loss: {}".format(loss))

                arc = sess.run(controller_ops["sample_arc"])
                saver.save(sess, self.ctrl_cache_file)
        else:
            self.logger.info(
                "First time running suggestion for {}. Random architecture will be given."
                .format(study_id))
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                arc = sess.run(controller_ops["sample_arc"])
                # TODO: will use PVC to store the checkpoint to protect
                # against unexpected suggestion pod restart
                saver.save(sess, self.ctrl_cache_file)
            self.is_first_run = False

    # Layer k (0-based) owns the next k + 1 entries of the flat sample.
    flat_arc = arc.tolist()
    organized_arc = []
    offset = 0
    for layer in range(self.num_layers):
        width = layer + 1
        organized_arc.append(flat_arc[offset:offset + width])
        offset += width

    nn_config = {
        'num_layers': self.num_layers,
        'input_size': self.input_size,
        'output_size': self.output_size,
        'embedding': {},
    }
    embedding = nn_config['embedding']
    for layer_arc in organized_arc:
        # The first entry of each layer indexes into the search space.
        opt = layer_arc[0]
        embedding[opt] = self.search_space[opt].get_dict()

    organized_arc_json = json.dumps(organized_arc)
    organized_arc_str = str(organized_arc_json).replace('"', "'")
    nn_config_str = str(json.dumps(nn_config)).replace('"', "'")

    for message in (
            "\nNew Neural Network Architecture (internal representation):",
            organized_arc_json,
            "\nCorresponding Seach Space Description:",
            nn_config_str,
            "",
    ):
        self.logger.info(message)

    trials = [
        api_pb2.Trial(
            study_id=request.study_id,
            parameter_set=[
                api_pb2.Parameter(
                    name="architecture",
                    value=organized_arc_str,
                    parameter_type=api_pb2.CATEGORICAL),
                api_pb2.Parameter(
                    name="nn_config",
                    value=nn_config_str,
                    parameter_type=api_pb2.CATEGORICAL),
            ],
        )
    ]

    manager_channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(manager_channel) as manager:
        for trial in trials:
            reply = manager.CreateTrial(
                api_pb2.CreateTrialRequest(trial=trial), 10)
            trial.trial_id = reply.trial_id
        self.logger.info("Trial {} Created\n".format(reply.trial_id))
        self.prev_trial_id = reply.trial_id

    self.ctrl_step += 1
    return api_pb2.GetSuggestionsReply(trials=trials)
def GetSuggestions(self, request, context):
    """Propose the next population of trials for a study using ``CMAES``.

    Steps, all driven through ``self.stub``:

    1. Fetch the study config and build an ``AlgorithmManager`` and a
       ``CMAES`` instance from its bounds and dimension.
    2. Load the stored suggestion parameters (``path_sigma``, ``path_c``,
       ``C``, ``sigma``, ``mean`` and the ``population`` entries). For each
       population entry whose ``"y"`` is still the empty string, look up the
       trial's workers and metrics and average the last value of the last
       metrics log of every worker.
    3. Pass the collected metrics to ``cma.report_metric`` and ask
       ``cma.get_suggestion`` for new candidates.
    4. Create one trial per candidate and write the new population and the
       updated algorithm state back with ``SetSuggestionParameters``.

    Returns:
        ``api_pb2.GetSuggestionsReply`` holding the created trials. If any
        worker of a not-yet-scored population entry is not ``COMPLETED``,
        ``context`` is set to ``FAILED_PRECONDITION`` and a reply with an
        empty trial list is returned instead.
    """
    trials = []
    ret = self.stub.GetStudy(
        api_pb2.GetStudyRequest(study_id=request.study_id, ))

    algo_manager = AlgorithmManager(
        study_id=request.study_id,
        study_config=ret.study_config,
        X_train=[],
        y_train=[],
    )

    lowerbound = np.array(algo_manager.lower_bound)
    upperbound = np.array(algo_manager.upper_bound)
    cma = CMAES(
        dim=algo_manager.dim,
        upperbound=upperbound,
        lowerbound=lowerbound,
    )

    # Names of the suggestion parameters this method reads and writes.
    param_names = [
        'population', 'path_sigma', 'path_c', 'C', 'sigma', 'mean'
    ]
    # For every name: the param_id it was last seen under and its new value.
    param_info = {}
    for p in param_names:
        param_info[p] = dict(id="", value="")
    ret = self.stub.GetSuggestionParameterList(
        api_pb2.GetSuggestionParameterListRequest(
            study_id=request.study_id, ))

    # The bare string below is the original author's sketch of the layout of
    # each entry appended to `metrics`.
    """ metrics [ { "x": [], "y": , "penalty": } ] """
    metrics = []
    # Defaults, used for any state parameter that is not found in storage.
    path_sigma, path_c, C, sigma, mean = cma.init_params()
    # `ret` is re-bound inside this loop; the iteration itself keeps running
    # over the parameter sets of the reply fetched above.
    for param in ret.suggestion_parameter_sets:
        new_param = []
        for suggestion_param in param.suggestion_parameters:
            # Raises KeyError for a name that is not listed in param_names.
            param_info[suggestion_param.name]["id"] = param.param_id
            if suggestion_param.name == "path_sigma":
                path_sigma = np.array(json.loads(suggestion_param.value))
            elif suggestion_param.name == "path_c":
                path_c = np.array(json.loads(suggestion_param.value))
            elif suggestion_param.name == "C":
                C = np.array(json.loads(suggestion_param.value))
            elif suggestion_param.name == "sigma":
                sigma = np.array(json.loads(suggestion_param.value))
            elif suggestion_param.name == "mean":
                mean = np.array(json.loads(suggestion_param.value))
            elif suggestion_param.name == "population":
                value = json.loads(suggestion_param.value)
                # An empty "y" marks a population entry that has not been
                # scored yet.
                if value["y"] == "":
                    ret = self.stub.GetWorkers(
                        api_pb2.GetWorkersRequest(
                            study_id=request.study_id,
                            trial_id=value["trial_id"],
                        ))
                    worker_ids = []
                    for worker in ret.workers:
                        worker_ids.append(worker.worker_id)

                    ret = self.stub.GetMetrics(
                        api_pb2.GetMetricsRequest(
                            study_id=request.study_id,
                            worker_ids=worker_ids,
                        ))

                    objective_value = 0
                    for metrics_log_set in ret.metrics_log_sets:
                        # The algorithm cannot continue until every trial in
                        # the population has been evaluated.
                        if metrics_log_set.worker_status != api_pb2.COMPLETED:
                            context.set_code(
                                grpc.StatusCode.FAILED_PRECONDITION)
                            context.set_details(
                                "all trials in the population should be evaluated"
                            )
                            return api_pb2.GetSuggestionsReply(trials=[], )
                        # Last value of the last metrics log of this worker.
                        objective_value += float(
                            metrics_log_set.metrics_logs[-1].values[-1])
                    # Mean over workers; divides by zero if no metrics log
                    # sets came back.
                    objective_value /= len(ret.metrics_log_sets)

                    value["y"] = objective_value
                    # NOTE(review): the flattened source does not show whether
                    # the statements from here to `new_param.append` belong
                    # inside this `if`. They are placed inside because
                    # `objective_value` is only assigned in this branch.
                    # Confirm against the original file.
                    # The algorithm minimizes, so the objective is negated
                    # when the study's goal is to maximize.
                    if algo_manager.goal == api_pb2.MAXIMIZE:
                        y = -float(objective_value)
                    else:
                        y = float(objective_value)
                    metrics.append(
                        dict(
                            x=np.array(json.loads(value["x"])),
                            y=y,
                            penalty=value["penalty"],
                        ))
                    new_param.append(
                        api_pb2.SuggestionParameter(name="population",
                                                    value=json.dumps(value)))
        # Write the freshly scored population entries back under the same id.
        if len(new_param) > 0:
            ret = self.stub.SetSuggestionParameters(
                api_pb2.SetSuggestionParametersRequest(
                    study_id=request.study_id,
                    suggestion_algorithm=request.suggestion_algorithm,
                    param_id=param.param_id,
                    suggestion_parameters=new_param,
                ))

    # Update the algorithm state from the collected metrics.
    param_info["path_sigma"]["value"], param_info["path_c"]["value"], param_info["C"]["value"], \
        param_info["sigma"]["value"], param_info["mean"]["value"] = cma.report_metric(
            objective_dict=metrics,
            mean=mean,
            sigma=sigma,
            C=C,
            path_sigma=path_sigma,
            path_c=path_c,
        )

    # The bare string below is the original author's sketch of the layout of
    # `raw_suggestions`.
    """ raw_suggestions: [ { "suggestion":[] "penalty": } ] """
    raw_suggestions = cma.get_suggestion(
        mean=param_info["mean"]["value"],
        sigma=param_info["sigma"]["value"],
        C=param_info["C"]["value"],
    )

    suggestion_params = []
    for raw_suggestion in raw_suggestions:
        # Parse the raw suggestion into the desired trial format.
        trial = algo_manager.parse_x_next(raw_suggestion["suggestion"])
        trial = algo_manager.convert_to_dict(trial)
        new_trial = api_pb2.Trial(
            study_id=request.study_id,
            parameter_set=[
                api_pb2.Parameter(
                    name=x["name"],
                    value=str(x["value"]),
                    parameter_type=x["type"],
                ) for x in trial
            ],
            status=api_pb2.PENDING,
            objective_value="",
        )
        ret = self.stub.CreateTrial(
            api_pb2.CreateTrialRequest(trial=new_trial))
        new_trial.trial_id = ret.trial_id
        trials.append(new_trial)

        # Population entry for this trial; "y" stays empty until the trial
        # has been scored by a later call.
        value = dict(
            trial_id=ret.trial_id,
            x=str(raw_suggestion["suggestion"].tolist()),
            y="",
            penalty=raw_suggestion["penalty"],
        )
        suggestion_params.append(
            api_pb2.SuggestionParameter(name="population",
                                        value=json.dumps(value)))

    # Store the new population under the last population id seen (the empty
    # string if none was loaded).
    ret = self.stub.SetSuggestionParameters(
        api_pb2.SetSuggestionParametersRequest(
            study_id=request.study_id,
            param_id=param_info["population"]["id"],
            suggestion_algorithm=request.suggestion_algorithm,
            suggestion_parameters=suggestion_params,
        ))

    # Store each updated state array as its own parameter.
    for param_name, info in param_info.items():
        if param_name != "population":
            ret = self.stub.SetSuggestionParameters(
                api_pb2.SetSuggestionParametersRequest(
                    study_id=request.study_id,
                    param_id=info["id"],
                    suggestion_algorithm=request.suggestion_algorithm,
                    suggestion_parameters=[
                        api_pb2.SuggestionParameter(
                            name=param_name,
                            value=str(info["value"].tolist()))
                    ]))

    return api_pb2.GetSuggestionsReply(trials=trials, )
def GetSuggestions(self, request, context):
    """Sample ``study.num_trials`` architectures and register them as trials.

    A ``NAS_RL_StudyJob`` is created the first time a study id is seen and
    reused afterwards. On a study's first run the controller variables are
    initialised and sampled directly. On later runs the checkpoint is
    restored, one training step is run with the latest evaluation result as
    reward (negated when the study minimises its objective), and new samples
    are drawn. The controller is checkpointed to ``study.ctrl_cache_file``
    in both cases.

    Each flat sample is regrouped per layer, paired with an ``nn_config``
    description and sent to the manager as a trial. The created trial ids
    are kept in ``self.prev_trial_ids``.

    Returns:
        ``api_pb2.GetSuggestionsReply`` holding the created trials.
    """
    if request.study_id not in self.registered_studies:
        self.registered_studies[request.study_id] = NAS_RL_StudyJob(
            request, self.logger)

    study = self.registered_studies[request.study_id]

    self.logger.info(
        "-" * 100 +
        "\nSuggestion Step {} for StudyJob {} (ID: {})\n".format(
            study.ctrl_step, study.study_name, study.study_id) + "-" * 100)

    with study.tf_graph.as_default():
        saver = tf.train.Saver()
        ctrl = study.controller

        # Fetch order matters: the loss is read back from position 0.
        run_ops = [
            ctrl.loss,
            ctrl.sample_entropy,
            ctrl.lr,
            ctrl.grad_norm,
            ctrl.baseline,
            ctrl.skip_rate,
            ctrl.train_op,
        ]

        if study.is_first_run:
            self.logger.info(
                ">>> First time running suggestion for {}. Random architecture will be given."
                .format(study.study_name))
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                candidates = [
                    sess.run(ctrl.sample_arc)
                    for _ in range(study.num_trials)
                ]
                # TODO: will use PVC to store the checkpoint to protect
                # against unexpected suggestion pod restart
                saver.save(sess, study.ctrl_cache_file)
            study.is_first_run = False
        else:
            with tf.Session() as sess:
                saver.restore(sess, study.ctrl_cache_file)

                result = self.GetEvaluationResult(study)
                # In some rare cases GetEvaluationResult() may return None
                # if GetSuggestions() is called before all the trials are
                # completed; poll until a result is available.
                while result is None:
                    self.logger.warning(
                        ">>> GetEvaluationResult() returns None")
                    time.sleep(20)
                    result = self.GetEvaluationResult(study)

                # The LSTM controller maximises its reward, so a metric that
                # should be minimised is fed in with its sign flipped.
                if study.opt_direction == api_pb2.MINIMIZE:
                    result = -result

                loss = sess.run(
                    fetches=run_ops, feed_dict={ctrl.reward: result})[0]
                # The value fetched here is the controller loss; the message
                # used to label it "Reward".
                self.logger.info(
                    ">>> Suggetion updated. LSTM Controller Loss: {}".format(
                        loss))

                candidates = [
                    sess.run(ctrl.sample_arc)
                    for _ in range(study.num_trials)
                ]
                saver.save(sess, study.ctrl_cache_file)

    trials = []
    for i in range(study.num_trials):
        arc = candidates[i].tolist()

        # Layer k (0-based) owns the next k + 1 entries of the flat sample.
        organized_arc = []
        record = 0
        for layer in range(study.num_layers):
            organized_arc.append(arc[record:record + layer + 1])
            record += layer + 1

        nn_config = {
            'num_layers': study.num_layers,
            'input_size': study.input_size,
            'output_size': study.output_size,
            'embedding': {},
        }
        for layer_arc in organized_arc:
            # The first entry of each layer indexes into the search space.
            opt = layer_arc[0]
            nn_config['embedding'][opt] = study.search_space[opt].get_dict()

        organized_arc_json = json.dumps(organized_arc)
        nn_config_json = json.dumps(nn_config)
        # The payloads are sent with single quotes instead of double quotes.
        organized_arc_str = organized_arc_json.replace('"', "'")
        nn_config_str = nn_config_json.replace('"', "'")

        self.logger.info(
            "\n>>> New Neural Network Architecture Candidate #{} (internal representation):"
            .format(i))
        self.logger.info(organized_arc_json)
        self.logger.info("\n>>> Corresponding Seach Space Description:")
        self.logger.info(nn_config_str)

        trials.append(
            api_pb2.Trial(
                study_id=request.study_id,
                parameter_set=[
                    api_pb2.Parameter(
                        name="architecture",
                        value=organized_arc_str,
                        parameter_type=api_pb2.CATEGORICAL),
                    api_pb2.Parameter(
                        name="nn_config",
                        value=nn_config_str,
                        parameter_type=api_pb2.CATEGORICAL),
                ],
            ))

    self.prev_trial_ids = []
    self.logger.info("")

    channel = grpc.beta.implementations.insecure_channel(
        MANAGER_ADDRESS, MANAGER_PORT)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        for trial in trials:
            ctrep = client.CreateTrial(
                api_pb2.CreateTrialRequest(trial=trial), 10)
            trial.trial_id = ctrep.trial_id
            self.prev_trial_ids.append(ctrep.trial_id)

    self.logger.info(">>> {} Trials were created:".format(
        study.num_trials))
    for trial_id in self.prev_trial_ids:
        self.logger.info(trial_id)
    self.logger.info("")

    study.ctrl_step += 1

    return api_pb2.GetSuggestionsReply(trials=trials)