def GetSuggestions(self, request, context):
    """Generate `request.current_request_number` DARTS trial suggestions.

    On the first call the experiment's NAS configuration is parsed and cached
    on the instance; subsequent calls reuse the cached values. Returns an
    api_pb2.GetSuggestionsReply with one ParameterAssignments entry per
    requested trial (algorithm-settings, search-space, num-layers).
    """
    if self.is_first_run:
        nas_config = request.experiment.spec.nas_config
        # BUGFIX: cache the parsed configuration on the instance. These were
        # plain locals before, so every call after the first raised NameError
        # when building the parameter assignments below.
        self.num_layers = str(nas_config.graph_config.num_layers)
        search_space = get_search_space(nas_config.operations)
        settings_raw = request.experiment.spec.algorithm.algorithm_settings
        algorithm_settings = get_algorithm_settings(settings_raw)

        search_space_json = json.dumps(search_space)
        algorithm_settings_json = json.dumps(algorithm_settings)

        # The JSON is passed to trial containers as a flag value, so double
        # quotes are swapped for single quotes to survive shell quoting.
        self.search_space_str = str(search_space_json).replace('\"', '\'')
        self.algorithm_settings_str = str(algorithm_settings_json).replace(
            '\"', '\'')

        self.is_first_run = False

    parameter_assignments = []
    for i in range(request.current_request_number):
        self.logger.info(">>> Generate new Darts Trial Job")
        self.logger.info(">>> Number of layers {}\n".format(self.num_layers))
        self.logger.info(">>> Search Space")
        self.logger.info("{}\n".format(self.search_space_str))
        self.logger.info(">>> Algorithm Settings")
        self.logger.info("{}\n\n".format(self.algorithm_settings_str))

        parameter_assignments.append(
            api_pb2.GetSuggestionsReply.ParameterAssignments(assignments=[
                api_pb2.ParameterAssignment(name="algorithm-settings",
                                            value=self.algorithm_settings_str),
                api_pb2.ParameterAssignment(name="search-space",
                                            value=self.search_space_str),
                api_pb2.ParameterAssignment(name="num-layers",
                                            value=self.num_layers)
            ]))

    return api_pb2.GetSuggestionsReply(
        parameter_assignments=parameter_assignments)
def generate(list_of_assignments):
    """Wrap grouped assignments into gRPC ParameterAssignments messages.

    Each element of `list_of_assignments` is an iterable of objects with
    `name` and `value` attributes; every group becomes one
    api.GetSuggestionsReply.ParameterAssignments message whose values are
    stringified.
    """
    return [
        api.GetSuggestionsReply.ParameterAssignments(assignments=[
            api.ParameterAssignment(name=item.name, value=str(item.value))
            for item in group
        ])
        for group in list_of_assignments
    ]
def test_get_suggestion(self):
    """GetSuggestions (TPE) returns OK and one assignment set per requested trial."""
    # Two completed trials covering all four parameter types.
    # BUGFIX: observation metric names used "metric=1"/"metric=2", which do
    # not match objective_metric_name="metric-2"; the trials therefore carried
    # no retrievable objective value. Renamed to "metric-1"/"metric-2".
    trials = [
        api_pb2.Trial(
            name="test-asfjh",
            spec=api_pb2.TrialSpec(
                objective=api_pb2.ObjectiveSpec(
                    type=api_pb2.MAXIMIZE,
                    objective_metric_name="metric-2",
                    goal=0.9),
                parameter_assignments=api_pb2.TrialSpec.ParameterAssignments(
                    assignments=[
                        api_pb2.ParameterAssignment(
                            name="param-1",
                            value="2",
                        ),
                        api_pb2.ParameterAssignment(
                            name="param-2",
                            value="cat1",
                        ),
                        api_pb2.ParameterAssignment(
                            name="param-3",
                            value="2",
                        ),
                        api_pb2.ParameterAssignment(
                            name="param-4",
                            value="3.44",
                        )
                    ])),
            status=api_pb2.TrialStatus(observation=api_pb2.Observation(
                metrics=[
                    api_pb2.Metric(name="metric-1", value="435"),
                    api_pb2.Metric(name="metric-2", value="5643"),
                ]))),
        api_pb2.Trial(
            name="test-234hs",
            spec=api_pb2.TrialSpec(
                objective=api_pb2.ObjectiveSpec(
                    type=api_pb2.MAXIMIZE,
                    objective_metric_name="metric-2",
                    goal=0.9),
                parameter_assignments=api_pb2.TrialSpec.ParameterAssignments(
                    assignments=[
                        api_pb2.ParameterAssignment(
                            name="param-1",
                            value="3",
                        ),
                        api_pb2.ParameterAssignment(
                            name="param-2",
                            value="cat2",
                        ),
                        api_pb2.ParameterAssignment(
                            name="param-3",
                            value="6",
                        ),
                        api_pb2.ParameterAssignment(
                            name="param-4",
                            value="4.44",
                        )
                    ])),
            status=api_pb2.TrialStatus(observation=api_pb2.Observation(
                metrics=[
                    api_pb2.Metric(name="metric-1", value="123"),
                    api_pb2.Metric(name="metric-2", value="3028"),
                ])))
    ]

    # TPE experiment with one parameter of each supported type.
    experiment = api_pb2.Experiment(
        name="test",
        spec=api_pb2.ExperimentSpec(
            algorithm=api_pb2.AlgorithmSpec(
                algorithm_name="tpe",
                algorithm_settings=[
                    api_pb2.AlgorithmSetting(name="random_state",
                                             value="10"),
                    api_pb2.AlgorithmSetting(name="gamma", value="0.25"),
                    api_pb2.AlgorithmSetting(name="prior_weight",
                                             value="1.0"),
                    api_pb2.AlgorithmSetting(name="n_EI_candidates",
                                             value="24"),
                ],
            ),
            objective=api_pb2.ObjectiveSpec(type=api_pb2.MAXIMIZE,
                                            goal=0.9),
            parameter_specs=api_pb2.ExperimentSpec.ParameterSpecs(parameters=[
                api_pb2.ParameterSpec(
                    name="param-1",
                    parameter_type=api_pb2.INT,
                    feasible_space=api_pb2.FeasibleSpace(
                        max="5", min="1", list=[]),
                ),
                api_pb2.ParameterSpec(
                    name="param-2",
                    parameter_type=api_pb2.CATEGORICAL,
                    feasible_space=api_pb2.FeasibleSpace(
                        max=None, min=None,
                        list=["cat1", "cat2", "cat3"])),
                api_pb2.ParameterSpec(
                    name="param-3",
                    parameter_type=api_pb2.DISCRETE,
                    feasible_space=api_pb2.FeasibleSpace(
                        max=None, min=None, list=["3", "2", "6"])),
                api_pb2.ParameterSpec(
                    name="param-4",
                    parameter_type=api_pb2.DOUBLE,
                    feasible_space=api_pb2.FeasibleSpace(
                        max="5", min="1", list=[]))
            ])))

    request = api_pb2.GetSuggestionsRequest(
        experiment=experiment,
        trials=trials,
        request_number=2,
    )

    get_suggestion = self.test_server.invoke_unary_unary(
        method_descriptor=(
            api_pb2.DESCRIPTOR.services_by_name['Suggestion'].
            methods_by_name['GetSuggestions']),
        invocation_metadata={},
        request=request,
        timeout=1)

    response, metadata, code, details = get_suggestion.termination()
    print(response.parameter_assignments)
    self.assertEqual(code, grpc.StatusCode.OK)
    self.assertEqual(2, len(response.parameter_assignments))
def GetSuggestions(self, request, context):
    """Generate ENAS architecture suggestions for the requested trials.

    First call: initializes the EnasExperiment, samples random architectures
    from the untrained LSTM controller and checkpoints it. Later calls:
    restore the controller checkpoint, train it on the evaluation results of
    the previous trials, then sample new architectures. Returns an
    api_pb2.GetSuggestionsReply with one (architecture, nn_config) assignment
    pair per trial.
    """
    if self.is_first_run:
        self.experiment = EnasExperiment(request, self.logger)
    experiment = self.experiment
    if request.current_request_number > 0:
        experiment.num_trials = request.current_request_number
    self.logger.info("-" * 100 + "\nSuggestion Step {} for Experiment {}\n".format(
        experiment.suggestion_step, experiment.experiment_name) + "-" * 100)
    self.logger.info("")
    self.logger.info(">>> Current Request Number:\t\t{}".format(
        experiment.num_trials))
    self.logger.info("")
    with experiment.tf_graph.as_default():
        saver = tf.compat.v1.train.Saver()
        ctrl = experiment.controller
        # Named handles to the controller's graph ops, fetched via sess.run below.
        controller_ops = {
            "loss": ctrl.loss,
            "entropy": ctrl.sample_entropy,
            "grad_norm": ctrl.grad_norm,
            "baseline": ctrl.baseline,
            "skip_rate": ctrl.skip_rate,
            "train_op": ctrl.train_op,
            "train_step": ctrl.train_step,
            "sample_arc": ctrl.sample_arc,
            "child_val_accuracy": ctrl.child_val_accuracy,
        }
        if self.is_first_run:
            # No feedback yet: sample from the freshly initialized controller.
            self.logger.info(
                ">>> First time running suggestion for {}. Random architecture will be given."
                .format(experiment.experiment_name))
            with tf.compat.v1.Session() as sess:
                sess.run(tf.compat.v1.global_variables_initializer())
                candidates = list()
                for _ in range(experiment.num_trials):
                    candidates.append(
                        sess.run(controller_ops["sample_arc"]))
                # TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
                saver.save(sess, experiment.ctrl_cache_file)
            self.is_first_run = False
        else:
            with tf.compat.v1.Session() as sess:
                saver.restore(sess, experiment.ctrl_cache_file)
                result = self.GetEvaluationResult(request.trials)

                # TODO: (andreyvelich) I deleted this part, should it be handle by controller?
                # Sometimes training container may fail and GetEvaluationResult() will return None
                # In this case, the Suggestion will:
                # 1. Firstly try to respawn the previous trials after waiting for RESPAWN_SLEEP seconds
                # 2. If respawning the trials for RESPAWN_LIMIT times still cannot collect valid results,
                #    then fail the task because it may indicate that the training container has errors.
                if result is None:
                    self.logger.warning(
                        ">>> Suggestion has spawned trials, but they all failed."
                    )
                    self.logger.warning(
                        ">>> Please check whether the training container is correctly implemented"
                    )
                    self.logger.info(">>> Experiment {} failed".format(
                        experiment.experiment_name))
                    # BUGFIX: a unary gRPC handler must return the reply
                    # message type; returning a bare list made grpc fail
                    # serialization. Return an empty reply instead.
                    return api_pb2.GetSuggestionsReply()

                # This LSTM network is designed to maximize the metrics
                # However, if the user wants to minimize the metrics, we can take the negative of the result
                if experiment.opt_direction == api_pb2.MINIMIZE:
                    result = -result

                self.logger.info(
                    ">>> Suggestion updated. LSTM Controller Training\n")
                log_every = experiment.algorithm_settings[
                    "controller_log_every_steps"]
                for ctrl_step in range(
                        1, experiment.
                        algorithm_settings["controller_train_steps"] + 1):
                    run_ops = [
                        controller_ops["loss"],
                        controller_ops["entropy"],
                        controller_ops["grad_norm"],
                        controller_ops["baseline"],
                        controller_ops["skip_rate"],
                        controller_ops["train_op"]
                    ]
                    # Feed the (possibly negated) trial result in as the
                    # child network's validation accuracy reward signal.
                    loss, entropy, grad_norm, baseline, skip_rate, _ = sess.run(
                        fetches=run_ops,
                        feed_dict={
                            controller_ops["child_val_accuracy"]: result
                        })
                    controller_step = sess.run(
                        controller_ops["train_step"])
                    if ctrl_step % log_every == 0:
                        log_string = ""
                        log_string += "Controller Step: {} - ".format(
                            controller_step)
                        log_string += "Loss: {:.4f} - ".format(loss)
                        log_string += "Entropy: {:.9} - ".format(entropy)
                        log_string += "Gradient Norm: {:.7f} - ".format(
                            grad_norm)
                        log_string += "Baseline={:.4f} - ".format(baseline)
                        log_string += "Skip Rate={:.4f}".format(skip_rate)
                        self.logger.info(log_string)

                candidates = list()
                for _ in range(experiment.num_trials):
                    candidates.append(
                        sess.run(controller_ops["sample_arc"]))
                saver.save(sess, experiment.ctrl_cache_file)

    organized_candidates = list()
    parameter_assignments = list()
    for i in range(experiment.num_trials):
        arc = candidates[i].tolist()
        # The controller emits a flat arc; regroup it so layer L owns the
        # next L+1 entries (its op choice plus skip-connection bits).
        organized_arc = [0 for _ in range(experiment.num_layers)]
        record = 0
        for layer in range(experiment.num_layers):
            organized_arc[layer] = arc[record:record + layer + 1]
            record += layer + 1
        organized_candidates.append(organized_arc)

        # Describe only the operations this candidate actually uses.
        nn_config = dict()
        nn_config['num_layers'] = experiment.num_layers
        nn_config['input_sizes'] = experiment.input_sizes
        nn_config['output_sizes'] = experiment.output_sizes
        nn_config['embedding'] = dict()
        for layer in range(experiment.num_layers):
            opt = organized_arc[layer][0]
            nn_config['embedding'][opt] = experiment.search_space[
                opt].get_dict()

        organized_arc_json = json.dumps(organized_arc)
        nn_config_json = json.dumps(nn_config)
        # Single quotes so the JSON survives shell quoting in trial CLI flags.
        organized_arc_str = str(organized_arc_json).replace('\"', '\'')
        nn_config_str = str(nn_config_json).replace('\"', '\'')
        self.logger.info(
            "\n>>> New Neural Network Architecture Candidate #{} (internal representation):"
            .format(i))
        self.logger.info(organized_arc_json)
        # Typo fix in log message: "Seach" -> "Search".
        self.logger.info("\n>>> Corresponding Search Space Description:")
        self.logger.info(nn_config_str)
        parameter_assignments.append(
            api_pb2.GetSuggestionsReply.ParameterAssignments(assignments=[
                api_pb2.ParameterAssignment(name="architecture",
                                            value=organized_arc_str),
                api_pb2.ParameterAssignment(name="nn_config",
                                            value=nn_config_str)
            ]))
    self.logger.info("")
    self.logger.info(">>> {} Trials were created for Experiment {}".format(
        experiment.num_trials, experiment.experiment_name))
    self.logger.info("")
    experiment.suggestion_step += 1
    return api_pb2.GetSuggestionsReply(
        parameter_assignments=parameter_assignments)
def test_get_suggestion(self):
    """The ENAS suggestion service returns OK and one architecture per requested trial."""
    # Both completed trials share the same objective; protobuf copies
    # message-valued keyword arguments, so reusing one spec instance is safe.
    shared_objective = api_pb2.ObjectiveSpec(
        type=api_pb2.MAXIMIZE,
        objective_metric_name="Validation-Accuracy",
        goal=0.99)

    def succeeded_trial(trial_name, arch_value, accuracy_value):
        # Build one SUCCEEDED trial with a sampled architecture and its metric.
        return api_pb2.Trial(
            name=trial_name,
            spec=api_pb2.TrialSpec(
                objective=shared_objective,
                parameter_assignments=api_pb2.TrialSpec.ParameterAssignments(
                    assignments=[
                        api_pb2.ParameterAssignment(
                            name="architecture",
                            value=arch_value,
                        ),
                        api_pb2.ParameterAssignment(
                            name="nn_config",
                            value="{'num_layers': 4}",
                        ),
                    ])),
            status=api_pb2.TrialStatus(
                observation=api_pb2.Observation(metrics=[
                    api_pb2.Metric(name="Validation-Accuracy",
                                   value=accuracy_value),
                ]),
                condition=api_pb2.TrialStatus.TrialConditionType.SUCCEEDED,
            ))

    trials = [
        succeeded_trial("first-trial",
                        "[[3], [0, 1], [0, 0, 1], [2, 1, 0, 0]]", "0.88"),
        succeeded_trial("second-trial",
                        "[[1], [0, 1], [2, 1, 1], [2, 1, 1, 0]]", "0.84"),
    ]

    # Search space: one convolution and one reduction operation.
    convolution_op = api_pb2.Operation(
        operation_type="convolution",
        parameter_specs=api_pb2.Operation.ParameterSpecs(parameters=[
            api_pb2.ParameterSpec(
                name="filter_size",
                parameter_type=api_pb2.CATEGORICAL,
                feasible_space=api_pb2.FeasibleSpace(
                    max=None, min=None, list=["5"])),
            api_pb2.ParameterSpec(
                name="num_filter",
                parameter_type=api_pb2.CATEGORICAL,
                feasible_space=api_pb2.FeasibleSpace(
                    max=None, min=None, list=["128"])),
            api_pb2.ParameterSpec(
                name="stride",
                parameter_type=api_pb2.CATEGORICAL,
                feasible_space=api_pb2.FeasibleSpace(
                    max=None, min=None, list=["1", "2"])),
        ]))
    reduction_op = api_pb2.Operation(
        operation_type="reduction",
        parameter_specs=api_pb2.Operation.ParameterSpecs(parameters=[
            api_pb2.ParameterSpec(
                name="reduction_type",
                parameter_type=api_pb2.CATEGORICAL,
                feasible_space=api_pb2.FeasibleSpace(
                    max=None, min=None, list=["max_pooling"])),
            api_pb2.ParameterSpec(
                name="pool_size",
                parameter_type=api_pb2.INT,
                feasible_space=api_pb2.FeasibleSpace(
                    min="2", max="3", step="1", list=[])),
        ]))

    experiment = api_pb2.Experiment(
        name="enas-experiment",
        spec=api_pb2.ExperimentSpec(
            algorithm=api_pb2.AlgorithmSpec(algorithm_name="enas", ),
            objective=api_pb2.ObjectiveSpec(
                type=api_pb2.MAXIMIZE,
                goal=0.9,
                objective_metric_name="Validation-Accuracy"),
            parallel_trial_count=2,
            max_trial_count=10,
            nas_config=api_pb2.NasConfig(
                graph_config=api_pb2.GraphConfig(
                    num_layers=4,
                    input_sizes=[32, 32, 8],
                    output_sizes=[10]),
                operations=api_pb2.NasConfig.Operations(
                    operation=[convolution_op, reduction_op], ))))

    request = api_pb2.GetSuggestionsRequest(
        experiment=experiment,
        trials=trials,
        request_number=2,
    )

    get_suggestion = self.test_server.invoke_unary_unary(
        method_descriptor=(
            api_pb2.DESCRIPTOR.services_by_name['Suggestion'].
            methods_by_name['GetSuggestions']),
        invocation_metadata={},
        request=request,
        timeout=100)

    response, metadata, code, details = get_suggestion.termination()
    print(response.parameter_assignments)
    self.assertEqual(code, grpc.StatusCode.OK)
    self.assertEqual(2, len(response.parameter_assignments))