Example No. 1
    def GetSuggestions(self, request, context):
        """
        Main function to provide suggestions.
        """
        algorithm_name, config = OptimizerConfiguration.convertAlgorithmSpec(
            request.experiment.spec.algorithm)
        if algorithm_name != "bayesianoptimization":
            raise Exception(
                "Failed to create the algorithm: {}".format(algorithm_name))

        if self.is_first_run:
            search_space = HyperParameterSearchSpace.convert(
                request.experiment)
            self.base_service = BaseSkoptService(
                base_estimator=config.base_estimator,
                n_initial_points=config.n_initial_points,
                acq_func=config.acq_func,
                acq_optimizer=config.acq_optimizer,
                random_state=config.random_state,
                search_space=search_space)
            self.is_first_run = False

        trials = Trial.convert(request.trials)
        new_trials = self.base_service.getSuggestions(trials,
                                                      request.request_number)
        return api_pb2.GetSuggestionsReply(
            parameter_assignments=Assignment.generate(new_trials))
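
BaseSkoptService presumably drives scikit-optimize's ask/tell interface with the converted settings. A minimal sketch of that loop, with an illustrative search space and observations (not Katib's converter output):

    import skopt
    from skopt.space import Integer, Real

    opt = skopt.Optimizer(
        dimensions=[Real(1e-3, 1e-1, name="lr"), Integer(2, 5, name="num_layers")],
        base_estimator="GP",        # corresponds to config.base_estimator
        n_initial_points=10,        # corresponds to config.n_initial_points
        acq_func="gp_hedge",        # corresponds to config.acq_func
        acq_optimizer="auto",       # corresponds to config.acq_optimizer
        random_state=None)

    # Replay observations from completed trials, then ask for new suggestions.
    for x, objective in [([0.01, 3], 0.42), ([0.05, 2], 0.35)]:
        opt.tell(x, objective)
    suggestions = opt.ask(n_points=2)   # two new parameter assignments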
Example No. 2
    def GetSuggestions(self, request, context):
        """
        Main function to provide suggestions.
        """
        try:
            reply = api_pb2.GetSuggestionsReply()
            experiment = request.experiment
            self.all_trials = request.trials
            alg_settings = experiment.spec.algorithm.algorithm_settings

            param = HyperBandParam.convert(alg_settings)
            if param.current_s < 0:
                # The Hyperband outer loop has finished.
                return reply
            # This is a hack to get the request number.
            param.n = request.request_number

            trials = self._make_bracket(experiment, param)
            for trial in trials:
                reply.parameter_assignments.add(
                    assignments=trial.parameter_assignments.assignments)
            reply.algorithm.CopyFrom(HyperBandParam.generate(param))
            return reply
        except Exception as e:
            logger.error("Fail to generate trials: \n%s",
                         traceback.format_exc(),
                         extra={"experiment_name": experiment.name})
            raise e
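
For context, here is a minimal sketch of how one Hyperband bracket is typically sized in the standard formulation; HyperBandParam may track this bookkeeping differently, so the function and its arguments are illustrative:

    import math

    def bracket_schedule(max_resource, eta, s):
        # Trial counts and per-trial budgets for one bracket of standard Hyperband.
        s_max = int(math.floor(math.log(max_resource, eta)))
        budget = (s_max + 1) * max_resource
        n = int(math.ceil(budget / max_resource * eta ** s / (s + 1)))
        r = max_resource * eta ** (-s)
        rounds = []
        for i in range(s + 1):
            n_i = int(math.floor(n * eta ** (-i)))   # trials kept in round i
            r_i = r * eta ** i                       # resource given to each trial
            rounds.append((n_i, r_i))
        return rounds

    # bracket_schedule(81, 3, s=4) -> [(81, 1.0), (27, 3.0), (9, 9.0), (3, 27.0), (1, 81.0)]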
Example No. 3
    def GetSuggestions(self, request, context):
        """
        Main function to provide suggestions.
        """
        with self.lock:
            if self.study is None:
                self.search_space = HyperParameterSearchSpace.convert(request.experiment)
                self.study = self._create_study(request.experiment.spec.algorithm, self.search_space)

            trials = Trial.convert(request.trials)

            if len(trials) != 0:
                self._tell(trials)
            list_of_assignments = self._ask(request.request_number)

            return api_pb2.GetSuggestionsReply(
                parameter_assignments=Assignment.generate(list_of_assignments)
            )
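
The _ask and _tell helpers above presumably wrap Optuna's ask-and-tell interface. A minimal sketch of that pattern, assuming a recent Optuna release (3.x) and an illustrative search space:

    import optuna
    from optuna.distributions import FloatDistribution, IntDistribution

    study = optuna.create_study(sampler=optuna.samplers.TPESampler(),
                                direction="minimize")
    distributions = {
        "lr": FloatDistribution(1e-3, 1e-1, log=True),
        "num_layers": IntDistribution(2, 5),
    }

    # Tell: report an observed objective value for a finished trial.
    finished = study.ask(fixed_distributions=distributions)
    study.tell(finished, 0.42)

    # Ask: draw new parameter assignments for the requested number of trials.
    new_assignments = [study.ask(fixed_distributions=distributions).params
                       for _ in range(2)]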
Example No. 4
    def GetSuggestions(self, request, context):
        if self.is_first_run:
            nas_config = request.experiment.spec.nas_config
            # Cache the converted values on self so that later calls, once
            # is_first_run has been cleared, can still reuse them.
            self.num_layers = str(nas_config.graph_config.num_layers)

            search_space = get_search_space(nas_config.operations)

            settings_raw = request.experiment.spec.algorithm.algorithm_settings
            algorithm_settings = get_algorithm_settings(settings_raw)

            search_space_json = json.dumps(search_space)
            algorithm_settings_json = json.dumps(algorithm_settings)

            # Swap double quotes for single quotes so the JSON survives being
            # passed around as a plain string parameter value.
            self.search_space_str = search_space_json.replace('"', "'")
            self.algorithm_settings_str = algorithm_settings_json.replace('"', "'")

            self.is_first_run = False

        parameter_assignments = []
        for i in range(request.current_request_number):

            self.logger.info(">>> Generate new Darts Trial Job")

            self.logger.info(">>> Number of layers {}\n".format(self.num_layers))

            self.logger.info(">>> Search Space")
            self.logger.info("{}\n".format(self.search_space_str))

            self.logger.info(">>> Algorithm Settings")
            self.logger.info("{}\n\n".format(self.algorithm_settings_str))

            parameter_assignments.append(
                api_pb2.GetSuggestionsReply.ParameterAssignments(assignments=[
                    api_pb2.ParameterAssignment(name="algorithm-settings",
                                                value=self.algorithm_settings_str),
                    api_pb2.ParameterAssignment(name="search-space",
                                                value=self.search_space_str),
                    api_pb2.ParameterAssignment(name="num-layers",
                                                value=self.num_layers)
                ]))

        return api_pb2.GetSuggestionsReply(
            parameter_assignments=parameter_assignments)
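
The search space and algorithm settings are handed to the trial as JSON strings whose double quotes were swapped for single quotes. A hypothetical sketch of how a training container could decode such a parameter value (the string below is illustrative, not real converter output):

    import json

    def decode_param(value):
        # Reverse the quote swap done by the suggestion service, then parse.
        return json.loads(value.replace("'", '"'))

    search_space = decode_param("{'operation_1': {'opt_type': 'convolution'}}")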
Example No. 5
    def GetSuggestions(self, request, context):
        """
        Main function to provide suggestions.
        """

        if self.is_first_run:
            search_space = HyperParameterSearchSpace.convert(
                request.experiment)
            self.base_service = BaseChocolateService(
                algorithm_name=request.experiment.spec.algorithm.algorithm_name,
                search_space=search_space)
            self.is_first_run = False

        trials = Trial.convert(request.trials)
        new_assignments = self.base_service.getSuggestions(
            trials, request.request_number)
        return api_pb2.GetSuggestionsReply(
            parameter_assignments=Assignment.generate(new_assignments))
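
All of the services shown here implement the same gRPC Suggestion servicer interface. A minimal sketch of how one might be hosted; the stub module name api_pb2_grpc, its add_SuggestionServicer_to_server helper (standard gRPC Python codegen), and the port are assumptions, not verified project layout:

    from concurrent import futures

    import grpc

    import api_pb2_grpc  # assumed name of the generated gRPC stub module

    def serve(servicer):
        # Host any of the suggestion servicers on an insecure local port.
        server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        api_pb2_grpc.add_SuggestionServicer_to_server(servicer, server)
        server.add_insecure_port("0.0.0.0:6789")  # illustrative port
        server.start()
        server.wait_for_termination()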
Example No. 6
    def GetSuggestions(self, request, context):
        """
        Main function to provide suggestions.
        """
        name, config = OptimizerConfiguration.convert_algorithm_spec(
            request.experiment.spec.algorithm)

        if self.is_first_run:
            search_space = HyperParameterSearchSpace.convert(
                request.experiment)
            self.base_service = BaseHyperoptService(algorithm_name=name,
                                                    algorithm_conf=config,
                                                    search_space=search_space)
            self.is_first_run = False

        trials = Trial.convert(request.trials)
        new_assignments = self.base_service.getSuggestions(
            trials, request.current_request_number)
        return api_pb2.GetSuggestionsReply(
            parameter_assignments=Assignment.generate(new_assignments))
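
For reference, the same kind of TPE search expressed through hyperopt's high-level fmin API. BaseHyperoptService presumably drives the equivalent suggest algorithm through lower-level Trials bookkeeping, so this illustrates the library rather than the wrapper, and the space and objective are stand-ins:

    from hyperopt import Trials, fmin, hp, tpe

    space = {
        "lr": hp.loguniform("lr", -7, -1),
        "num_layers": hp.choice("num_layers", [2, 3, 4, 5]),
    }

    trials = Trials()
    best = fmin(fn=lambda params: params["lr"],   # stand-in objective
                space=space,
                algo=tpe.suggest,
                max_evals=10,
                trials=trials)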
Example No. 7
    def GetSuggestions(self, request, context):
        if self.is_first_run:
            self.experiment = EnasExperiment(request, self.logger)
        experiment = self.experiment
        if request.current_request_number > 0:
            experiment.num_trials = request.current_request_number
        self.logger.info(
            "-" * 100 + "\nSuggestion Step {} for Experiment {}\n".format(
                experiment.suggestion_step, experiment.experiment_name) +
            "-" * 100)

        self.logger.info("")
        self.logger.info(">>> Current Request Number:\t\t{}".format(
            experiment.num_trials))
        self.logger.info("")

        with experiment.tf_graph.as_default():
            saver = tf.compat.v1.train.Saver()
            ctrl = experiment.controller

            controller_ops = {
                "loss": ctrl.loss,
                "entropy": ctrl.sample_entropy,
                "grad_norm": ctrl.grad_norm,
                "baseline": ctrl.baseline,
                "skip_rate": ctrl.skip_rate,
                "train_op": ctrl.train_op,
                "train_step": ctrl.train_step,
                "sample_arc": ctrl.sample_arc,
                "child_val_accuracy": ctrl.child_val_accuracy,
            }

            if self.is_first_run:
                self.logger.info(
                    ">>> First time running suggestion for {}. Random architecture will be given."
                    .format(experiment.experiment_name))
                with tf.compat.v1.Session() as sess:
                    sess.run(tf.compat.v1.global_variables_initializer())
                    candidates = list()
                    for _ in range(experiment.num_trials):
                        candidates.append(
                            sess.run(controller_ops["sample_arc"]))

                    # TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
                    saver.save(sess, experiment.ctrl_cache_file)

                self.is_first_run = False

            else:
                with tf.compat.v1.Session() as sess:
                    saver.restore(sess, experiment.ctrl_cache_file)

                    result = self.GetEvaluationResult(request.trials)

                    # TODO: (andreyvelich) I deleted this part, should it be handled by the controller?
                    # Sometimes training container may fail and GetEvaluationResult() will return None
                    # In this case, the Suggestion will:
                    # 1. Firstly try to respawn the previous trials after waiting for RESPAWN_SLEEP seconds
                    # 2. If respawning the trials for RESPAWN_LIMIT times still cannot collect valid results,
                    #    then fail the task because it may indicate that the training container has errors.
                    if result is None:
                        self.logger.warning(
                            ">>> Suggestion has spawned trials, but they all failed."
                        )
                        self.logger.warning(
                            ">>> Please check whether the training container is correctly implemented"
                        )
                        self.logger.info(">>> Experiment {} failed".format(
                            experiment.experiment_name))
                        # Return an empty reply rather than a bare list so the
                        # gRPC response can still be serialized.
                        return api_pb2.GetSuggestionsReply()

                    # This LSTM network is designed to maximize the metrics
                    # However, if the user wants to minimize the metrics, we can take the negative of the result

                    if experiment.opt_direction == api_pb2.MINIMIZE:
                        result = -result

                    self.logger.info(
                        ">>> Suggestion updated. LSTM Controller Training\n")
                    log_every = experiment.algorithm_settings[
                        "controller_log_every_steps"]
                    for ctrl_step in range(
                            1,
                            experiment.algorithm_settings["controller_train_steps"] + 1):
                        run_ops = [
                            controller_ops["loss"], controller_ops["entropy"],
                            controller_ops["grad_norm"],
                            controller_ops["baseline"],
                            controller_ops["skip_rate"],
                            controller_ops["train_op"]
                        ]

                        loss, entropy, grad_norm, baseline, skip_rate, _ = sess.run(
                            fetches=run_ops,
                            feed_dict={
                                controller_ops["child_val_accuracy"]: result
                            })

                        controller_step = sess.run(
                            controller_ops["train_step"])
                        if ctrl_step % log_every == 0:
                            log_string = ""
                            log_string += "Controller Step: {} - ".format(
                                controller_step)
                            log_string += "Loss: {:.4f} - ".format(loss)
                            log_string += "Entropy: {:.9} - ".format(entropy)
                            log_string += "Gradient Norm: {:.7f} - ".format(
                                grad_norm)
                            log_string += "Baseline={:.4f} - ".format(baseline)
                            log_string += "Skip Rate={:.4f}".format(skip_rate)
                            self.logger.info(log_string)

                    candidates = list()
                    for _ in range(experiment.num_trials):
                        candidates.append(
                            sess.run(controller_ops["sample_arc"]))

                    saver.save(sess, experiment.ctrl_cache_file)

        organized_candidates = list()
        parameter_assignments = list()

        for i in range(experiment.num_trials):
            arc = candidates[i].tolist()
            organized_arc = [0 for _ in range(experiment.num_layers)]
            record = 0
            for layer in range(experiment.num_layers):
                organized_arc[layer] = arc[record:record + layer + 1]
                record += layer + 1
            organized_candidates.append(organized_arc)

            nn_config = dict()
            nn_config['num_layers'] = experiment.num_layers
            nn_config['input_sizes'] = experiment.input_sizes
            nn_config['output_sizes'] = experiment.output_sizes
            nn_config['embedding'] = dict()
            for layer in range(experiment.num_layers):
                opt = organized_arc[layer][0]
                nn_config['embedding'][opt] = experiment.search_space[
                    opt].get_dict()

            organized_arc_json = json.dumps(organized_arc)
            nn_config_json = json.dumps(nn_config)

            organized_arc_str = str(organized_arc_json).replace('\"', '\'')
            nn_config_str = str(nn_config_json).replace('\"', '\'')

            self.logger.info(
                "\n>>> New Neural Network Architecture Candidate #{} (internal representation):"
                .format(i))
            self.logger.info(organized_arc_json)
            self.logger.info("\n>>> Corresponding Seach Space Description:")
            self.logger.info(nn_config_str)

            parameter_assignments.append(
                api_pb2.GetSuggestionsReply.ParameterAssignments(assignments=[
                    api_pb2.ParameterAssignment(name="architecture",
                                                value=organized_arc_str),
                    api_pb2.ParameterAssignment(name="nn_config",
                                                value=nn_config_str)
                ]))

        self.logger.info("")
        self.logger.info(">>> {} Trials were created for Experiment {}".format(
            experiment.num_trials, experiment.experiment_name))
        self.logger.info("")

        experiment.suggestion_step += 1

        return api_pb2.GetSuggestionsReply(
            parameter_assignments=parameter_assignments)
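
The architecture parameter sent to each trial is a nested list serialized as JSON with single quotes: entry i carries the operation index for layer i followed by what appear to be skip-connection flags for the preceding layers. A hypothetical decoding sketch (the value below is illustrative):

    import json

    arch_str = "[[0], [1, 1], [2, 0, 1]]"   # illustrative "architecture" value
    architecture = json.loads(arch_str.replace("'", '"'))
    for layer, row in enumerate(architecture):
        op_index, skips = row[0], row[1:]
        print("layer", layer, "operation", op_index, "skip flags", skips)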