Code Example #1
File: bayesian_service.py  Project: vmolina/katib
    def registerTrials(self, trials):
        # Register each trial with the Katib manager over gRPC and record
        # the trial id that the manager assigns to it.
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            for i, t in enumerate(trials):
                # The second positional argument is the per-call timeout in seconds.
                ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t),
                                           10)
                trials[i].trial_id = ctrep.trial_id
        return trials
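The channel/stub helpers above (grpc.beta.implementations.insecure_channel, api_pb2.beta_create_Manager_stub) belong to the long-deprecated gRPC "beta" API. For reference, a minimal sketch of the same registration loop on the current gRPC Python API is shown below; the api_pb2_grpc module and ManagerStub names are assumptions about the generated code, not something taken from this project.

import grpc
# Assumed names for the protoc-generated modules; adjust to your build.
import api_pb2
import api_pb2_grpc


def register_trials(manager_addr, manager_port, trials, timeout=10):
    # Same call pattern as registerTrials above, using the non-beta gRPC API.
    with grpc.insecure_channel("{}:{}".format(manager_addr, manager_port)) as channel:
        client = api_pb2_grpc.ManagerStub(channel)
        for i, t in enumerate(trials):
            ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t),
                                       timeout=timeout)
            trials[i].trial_id = ctrep.trial_id
    return trials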
Code Example #2
    def GetSuggestions(self, request, context):
        if request.study_id != self.current_study_id:
            self.generate_arch(request)

        if self.current_itr == 0:
            self.arch = self.generator.get_init_arch()
        elif self.current_itr <= self.restruct_itr:
            result = self.GetEvaluationResult(request.study_id,
                                              self.prev_trial_id)
            self.arch = self.generator.get_arch(self.arch, result)

        self.logger.info("Architecture at itr={}".format(self.current_itr))
        self.logger.info(self.arch)
        arch_json = json.dumps(self.arch)
        config_json = json.dumps(self.suggestion_config)
        arch = str(arch_json).replace('\"', '\'')
        config = str(config_json).replace('\"', '\'')

        trials = []
        trials.append(
            api_pb2.Trial(
                study_id=request.study_id,
                parameter_set=[
                    api_pb2.Parameter(name="architecture",
                                      value=arch,
                                      parameter_type=api_pb2.CATEGORICAL),
                    api_pb2.Parameter(name="parameters",
                                      value=config,
                                      parameter_type=api_pb2.CATEGORICAL),
                    api_pb2.Parameter(name="current_itr",
                                      value=str(self.current_itr),
                                      parameter_type=api_pb2.CATEGORICAL)
                ],
            ))

        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            for i, t in enumerate(trials):
                ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t),
                                           10)
                trials[i].trial_id = ctrep.trial_id
            self.prev_trial_id = ctrep.trial_id

        self.current_itr += 1

        return api_pb2.GetSuggestionsReply(trials=trials)
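The architecture and config JSON above are stored in CATEGORICAL parameter values with double quotes swapped for single quotes (the later NAS examples do the same). A small, self-contained sketch of that encoding and its inverse, not taken from the Katib source:

import json

def encode_for_param(obj):
    # Same transformation as above: dump to JSON, then swap " for '.
    return json.dumps(obj).replace('"', "'")

def decode_from_param(s):
    # Reverse the swap before parsing; assumes the payload itself
    # contains no literal quote characters.
    return json.loads(s.replace("'", '"'))

nn_config = {"num_layers": 3, "input_size": [32, 32, 3]}
encoded = encode_for_param(nn_config)   # "{'num_layers': 3, 'input_size': [32, 32, 3]}"
assert decode_from_param(encoded) == nn_config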
Code Example #3
    def GetSuggestions(self, request, context):
        if request.study_id not in self.registered_studies:
            self.setup_controller(request)
            self.is_first_run = True
            self.registered_studies.append(request.study_id)

        self.logger.info("-" * 80 +
                         "\nSuggestion Step {} for Study {}\n".format(
                             self.ctrl_step, request.study_id) + "-" * 80)

        with self.tf_graph.as_default():

            saver = tf.train.Saver()
            ctrl = self.controllers

            controller_ops = {
                "train_step": ctrl.train_step,
                "loss": ctrl.loss,
                "train_op": ctrl.train_op,
                "lr": ctrl.lr,
                "grad_norm": ctrl.grad_norm,
                "optimizer": ctrl.optimizer,
                "baseline": ctrl.baseline,
                "entropy": ctrl.sample_entropy,
                "sample_arc": ctrl.sample_arc,
                "skip_rate": ctrl.skip_rate
            }

            run_ops = [
                controller_ops["loss"], controller_ops["entropy"],
                controller_ops["lr"], controller_ops["grad_norm"],
                controller_ops["baseline"], controller_ops["skip_rate"],
                controller_ops["train_op"]
            ]

            if self.is_first_run:
                self.logger.info(
                    "First time running suggestion for {}. Random architecture will be given."
                    .format(request.study_id))
                with tf.Session() as sess:
                    sess.run(tf.global_variables_initializer())
                    arc = sess.run(controller_ops["sample_arc"])
                    # TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
                    saver.save(sess, self.ctrl_cache_file)

                self.is_first_run = False

            else:
                with tf.Session() as sess:
                    saver.restore(sess, self.ctrl_cache_file)

                    valid_acc = ctrl.reward
                    result = self.GetEvaluationResult(request.study_id)

                    # This LSTM cell is designed to maximize the metric.
                    # However, if the user wants to minimize the metric, we can take the negative of the result.
                    if self.opt_direction == api_pb2.MINIMIZE:
                        result = -result

                    loss, entropy, lr, gn, bl, skip, _ = sess.run(
                        fetches=run_ops, feed_dict={valid_acc: result})
                    self.logger.info(
                        "Suggetion updated. LSTM Controller Loss: {}".format(
                            loss))
                    arc = sess.run(controller_ops["sample_arc"])

                    saver.save(sess, self.ctrl_cache_file)

        arc = arc.tolist()
        organized_arc = [0 for _ in range(self.num_layers)]
        record = 0
        for l in range(self.num_layers):
            organized_arc[l] = arc[record:record + l + 1]
            record += l + 1

        nn_config = dict()
        nn_config['num_layers'] = self.num_layers
        nn_config['input_size'] = self.input_size
        nn_config['output_size'] = self.output_size
        nn_config['embedding'] = dict()
        for l in range(self.num_layers):
            opt = organized_arc[l][0]
            nn_config['embedding'][opt] = self.search_space[opt].get_dict()

        organized_arc_json = json.dumps(organized_arc)
        nn_config_json = json.dumps(nn_config)

        organized_arc_str = str(organized_arc_json).replace('\"', '\'')
        nn_config_str = str(nn_config_json).replace('\"', '\'')

        self.logger.info(
            "\nNew Neural Network Architecture (internal representation):")
        self.logger.info(organized_arc_json)
        self.logger.info("\nCorresponding Seach Space Description:")
        self.logger.info(nn_config_str)
        self.logger.info("")

        trials = []
        trials.append(
            api_pb2.Trial(
                study_id=request.study_id,
                parameter_set=[
                    api_pb2.Parameter(name="architecture",
                                      value=organized_arc_str,
                                      parameter_type=api_pb2.CATEGORICAL),
                    api_pb2.Parameter(name="nn_config",
                                      value=nn_config_str,
                                      parameter_type=api_pb2.CATEGORICAL)
                ],
            ))

        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            for i, t in enumerate(trials):
                ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t),
                                           10)
                trials[i].trial_id = ctrep.trial_id
            self.logger.info("Trial {} Created\n".format(ctrep.trial_id))
            self.prev_trial_id = ctrep.trial_id

        self.ctrl_step += 1
        return api_pb2.GetSuggestionsReply(trials=trials)
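The loop that fills organized_arc unpacks the flat sample_arc vector layer by layer: layer l consumes the next l + 1 entries, the first of which is the operation index used to look up search_space[opt] (Example #5 repeats the same unpacking per candidate). A standalone sketch of that unpacking, with made-up sample values:

def organize_arc(flat_arc, num_layers):
    # Layer l consumes l + 1 entries: [op_index, skip_0, ..., skip_{l-1}].
    organized, record = [], 0
    for l in range(num_layers):
        organized.append(flat_arc[record:record + l + 1])
        record += l + 1
    return organized

# 3 layers -> 1 + 2 + 3 = 6 entries in the flat vector.
print(organize_arc([4, 0, 1, 2, 1, 0], 3))
# [[4], [0, 1], [2, 1, 0]]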
Code Example #4
    def GetSuggestions(self, request, context):
        trials = []
        ret = self.stub.GetStudy(
            api_pb2.GetStudyRequest(study_id=request.study_id, ))

        algo_manager = AlgorithmManager(
            study_id=request.study_id,
            study_config=ret.study_config,
            X_train=[],
            y_train=[],
        )
        lowerbound = np.array(algo_manager.lower_bound)
        upperbound = np.array(algo_manager.upper_bound)

        cma = CMAES(
            dim=algo_manager.dim,
            upperbound=upperbound,
            lowerbound=lowerbound,
        )

        param_names = [
            'population', 'path_sigma', 'path_c', 'C', 'sigma', 'mean'
        ]
        param_info = {}
        for p in param_names:
            param_info[p] = dict(id="", value="")

        ret = self.stub.GetSuggestionParameterList(
            api_pb2.GetSuggestionParameterListRequest(
                study_id=request.study_id, ))
        """
        metrics
        [
            {
                "x": [],
                "y": ,
                "penalty":
            }
        ]
        """
        metrics = []

        path_sigma, path_c, C, sigma, mean = cma.init_params()
        for param in ret.suggestion_parameter_sets:
            new_param = []
            for suggestion_param in param.suggestion_parameters:
                param_info[suggestion_param.name]["id"] = param.param_id
                if suggestion_param.name == "path_sigma":
                    path_sigma = np.array(json.loads(suggestion_param.value))
                elif suggestion_param.name == "path_c":
                    path_c = np.array(json.loads(suggestion_param.value))
                elif suggestion_param.name == "C":
                    C = np.array(json.loads(suggestion_param.value))
                elif suggestion_param.name == "sigma":
                    sigma = np.array(json.loads(suggestion_param.value))
                elif suggestion_param.name == "mean":
                    mean = np.array(json.loads(suggestion_param.value))
                elif suggestion_param.name == "population":
                    value = json.loads(suggestion_param.value)
                    if value["y"] == "":
                        ret = self.stub.GetWorkers(
                            api_pb2.GetWorkersRequest(
                                study_id=request.study_id,
                                trial_id=value["trial_id"],
                            ))
                        worker_ids = []
                        for worker in ret.workers:
                            worker_ids.append(worker.worker_id)
                        ret = self.stub.GetMetrics(
                            api_pb2.GetMetricsRequest(
                                study_id=request.study_id,
                                worker_ids=worker_ids,
                            ))

                        objective_value = 0
                        for metrics_log_set in ret.metrics_log_sets:
                            # the algorithm cannot continue until all trials in the population have been evaluated
                            if metrics_log_set.worker_status != api_pb2.COMPLETED:
                                context.set_code(
                                    grpc.StatusCode.FAILED_PRECONDITION)
                                context.set_details(
                                    "all trials in the population should be evaluated"
                                )
                                return api_pb2.GetSuggestionsReply(trials=[], )
                            objective_value += float(
                                metrics_log_set.metrics_logs[-1].values[-1])
                        objective_value /= len(ret.metrics_log_sets)
                        value["y"] = objective_value

                        # the algorithm is originally for minimization
                        if algo_manager.goal == api_pb2.MAXIMIZE:
                            y = -float(objective_value)
                        else:
                            y = float(objective_value)
                        metrics.append(
                            dict(
                                x=np.array(json.loads(value["x"])),
                                y=y,
                                penalty=value["penalty"],
                            ))

                    new_param.append(
                        api_pb2.SuggestionParameter(name="population",
                                                    value=json.dumps(value)))

            if len(new_param) > 0:
                ret = self.stub.SetSuggestionParameters(
                    api_pb2.SetSuggestionParametersRequest(
                        study_id=request.study_id,
                        suggestion_algorithm=request.suggestion_algorithm,
                        param_id=param.param_id,
                        suggestion_parameters=new_param,
                    ))

        param_info["path_sigma"]["value"], param_info["path_c"]["value"], param_info["C"]["value"], \
        param_info["sigma"]["value"], param_info["mean"]["value"] = cma.report_metric(
            objective_dict=metrics,
            mean=mean,
            sigma=sigma,
            C=C,
            path_sigma=path_sigma,
            path_c=path_c,
        )
        """
        raw_suggestions:
        [
            {
                "suggestion":[]
                "penalty":
            }
        ]
        """
        raw_suggestions = cma.get_suggestion(
            mean=param_info["mean"]["value"],
            sigma=param_info["sigma"]["value"],
            C=param_info["C"]["value"],
        )

        suggestion_params = []
        for raw_suggestion in raw_suggestions:
            # parse the raw suggestions to desired format
            trial = algo_manager.parse_x_next(raw_suggestion["suggestion"])
            trial = algo_manager.convert_to_dict(trial)
            new_trial = api_pb2.Trial(
                study_id=request.study_id,
                parameter_set=[
                    api_pb2.Parameter(
                        name=x["name"],
                        value=str(x["value"]),
                        parameter_type=x["type"],
                    ) for x in trial
                ],
                status=api_pb2.PENDING,
                objective_value="",
            )
            ret = self.stub.CreateTrial(
                api_pb2.CreateTrialRequest(trial=new_trial))
            new_trial.trial_id = ret.trial_id
            trials.append(new_trial)

            value = dict(
                trial_id=ret.trial_id,
                x=str(raw_suggestion["suggestion"].tolist()),
                y="",
                penalty=raw_suggestion["penalty"],
            )
            suggestion_params.append(
                api_pb2.SuggestionParameter(name="population",
                                            value=json.dumps(value)))

        ret = self.stub.SetSuggestionParameters(
            api_pb2.SetSuggestionParametersRequest(
                study_id=request.study_id,
                param_id=param_info["population"]["id"],
                suggestion_algorithm=request.suggestion_algorithm,
                suggestion_parameters=suggestion_params,
            ))

        for param_name, info in param_info.items():
            if param_name != "population":
                ret = self.stub.SetSuggestionParameters(
                    api_pb2.SetSuggestionParametersRequest(
                        study_id=request.study_id,
                        param_id=info["id"],
                        suggestion_algorithm=request.suggestion_algorithm,
                        suggestion_parameters=[
                            api_pb2.SuggestionParameter(
                                name=param_name,
                                value=str(info["value"].tolist()))
                        ]))

        return api_pb2.GetSuggestionsReply(trials=trials, )
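Each population member survives between GetSuggestions calls as a "population" suggestion parameter whose value is a JSON dict holding trial_id, x (the sampled point as a stringified list), y (empty until the trial is evaluated), and penalty. A minimal sketch of writing and reading that payload, following the same conventions as the code above (the helper names are illustrative only):

import json
import numpy as np

def make_population_value(trial_id, x, penalty):
    # y stays "" until every trial in the population has been evaluated.
    return json.dumps(dict(trial_id=trial_id,
                           x=str(x.tolist()),
                           y="",
                           penalty=penalty))

def read_population_value(value):
    payload = json.loads(value)
    # x was stored as a stringified list, so it parses back as JSON.
    x = np.array(json.loads(payload["x"]))
    return payload["trial_id"], x, payload["y"], payload["penalty"]

value = make_population_value("trial-abc", np.array([0.1, 2.5]), 0.0)
print(read_population_value(value))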
Code Example #5
File: nasrl_service.py  Project: garganubhav/katib
    def GetSuggestions(self, request, context):
        if request.study_id not in self.registered_studies:
            self.registered_studies[request.study_id] = NAS_RL_StudyJob(
                request, self.logger)

        study = self.registered_studies[request.study_id]

        self.logger.info(
            "-" * 100 +
            "\nSuggestion Step {} for StudyJob {} (ID: {})\n".format(
                study.ctrl_step, study.study_name, study.study_id) + "-" * 100)

        with study.tf_graph.as_default():

            saver = tf.train.Saver()
            ctrl = study.controller

            controller_ops = {
                "train_step": ctrl.train_step,
                "loss": ctrl.loss,
                "train_op": ctrl.train_op,
                "lr": ctrl.lr,
                "grad_norm": ctrl.grad_norm,
                "optimizer": ctrl.optimizer,
                "baseline": ctrl.baseline,
                "entropy": ctrl.sample_entropy,
                "sample_arc": ctrl.sample_arc,
                "skip_rate": ctrl.skip_rate
            }

            run_ops = [
                controller_ops["loss"], controller_ops["entropy"],
                controller_ops["lr"], controller_ops["grad_norm"],
                controller_ops["baseline"], controller_ops["skip_rate"],
                controller_ops["train_op"]
            ]

            if study.is_first_run:
                self.logger.info(
                    ">>> First time running suggestion for {}. Random architecture will be given."
                    .format(study.study_name))
                with tf.Session() as sess:
                    sess.run(tf.global_variables_initializer())
                    candidates = list()
                    for _ in range(study.num_trials):
                        candidates.append(
                            sess.run(controller_ops["sample_arc"]))

                    # TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
                    saver.save(sess, study.ctrl_cache_file)

                study.is_first_run = False

            else:
                with tf.Session() as sess:
                    saver.restore(sess, study.ctrl_cache_file)

                    valid_acc = ctrl.reward
                    result = self.GetEvaluationResult(study)

                    # In some rare cases, GetEvaluationResult() may return None
                    # if GetSuggestions() is called before all the trials are completed
                    while result is None:
                        self.logger.warning(
                            ">>> GetEvaluationResult() returns None")
                        time.sleep(20)
                        result = self.GetEvaluationResult(study)

                    # This LSTM network is designed to maximize the metrics
                    # However, if the user wants to minimize the metrics, we can take the negative of the result
                    if study.opt_direction == api_pb2.MINIMIZE:
                        result = -result

                    loss, entropy, lr, gn, bl, skip, _ = sess.run(
                        fetches=run_ops, feed_dict={valid_acc: result})
                    self.logger.info(
                        ">>> Suggetion updated. LSTM Controller Reward: {}".
                        format(loss))

                    candidates = list()
                    for _ in range(study.num_trials):
                        candidates.append(
                            sess.run(controller_ops["sample_arc"]))

                    saver.save(sess, study.ctrl_cache_file)

        organized_candidates = list()
        trials = list()

        for i in range(study.num_trials):
            arc = candidates[i].tolist()
            organized_arc = [0 for _ in range(study.num_layers)]
            record = 0
            for l in range(study.num_layers):
                organized_arc[l] = arc[record:record + l + 1]
                record += l + 1
            organized_candidates.append(organized_arc)

            nn_config = dict()
            nn_config['num_layers'] = study.num_layers
            nn_config['input_size'] = study.input_size
            nn_config['output_size'] = study.output_size
            nn_config['embedding'] = dict()
            for l in range(study.num_layers):
                opt = organized_arc[l][0]
                nn_config['embedding'][opt] = study.search_space[opt].get_dict(
                )

            organized_arc_json = json.dumps(organized_arc)
            nn_config_json = json.dumps(nn_config)

            organized_arc_str = str(organized_arc_json).replace('\"', '\'')
            nn_config_str = str(nn_config_json).replace('\"', '\'')

            self.logger.info(
                "\n>>> New Neural Network Architecture Candidate #{} (internal representation):"
                .format(i))
            self.logger.info(organized_arc_json)
            self.logger.info("\n>>> Corresponding Seach Space Description:")
            self.logger.info(nn_config_str)

            trials.append(
                api_pb2.Trial(
                    study_id=request.study_id,
                    parameter_set=[
                        api_pb2.Parameter(name="architecture",
                                          value=organized_arc_str,
                                          parameter_type=api_pb2.CATEGORICAL),
                        api_pb2.Parameter(name="nn_config",
                                          value=nn_config_str,
                                          parameter_type=api_pb2.CATEGORICAL)
                    ],
                ))

        self.prev_trial_ids = list()
        self.logger.info("")
        channel = grpc.beta.implementations.insecure_channel(
            MANAGER_ADDRESS, MANAGER_PORT)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            for i, t in enumerate(trials):
                ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t),
                                           10)
                trials[i].trial_id = ctrep.trial_id
                self.prev_trial_ids.append(ctrep.trial_id)

        self.logger.info(">>> {} Trials were created:".format(
            study.num_trials))
        for t in self.prev_trial_ids:
            self.logger.info(t)
        self.logger.info("")

        study.ctrl_step += 1

        return api_pb2.GetSuggestionsReply(trials=trials)
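The inner while loop blocks the RPC handler, sleeping 20 seconds between polls, until GetEvaluationResult() stops returning None. The same wait can be factored into a small helper; the max_retries bound below is an addition for illustration, not part of the original service:

import time

def wait_for_result(get_result, logger, poll_interval=20, max_retries=None):
    # Poll get_result() until it returns something other than None.
    # max_retries=None reproduces the unbounded wait in the code above.
    attempts = 0
    result = get_result()
    while result is None:
        logger.warning(">>> GetEvaluationResult() returned None; retrying")
        attempts += 1
        if max_retries is not None and attempts > max_retries:
            raise TimeoutError("evaluation result not available")
        time.sleep(poll_interval)
        result = get_result()
    return result

In the handler above this would be invoked as result = wait_for_result(lambda: self.GetEvaluationResult(study), self.logger).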