Exemple #1
0
    def getEvalHistory(self, studyID, obj_name, burn_in):
        """Collect parameter sets and objective values of completed trials.

        Asks the manager for the workers of ``studyID`` (latest log only).
        For every worker whose status is COMPLETED, the last value of the
        first metrics log named ``obj_name`` is recorded together with the
        worker's parameter set.

        Args:
            studyID: ID of the study whose workers are queried.
            obj_name: Name of the metric used as the objective.
            burn_in: Number of completed trials that must be exceeded before
                any history is returned.

        Returns:
            A pair ``(x_train, y_train)`` of parameter sets and objective
            values (floats). Both lists are empty while the number of
            completed trials is less than or equal to ``burn_in``.
        """
        workers = []
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            # The trailing 10 is presumably the RPC timeout in seconds.
            reply = client.GetWorkerFullInfo(
                api_pb2.GetWorkerFullInfoRequest(study_id=studyID,
                                                 only_latest_log=True), 10)
            workers = reply.worker_full_infos

        param_sets = []
        objectives = []
        for worker in workers:
            if worker.Worker.status != api_pb2.COMPLETED:
                continue
            # Only the first metrics log with the objective name is used.
            objective_log = next(
                (log for log in worker.metrics_logs if log.name == obj_name),
                None)
            if objective_log is None:
                continue
            objectives.append(float(objective_log.values[-1].value))
            param_sets.append(worker.parameter_set)

        self.logger.info("%d completed trials are found.",
                         len(param_sets),
                         extra={"StudyID": studyID})

        if len(param_sets) > burn_in:
            self.logger.debug("Completed trials: %r",
                              param_sets,
                              extra={"StudyID": studyID})
            return param_sets, objectives

        # Not enough history yet: report nothing so callers keep sampling.
        self.logger.info(
            "Trials will be sampled until %d trials for burn-in are completed.",
            burn_in,
            extra={"StudyID": studyID})
        return [], []
Exemple #2
0
    def _get_study_param(self):
        """Fetch the study configuration and cache its fields on ``self``.

        Populates, from the manager's GetStudy reply for ``self.study_id``:
          * ``study_name``, ``opt_direction`` and ``objective_name``
          * ``num_layers``, ``input_size`` and ``output_size`` (as ints)
          * ``search_space`` and ``num_operations`` (via ``SearchSpace``)

        and finally calls ``self.print_search_space()``.
        """
        channel = grpc.beta.implementations.insecure_channel(
            MANAGER_ADDRESS, MANAGER_PORT)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            study_reply = client.GetStudy(
                api_pb2.GetStudyRequest(study_id=self.study_id), 10)

        study_config = study_reply.study_config
        self.study_name = study_config.name
        self.opt_direction = study_config.optimization_type
        self.objective_name = study_config.objective_value_name

        nas_config = study_config.nas_config

        # Graph-level settings: depth and I/O sizes, normalised to ints.
        graph = nas_config.graph_config
        self.num_layers = int(graph.num_layers)
        self.input_size = [int(dim) for dim in graph.input_size]
        self.output_size = [int(dim) for dim in graph.output_size]

        # Available operations.
        space = SearchSpace(nas_config.operations)
        self.search_space = space.search_space
        self.num_operations = space.num_operations

        self.print_search_space()
Exemple #3
0
    def GetEvaluationResult(self, study):
        """Return the average objective value of the previous trials.

        Looks up the workers of ``study.study_id`` and keeps, per trial ID in
        ``self.prev_trial_ids``, the last value of the metrics log named
        ``study.objective_name`` for workers whose status is COMPLETED.

        Returns:
            The sum of the collected values divided by ``study.num_trials``
            when exactly ``study.num_trials`` trials have a result;
            otherwise ``None``.
        """
        channel = grpc.beta.implementations.insecure_channel(
            MANAGER_ADDRESS, MANAGER_PORT)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            reply = client.GetWorkerFullInfo(
                api_pb2.GetWorkerFullInfoRequest(study_id=study.study_id,
                                                 only_latest_log=True), 10)
            worker_infos = reply.worker_full_infos

        results_by_trial = {}
        for info in worker_infos:
            if info.Worker.trial_id not in self.prev_trial_ids:
                continue
            if info.Worker.status != api_pb2.COMPLETED:
                continue
            # If several logs carry the objective name, the last one wins.
            for log in info.metrics_logs:
                if log.name == study.objective_name:
                    results_by_trial[info.Worker.trial_id] = float(
                        log.values[-1].value)

        if len(results_by_trial) != study.num_trials:
            # Some trials have not reported yet; the caller must handle None.
            return None

        self.logger.info(">>> Evaluation results of previous trials:")
        for trial_id, value in results_by_trial.items():
            self.logger.info("{}: {}".format(trial_id, value))
        avg_metrics = sum(results_by_trial.values()) / study.num_trials
        self.logger.info("The average is {}\n".format(avg_metrics))

        return avg_metrics
Exemple #4
0
 def getStudyConfig(self, studyID):
     """Return the ``study_config`` of the manager's GetStudy reply for ``studyID``."""
     channel = grpc.beta.implementations.insecure_channel(
         self.manager_addr, self.manager_port)
     with api_pb2.beta_create_Manager_stub(channel) as client:
         study_request = api_pb2.GetStudyRequest(study_id=studyID)
         study_reply = client.GetStudy(study_request, 10)
         return study_reply.study_config
Exemple #5
0
 def registerTrials(self, trials):
     """Create each trial on the manager and store the assigned trial ID.

     Every element of ``trials`` is sent through CreateTrial and its
     ``trial_id`` is overwritten in place with the ID from the reply.

     Returns:
         The same ``trials`` sequence that was passed in.
     """
     channel = grpc.beta.implementations.insecure_channel(
         self.manager_addr, self.manager_port)
     with api_pb2.beta_create_Manager_stub(channel) as client:
         for trial in trials:
             create_request = api_pb2.CreateTrialRequest(trial=trial)
             create_reply = client.CreateTrial(create_request, 10)
             trial.trial_id = create_reply.trial_id
     return trials
Exemple #6
0
    def _get_suggestion_param(self):
        """Load the suggestion parameters for ``self.param_id``.

        The raw parameters from the manager are parsed with
        ``parseSuggestionParam`` and stored in ``self.suggestion_config``;
        ``self.print_suggestion_params()`` is then called.
        """
        channel = grpc.beta.implementations.insecure_channel(
            MANAGER_ADDRESS, MANAGER_PORT)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            param_request = api_pb2.GetSuggestionParametersRequest(
                param_id=self.param_id)
            param_reply = client.GetSuggestionParameters(param_request, 10)

        self.suggestion_config = parseSuggestionParam(
            param_reply.suggestion_parameters)

        self.print_suggestion_params()
 def GetEvaluationResult(self, studyID, trialID):
     """Return feature-map statistics for a completed trial.

     Fetches the full metric logs (not only the latest) of the workers for
     ``studyID`` / ``trialID``. For the first COMPLETED worker that has a
     metrics log named ``self.objective_name``, returns
     ``self.get_featuremap_statistics`` applied to that log.

     Returns:
         The statistics for the first matching log, or ``None`` when no
         completed worker has such a log.
     """
     workers = []
     channel = grpc.beta.implementations.insecure_channel(
         self.manager_addr, self.manager_port)
     with api_pb2.beta_create_Manager_stub(channel) as client:
         info_request = api_pb2.GetWorkerFullInfoRequest(
             study_id=studyID, trial_id=trialID, only_latest_log=False)
         workers = client.GetWorkerFullInfo(info_request,
                                            10).worker_full_infos

     for worker in workers:
         if worker.Worker.status != api_pb2.COMPLETED:
             continue
         for log in worker.metrics_logs:
             if log.name == self.objective_name:
                 return self.get_featuremap_statistics(log)
     return None
    def GetSuggestions(self, request, context):
        """Produce one trial carrying the current architecture.

        Steps:
          1. If the request is for a study other than
             ``self.current_study_id``, call ``self.generate_arch(request)``.
          2. At iteration 0 take the generator's initial architecture; while
             ``current_itr <= restruct_itr`` derive the next architecture
             from the previous trial's evaluation result. Beyond that,
             ``self.arch`` is left unchanged.
          3. Serialise the architecture and the suggestion config to JSON
             with double quotes replaced by single quotes, and register a
             trial with the manager.

        Returns:
            ``api_pb2.GetSuggestionsReply`` containing the created trial.
        """
        if request.study_id != self.current_study_id:
            self.generate_arch(request)

        if self.current_itr == 0:
            self.arch = self.generator.get_init_arch()
        elif self.current_itr <= self.restruct_itr:
            previous_result = self.GetEvaluationResult(request.study_id,
                                                       self.prev_trial_id)
            self.arch = self.generator.get_arch(self.arch, previous_result)

        self.logger.info("Architecture at itr={}".format(self.current_itr))
        self.logger.info(self.arch)

        # Parameter values are passed as strings; double quotes are replaced
        # with single quotes in the JSON text.
        arch_text = str(json.dumps(self.arch)).replace('\"', '\'')
        config_text = str(json.dumps(self.suggestion_config)).replace(
            '\"', '\'')

        named_values = (
            ("architecture", arch_text),
            ("parameters", config_text),
            ("current_itr", str(self.current_itr)),
        )
        parameter_set = [
            api_pb2.Parameter(name=param_name,
                              value=param_value,
                              parameter_type=api_pb2.CATEGORICAL)
            for param_name, param_value in named_values
        ]
        trials = [
            api_pb2.Trial(study_id=request.study_id,
                          parameter_set=parameter_set)
        ]

        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            for trial in trials:
                create_reply = client.CreateTrial(
                    api_pb2.CreateTrialRequest(trial=trial), 10)
                trial.trial_id = create_reply.trial_id
            # Remember the (single) created trial for the next evaluation.
            self.prev_trial_id = create_reply.trial_id

        self.current_itr += 1

        return api_pb2.GetSuggestionsReply(trials=trials)
    def _get_suggestion_param(self, paramID):
        """Load suggestion parameters and merge them with the search space.

        Parses the manager's suggestion parameters for ``paramID`` into
        ``self.suggestion_config``, adds the first entries of
        ``self.input_size`` / ``self.output_size`` to it, and copies its
        ``max_layers_per_stage`` value into ``self.search_space``.

        Requires ``self.input_size``, ``self.output_size`` and
        ``self.search_space`` to be set before this is called.
        """
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            param_request = api_pb2.GetSuggestionParametersRequest(
                param_id=paramID)
            param_reply = client.GetSuggestionParameters(param_request, 10)

            self.suggestion_config = parseSuggestionParam(
                param_reply.suggestion_parameters)
            config = self.suggestion_config
            config.update({"input_size": self.input_size[0]})
            config.update({"output_size": self.output_size[0]})

            layer_limit = config["max_layers_per_stage"]
            self.search_space.update({"max_layers_per_stage": layer_limit})

            self.logger.info("Suggestion Config: {}".format(
                self.suggestion_config))
Exemple #10
0
    def GetEvaluationResult(self, studyID):
        """Return the objective value of the previous trial, if completed.

        Queries the workers of ``studyID`` for ``self.prev_trial_id`` (latest
        log only). For the first COMPLETED worker with a metrics log named
        ``self.objective_name``, logs and returns the last value as a float.

        Returns:
            The objective value as ``float``, or ``None`` when no completed
            worker reports the objective.
        """
        workers = []
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            info_request = api_pb2.GetWorkerFullInfoRequest(
                study_id=studyID,
                trial_id=self.prev_trial_id,
                only_latest_log=True)
            workers = client.GetWorkerFullInfo(info_request,
                                               10).worker_full_infos

        for worker in workers:
            if worker.Worker.status != api_pb2.COMPLETED:
                continue
            for log in worker.metrics_logs:
                if log.name != self.objective_name:
                    continue
                latest_value = log.values[-1].value
                self.logger.info(
                    "Evaluation result of previous candidate: {}".format(
                        latest_value))
                return float(latest_value)
        return None
    def _get_search_space(self, studyID):
        """Read the study config of ``studyID`` and build the search space.

        Sets ``objective_name``, ``stages`` (from the graph config's
        ``num_layers``), ``input_size``, ``output_size`` and
        ``search_space`` on ``self``; the number of stages is also stored in
        the search space under the ``"stages"`` key.
        """
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            study_reply = client.GetStudy(
                api_pb2.GetStudyRequest(study_id=studyID), 10)

        study_config = study_reply.study_config
        self.objective_name = study_config.objective_value_name

        nas_config = study_config.nas_config
        graph = nas_config.graph_config
        operations = nas_config.operations

        self.stages = int(graph.num_layers)
        self.input_size = [int(dim) for dim in graph.input_size]
        self.output_size = [int(dim) for dim in graph.output_size]

        self.search_space = SearchSpace(operations).search_space
        self.search_space.update({"stages": self.stages})
        self.logger.info("Search Space: {}".format(self.search_space))
Exemple #12
0
    def _get_suggestion_param(self, paramID, studyID):
        """Fetch, log and store the suggestion parameters for a study.

        The parameters for ``paramID`` are parsed with
        ``parseSuggestionParam``, logged one per line, and saved in
        ``self.suggestion_config``. ``studyID`` is used only in the log
        header.
        """
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            param_reply = client.GetSuggestionParameters(
                api_pb2.GetSuggestionParametersRequest(param_id=paramID), 10)

        parsed = parseSuggestionParam(param_reply.suggestion_parameters)

        self.logger.info(
            "Parameters of LSTM Controller for Study {}:".format(studyID))
        for key in parsed:
            # Names longer than 13 characters get one tab, shorter ones two.
            line_format = "{}: \t{}" if len(key) > 13 else "{}: \t\t{}"
            self.logger.info(line_format.format(key, parsed[key]))

        self.suggestion_config = parsed
Exemple #13
0
    def _get_search_space(self, studyID):
        """Read the study config of ``studyID`` and cache the search space.

        Sets on ``self``:
          * ``opt_direction`` and ``objective_name``
          * ``num_layers``, ``input_size`` and ``output_size`` (as ints)
          * ``search_space`` and ``num_operations`` (via ``SearchSpace``)

        Every operation in the search space is printed through its
        ``print_op`` method using ``self.logger``.
        """
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            study_reply = client.GetStudy(
                api_pb2.GetStudyRequest(study_id=studyID), 10)

        study_config = study_reply.study_config
        self.opt_direction = study_config.optimization_type
        self.objective_name = study_config.objective_value_name

        nas_config = study_config.nas_config
        graph = nas_config.graph_config
        operations = nas_config.operations

        self.num_layers = int(graph.num_layers)
        self.input_size = [int(dim) for dim in graph.input_size]
        self.output_size = [int(dim) for dim in graph.output_size]
        space = SearchSpace(operations)

        self.logger.info("Search Space for Study {}:".format(studyID))

        self.search_space = space.search_space
        for operation in self.search_space:
            operation.print_op(self.logger)

        self.num_operations = space.num_operations
        self.logger.info("There are {} operations in total.\n".format(
            self.num_operations))
Exemple #14
0
    def GetSuggestions(self, request, context):
        """Sample one architecture from the controller and register it as a trial.

        On the first call for a study the controller is set up, its variables
        are initialised and an architecture is sampled without training. On
        later calls the controller checkpoint is restored, the evaluation
        result of the previous trial is fed in as the reward for one training
        run, and a new architecture is sampled. In both cases the session is
        saved to ``self.ctrl_cache_file`` afterwards.

        The sampled architecture and a matching ``nn_config`` description are
        serialised to JSON (double quotes replaced by single quotes) and sent
        to the manager as a single trial.

        Returns:
            ``api_pb2.GetSuggestionsReply`` containing the created trial.
        """
        # First request for this study: build the controller and mark the
        # next suggestion as the initial (untrained) one.
        if request.study_id not in self.registered_studies:
            self.setup_controller(request)
            self.is_first_run = True
            self.registered_studies.append(request.study_id)

        self.logger.info("-" * 80 +
                         "\nSuggestion Step {} for Study {}\n".format(
                             self.ctrl_step, request.study_id) + "-" * 80)

        with self.tf_graph.as_default():

            saver = tf.train.Saver()
            ctrl = self.controllers

            controller_ops = {
                "train_step": ctrl.train_step,
                "loss": ctrl.loss,
                "train_op": ctrl.train_op,
                "lr": ctrl.lr,
                "grad_norm": ctrl.grad_norm,
                "optimizer": ctrl.optimizer,
                "baseline": ctrl.baseline,
                "entropy": ctrl.sample_entropy,
                "sample_arc": ctrl.sample_arc,
                "skip_rate": ctrl.skip_rate
            }

            # Ops fetched in one training run; the order matches the tuple
            # unpacking of the sess.run() result below.
            run_ops = [
                controller_ops["loss"], controller_ops["entropy"],
                controller_ops["lr"], controller_ops["grad_norm"],
                controller_ops["baseline"], controller_ops["skip_rate"],
                controller_ops["train_op"]
            ]

            if self.is_first_run:
                self.logger.info(
                    "First time running suggestion for {}. Random architecture will be given."
                    .format(request.study_id))
                with tf.Session() as sess:
                    sess.run(tf.global_variables_initializer())
                    arc = sess.run(controller_ops["sample_arc"])
                    # TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
                    saver.save(sess, self.ctrl_cache_file)

                self.is_first_run = False

            else:
                with tf.Session() as sess:
                    saver.restore(sess, self.ctrl_cache_file)

                    valid_acc = ctrl.reward
                    # NOTE(review): if GetEvaluationResult() can return None
                    # (e.g. the previous trial has not completed), the
                    # negation / feed below would fail -- confirm and handle.
                    result = self.GetEvaluationResult(request.study_id)

                    # This LSTM cell is designed to maximize the metric.
                    # If the user wants to minimize it, feed the negative of the result.
                    if self.opt_direction == api_pb2.MINIMIZE:
                        result = -result

                    loss, entropy, lr, gn, bl, skip, _ = sess.run(
                        fetches=run_ops, feed_dict={valid_acc: result})
                    self.logger.info(
                        "Suggetion updated. LSTM Controller Loss: {}".format(
                            loss))
                    arc = sess.run(controller_ops["sample_arc"])

                    saver.save(sess, self.ctrl_cache_file)

        # Split the flat sample into per-layer slices: layer l takes the next
        # l + 1 entries of the flat list.
        arc = arc.tolist()
        organized_arc = [0 for _ in range(self.num_layers)]
        record = 0
        for l in range(self.num_layers):
            organized_arc[l] = arc[record:record + l + 1]
            record += l + 1

        nn_config = dict()
        nn_config['num_layers'] = self.num_layers
        nn_config['input_size'] = self.input_size
        nn_config['output_size'] = self.output_size
        nn_config['embedding'] = dict()
        # The first entry of each layer's slice indexes into the search
        # space; only the operations actually used are described.
        for l in range(self.num_layers):
            opt = organized_arc[l][0]
            nn_config['embedding'][opt] = self.search_space[opt].get_dict()

        organized_arc_json = json.dumps(organized_arc)
        nn_config_json = json.dumps(nn_config)

        # Parameter values are strings; double quotes are replaced with
        # single quotes in the JSON text.
        organized_arc_str = str(organized_arc_json).replace('\"', '\'')
        nn_config_str = str(nn_config_json).replace('\"', '\'')

        self.logger.info(
            "\nNew Neural Network Architecture (internal representation):")
        self.logger.info(organized_arc_json)
        self.logger.info("\nCorresponding Seach Space Description:")
        self.logger.info(nn_config_str)
        self.logger.info("")

        trials = []
        trials.append(
            api_pb2.Trial(
                study_id=request.study_id,
                parameter_set=[
                    api_pb2.Parameter(name="architecture",
                                      value=organized_arc_str,
                                      parameter_type=api_pb2.CATEGORICAL),
                    api_pb2.Parameter(name="nn_config",
                                      value=nn_config_str,
                                      parameter_type=api_pb2.CATEGORICAL)
                ],
            ))

        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            for i, t in enumerate(trials):
                ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t),
                                           10)
                trials[i].trial_id = ctrep.trial_id
            self.logger.info("Trial {} Created\n".format(ctrep.trial_id))
            # Remembered so a later call can look up this trial's result.
            self.prev_trial_id = ctrep.trial_id

        self.ctrl_step += 1
        return api_pb2.GetSuggestionsReply(trials=trials)
Exemple #15
0
    def parseParameters(self, paramID):
        """Fetch the suggestion parameters and merge them over the defaults.

        The manager is queried for the parameters registered under
        ``paramID``. Each recognised parameter is validated and, when valid,
        replaces the default value; invalid values are reported with a
        warning and the default is kept.

        Validation rules:
          * ``length_scale``, ``noise``, ``nu``, ``trade_off`` must parse as
            ``float``.
          * ``N``, ``n_estimators``, ``burn_in`` must parse as ``int``.
          * ``kernel_type``, ``mode``, ``model_type`` must be one of the
            allowed choices listed below.
          * ``max_features`` and ``trial_hist`` are known names but are not
            overridable here; they always keep their defaults.

        Args:
            paramID: ID of the suggestion parameter set to fetch.

        Returns:
            dict mapping parameter names to their effective values.
        """
        channel = grpc.beta.implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        params = []
        with api_pb2.beta_create_Manager_stub(channel) as client:
            gsprep = client.GetSuggestionParameters(
                api_pb2.GetSuggestionParametersRequest(param_id=paramID), 10)
            params = gsprep.suggestion_parameters

        parsed_service_params = {
            "N": 100,
            "model_type": "gp",
            "max_features": "auto",
            "length_scale": 0.5,
            "noise": 0.0005,
            "nu": 1.5,
            "kernel_type": "matern",
            "n_estimators": 50,
            "mode": "pi",
            "trade_off": 0.01,
            "trial_hist": "",
            "burn_in": 10,
        }
        float_params = ("length_scale", "noise", "nu", "trade_off")
        int_params = ("N", "n_estimators", "burn_in")
        # NOTE(review): the previous validation code also mentioned "lcb" as
        # a mode, but it was never in the allowed list; confirm whether the
        # acquisition function supports it before adding it here.
        allowed_choices = {
            "mode": ["pi", "ei"],
            "model_type": ["gp", "rf"],
            "kernel_type": ["matern", "rbf"],
        }

        for param in params:
            name = param.name
            value = param.value

            if name not in parsed_service_params:
                self.logger.warning("Unknown Parameter name: %s ", name)
                continue

            if name in float_params:
                try:
                    parsed_service_params[name] = float(value)
                except ValueError:
                    self.logger.warning(
                        "Parameter must be float for %s: %s back to default value",
                        name, value)
            elif name in int_params:
                try:
                    parsed_service_params[name] = int(value)
                except ValueError:
                    self.logger.warning(
                        "Parameter must be int for %s: %s back to default value",
                        name, value)
            elif name in allowed_choices:
                # Previously these checks were inverted: valid values were
                # rejected with a warning, unknown kernel types were
                # accepted, and mode/model_type could never be overridden.
                if value in allowed_choices[name]:
                    parsed_service_params[name] = value
                else:
                    self.logger.warning(
                        "Unknown Parameter for %s: %s back to default value",
                        name, value)

        return parsed_service_params
Exemple #16
0
    def GetSuggestions(self, request, context):
        """Sample ``study.num_trials`` architectures and register them as trials.

        A ``NAS_RL_StudyJob`` is created and cached per study ID on first
        use. On the study's first run the controller variables are
        initialised and candidates are sampled without training. On later
        runs the checkpoint is restored, the result of the previous trials is
        fed in as the reward for one training run, and new candidates are
        sampled. The session is saved to ``study.ctrl_cache_file`` in both
        cases.

        Each candidate and a matching ``nn_config`` description are
        serialised to JSON (double quotes replaced by single quotes) and sent
        to the manager as one trial.

        Returns:
            ``api_pb2.GetSuggestionsReply`` containing the created trials.
        """
        if request.study_id not in self.registered_studies:
            self.registered_studies[request.study_id] = NAS_RL_StudyJob(
                request, self.logger)

        study = self.registered_studies[request.study_id]

        self.logger.info(
            "-" * 100 +
            "\nSuggestion Step {} for StudyJob {} (ID: {})\n".format(
                study.ctrl_step, study.study_name, study.study_id) + "-" * 100)

        with study.tf_graph.as_default():

            saver = tf.train.Saver()
            ctrl = study.controller

            controller_ops = {
                "train_step": ctrl.train_step,
                "loss": ctrl.loss,
                "train_op": ctrl.train_op,
                "lr": ctrl.lr,
                "grad_norm": ctrl.grad_norm,
                "optimizer": ctrl.optimizer,
                "baseline": ctrl.baseline,
                "entropy": ctrl.sample_entropy,
                "sample_arc": ctrl.sample_arc,
                "skip_rate": ctrl.skip_rate
            }

            # Ops fetched in one training run; the order matches the tuple
            # unpacking of the sess.run() result below.
            run_ops = [
                controller_ops["loss"], controller_ops["entropy"],
                controller_ops["lr"], controller_ops["grad_norm"],
                controller_ops["baseline"], controller_ops["skip_rate"],
                controller_ops["train_op"]
            ]

            if study.is_first_run:
                self.logger.info(
                    ">>> First time running suggestion for {}. Random architecture will be given."
                    .format(study.study_name))
                with tf.Session() as sess:
                    sess.run(tf.global_variables_initializer())
                    candidates = list()
                    for _ in range(study.num_trials):
                        candidates.append(
                            sess.run(controller_ops["sample_arc"]))

                    # TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
                    saver.save(sess, study.ctrl_cache_file)

                study.is_first_run = False

            else:
                with tf.Session() as sess:
                    saver.restore(sess, study.ctrl_cache_file)

                    valid_acc = ctrl.reward
                    result = self.GetEvaluationResult(study)

                    # In some rare cases, GetEvaluationResult() may return None
                    # if GetSuggestions() is called before all the trials are completed.
                    # This polls every 20 seconds with no upper bound on the wait.
                    while result is None:
                        self.logger.warning(
                            ">>> GetEvaluationResult() returns None")
                        time.sleep(20)
                        result = self.GetEvaluationResult(study)

                    # This LSTM network is designed to maximize the metrics
                    # However, if the user wants to minimize the metrics, we can take the negative of the result
                    if study.opt_direction == api_pb2.MINIMIZE:
                        result = -result

                    loss, entropy, lr, gn, bl, skip, _ = sess.run(
                        fetches=run_ops, feed_dict={valid_acc: result})
                    # NOTE(review): the message says "Reward" but the value
                    # logged is the controller loss.
                    self.logger.info(
                        ">>> Suggetion updated. LSTM Controller Reward: {}".
                        format(loss))

                    candidates = list()
                    for _ in range(study.num_trials):
                        candidates.append(
                            sess.run(controller_ops["sample_arc"]))

                    saver.save(sess, study.ctrl_cache_file)

        organized_candidates = list()
        trials = list()

        for i in range(study.num_trials):
            # Split the flat sample into per-layer slices: layer l takes the
            # next l + 1 entries of the flat list.
            arc = candidates[i].tolist()
            organized_arc = [0 for _ in range(study.num_layers)]
            record = 0
            for l in range(study.num_layers):
                organized_arc[l] = arc[record:record + l + 1]
                record += l + 1
            organized_candidates.append(organized_arc)

            nn_config = dict()
            nn_config['num_layers'] = study.num_layers
            nn_config['input_size'] = study.input_size
            nn_config['output_size'] = study.output_size
            nn_config['embedding'] = dict()
            # The first entry of each layer's slice indexes into the search
            # space; only the operations actually used are described.
            for l in range(study.num_layers):
                opt = organized_arc[l][0]
                nn_config['embedding'][opt] = study.search_space[opt].get_dict(
                )

            organized_arc_json = json.dumps(organized_arc)
            nn_config_json = json.dumps(nn_config)

            # Parameter values are strings; double quotes are replaced with
            # single quotes in the JSON text.
            organized_arc_str = str(organized_arc_json).replace('\"', '\'')
            nn_config_str = str(nn_config_json).replace('\"', '\'')

            self.logger.info(
                "\n>>> New Neural Network Architecture Candidate #{} (internal representation):"
                .format(i))
            self.logger.info(organized_arc_json)
            self.logger.info("\n>>> Corresponding Seach Space Description:")
            self.logger.info(nn_config_str)

            trials.append(
                api_pb2.Trial(
                    study_id=request.study_id,
                    parameter_set=[
                        api_pb2.Parameter(name="architecture",
                                          value=organized_arc_str,
                                          parameter_type=api_pb2.CATEGORICAL),
                        api_pb2.Parameter(name="nn_config",
                                          value=nn_config_str,
                                          parameter_type=api_pb2.CATEGORICAL)
                    ],
                ))

        # NOTE(review): prev_trial_ids is stored on self rather than on the
        # study object, so requests for different studies overwrite each
        # other's trial IDs -- confirm whether that is intended.
        self.prev_trial_ids = list()
        self.logger.info("")
        channel = grpc.beta.implementations.insecure_channel(
            MANAGER_ADDRESS, MANAGER_PORT)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            for i, t in enumerate(trials):
                ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t),
                                           10)
                trials[i].trial_id = ctrep.trial_id
                self.prev_trial_ids.append(ctrep.trial_id)

        self.logger.info(">>> {} Trials were created:".format(
            study.num_trials))
        for t in self.prev_trial_ids:
            self.logger.info(t)
        self.logger.info("")

        study.ctrl_step += 1

        return api_pb2.GetSuggestionsReply(trials=trials)