Exemple #1
0
 def do(self):
     """Evaluate every record of the current step and publish the results.

     Each record returned by ``_get_current_step_records`` is broadcast to
     the report and passed to ``_evaluate_single_model``.  After the master
     has been joined, every record's report entry is updated, the step
     records are output and the output path is backed up.  When there are
     no records an error is logged and nothing else happens.
     """
     logger.info("BenchmarkPipeStep started...")
     candidates = self._get_current_step_records()
     if not candidates:
         logger.error("There is no model to evaluate.")
         return

     self.master = Master()
     for candidate in candidates:
         # Announce the record to the report before submitting it.
         announced = ReportRecord(
             worker_id=candidate.worker_id,
             desc=candidate.desc,
             step_name=candidate.step_name,
         )
         Report().broadcast(announced)
         self._evaluate_single_model(candidate)
         self.master.pop_all_finished_evaluate_worker()

     # Block until the master is done, then pop once more for the rest.
     self.master.join()
     self.master.pop_all_finished_evaluate_worker()

     for candidate in candidates:
         Report().update_report(
             dict(step_name=candidate.step_name,
                  worker_id=candidate.worker_id))

     Report().output_step_all_records(
         step_name=General.step_name, weights_file=False, performance=True)
     self.master.close_client()
     Report().backup_output_path()
Exemple #2
0
 def _broadcast(self, pfms):
     """Store ``pfms`` on this worker's record and broadcast the record.

     :param pfms: performance values; merged with ``update`` into the
         record's existing ``performance`` when that is truthy, otherwise
         assigned to it directly.
     """
     report_record = Report().receive(self.step_name, self.worker_id)
     if not report_record.performance:
         report_record.performance = pfms
     else:
         report_record.performance.update(pfms)
     Report().broadcast(report_record)
     logging.debug("valid record: {}".format(report_record))
Exemple #3
0
    def search(self):
        """Search one mutated model.

        A code is drawn at random from the pareto front records of the
        'random' and 'mutate' steps, and one randomly chosen gene of it is
        replaced by a different value below that gene's upper bound.  Gene
        ``i`` is the single character at position ``i + 1`` of the code and
        is read as a base-36 digit; the character at position 0 is never
        mutated.

        :return: current number of samples, and the model
        :raises IndexError: if there is no code to mutate, or the selected
            gene has no alternative value (or a value that does not fit in
            one base-36 digit)
        """
        # One character per gene, matching the ``int(char, 36)`` read below.
        base36_digits = "0123456789abcdefghijklmnopqrstuvwxyz"

        desc = deepcopy(self.search_space)
        search_desc = desc.custom
        records = Report().get_pareto_front_records(['random', 'mutate'])
        codes = [record.desc['custom']['code'] for record in records]
        num_ops = len(search_desc.op_names)
        upper_bounds = [
            num_ops, 2, 2, num_ops, num_ops, 5, 5, num_ops, num_ops, 8, 8,
            num_ops, num_ops, 4, 4, 5, 5, 6, 6
        ]
        code_to_mutate = random.choice(codes)
        index = random.randrange(len(upper_bounds))
        position = index + 1
        current = int(code_to_mutate[position], 36)
        # Exclude the current value by comparison rather than ``pop`` so a
        # stored value at or above the bound does not raise; every in-range
        # value is then a valid mutation.
        choices = [
            value for value in range(upper_bounds[index]) if value != current
        ]
        choice = random.choice(choices)
        # Write the gene back as exactly one base-36 character.  ``str`` would
        # emit two characters for values >= 10 and shift all later genes.
        code_mutated = (code_to_mutate[:position] + base36_digits[choice] +
                        code_to_mutate[position + 1:])
        search_desc['code'] = code_mutated
        search_desc['method'] = "mutate"
        logging.info("Mutate from {} to {}".format(code_to_mutate,
                                                   code_mutated))
        search_desc = self.codec.decode(search_desc)
        self.sample_count += 1
        desc['custom'] = search_desc
        return self.sample_count, desc
Exemple #4
0
    def search(self):
        """Produce the next candidate description.

        While ``random_count`` is below ``random_models`` the result of
        ``_random_sample`` is used as the codec.  Afterwards a new encoding
        is derived from the pareto front records, by mutation or crossover
        chosen with ``random.randint(0, 1)``, and decoded by the codec.

        :return: search id, network desc
        :rtype: int, NetworkDesc
        """
        desc = deepcopy(self.search_space)
        if self.random_count < self.random_models:
            sampled = self._random_sample()
            desc.update({"trainer.codec": sampled})
            return self.random_count, desc

        self.ea_epoch += 1
        # todo: according to gflops and acc.
        front = Report().get_pareto_front_records(self.step_name, self.num_individual)
        codes = []
        for item in front:
            # An encoding is the weight bit list followed by the activation one.
            codes.append(item.desc.get('nbit_w_list') + item.desc.get('nbit_a_list'))
        logging.info("codes=%s", codes)

        if len(codes) >= 2:
            parent_a, parent_b = random.sample(codes, 2)
        else:
            # A single code serves as both parents.
            parent_a = parent_b = codes[0]

        if random.randint(0, 1) == 0:
            # mutate (method name kept exactly as the original calls it)
            encoding_new = self.mutatation(parent_a)
        else:
            # crossover: only the first returned encoding is used
            encoding_new, _ = self.crossover(parent_a, parent_b)

        self.ea_count += 1
        if self.ea_count % self.num_individual == 0:
            self.ea_epoch += 1
        decoded = self.codec.decode(encoding_new)
        desc.update({"trainer.codec": decoded})
        return self.random_count + self.ea_count, desc
Exemple #5
0
 def _get_current_step_records(self):
     """Collect the records of the current step that have a weights file.

     Records are loaded from a models folder when one is configured or when
     the current step is not the first pipeline step (the folder then
     defaults to the previous step's directory under the local output
     path).  Otherwise ``_load_single_model_records`` supplies them.
     Records without ``weights_file`` are logged as errors and dropped; the
     kept ones get ``step_name`` set to the current step.

     :return: list of usable records
     """
     step_name = General.step_name
     models_folder = PipeStepConfig.pipe_step.get("models_folder")
     cur_index = PipelineConfig.steps.index(step_name)

     if cur_index < 1 and not models_folder:
         records = self._load_single_model_records()
     else:
         if not models_folder:
             previous_step = PipelineConfig.steps[cur_index - 1]
             models_folder = FileOps.join_path(
                 TaskOps().local_output_path, previous_step)
         # Expand the placeholder a configured folder may contain.
         models_folder = models_folder.replace(
             "{local_base_path}", TaskOps().local_base_path)
         records = Report().load_records_from_model_folder(models_folder)

     final_records = []
     for record in records:
         if record.weights_file:
             record.step_name = General.step_name
             final_records.append(record)
         else:
             logger.error(
                 "Model file is not existed, id={}".format(record.worker_id))
     logging.debug("Records: {}".format(final_records))
     return final_records
Exemple #6
0
    def search(self):
        """Search one mutated model.

        Starts from a randomly chosen pareto front code of the 'random' and
        'mutate' steps and calls ``mutate_once`` until ``num_mutate`` calls
        have each produced a code different from their input.

        :return: dict with the new ``worker_id`` (the incremented sample
            count) and the model ``desc``
        """
        desc = deepcopy(self.search_space)
        search_desc = desc.custom
        # TODO: merge sr ea in one pipe step.
        front = Report().get_pareto_front_records(['random', 'mutate'])
        codes = [item.desc['custom']['code'] for item in front]
        code_to_mutate = random.choice(codes)
        code_mutated = code_to_mutate
        applied = 0
        num_candidates = len(search_desc["candidates"])
        while applied < self.num_mutate:
            attempt = self.mutate_once(code_mutated, num_candidates)
            # Only attempts that actually changed the code are counted.
            if attempt != code_mutated:
                code_mutated = attempt
                applied += 1
        logging.info("Mutate from {} to {}".format(code_to_mutate, code_mutated))
        search_desc['code'] = code_mutated
        search_desc['method'] = "mutate"
        desc['custom'] = self.codec.decode(search_desc)
        self.sample_count += 1
        return {"worker_id": self.sample_count, "desc": desc}
Exemple #7
0
 def _evaluate_single_model(self, record):
     """Create a GPU evaluator for ``record`` and run it on the master.

     Any exception raised while looking up the evaluator class or while
     setting up and submitting the evaluator is logged together with its
     traceback and suppressed.

     :param record: record providing ``step_name``, ``worker_id``, ``desc``
         and ``weights_file``
     """
     try:
         cls_gpu_evaluator = ClassFactory.get_cls(ClassType.GPU_EVALUATOR)
     except Exception:
         logger.error(
             "Failed to create Evaluator, please check the config file.")
         logger.error(traceback.format_exc())
         return

     try:
         worker_info = dict(step_name=record.step_name,
                            worker_id=record.worker_id)
         payload = {
             "worker_id": record.worker_id,
             "desc": record.desc,
             "step_name": record.step_name,
         }
         # Broadcast a record for this worker before the evaluator runs.
         Report().broadcast(ReportRecord().load_dict(payload))
         evaluator = cls_gpu_evaluator(
             worker_info=worker_info,
             model_desc=record.desc,
             weights_file=record.weights_file,
         )
         self.master.run(evaluator)
     except Exception:
         logger.error(
             "Failed to evaluate model, worker info={}".format(worker_info))
         logger.error(traceback.format_exc())
Exemple #8
0
 def do(self):
     """Run the sampling loop until the generator is completed.

     Each iteration dispatches the generator's sample to a trainer, or
     sleeps for half a second when there is none, and then calls
     ``_after_train`` without waiting.  Once the generator is completed the
     master is joined, ``_after_train`` is called waiting for completion,
     and the pareto front of the current step is logged and output.
     """
     logging.debug("NasPipeStep started...")
     while not self.generator.is_completed:
         samples = self.generator.sample()
         if not samples:
             time.sleep(0.5)
         else:
             self._dispatch_trainer(samples)
         self._after_train(wait_until_finish=False)

     self.master.join()
     self._after_train(wait_until_finish=True)
     front_values = Report().pareto_front(General.step_name)
     logging.debug("Pareto_front values: %s", front_values)
     Report().output_pareto_front(General.step_name)
     self.master.close_client()
Exemple #9
0
    def search(self):
        """Produce the next candidate description.

        While ``random_count`` is below ``random_models`` the counter is
        incremented and a random sample is returned.  Afterwards a new
        encoding is derived from the backbone encodings of the pareto front
        records, by mutation or crossover chosen with
        ``random.randint(0, 1)``, and decoded by the codec.  In both cases
        a copy of the description is stored under its own "trainer.codec"
        key.

        :return: search id, network desc
        :rtype: int, NetworkDesc
        """
        if self.random_count < self.random_models:
            self.random_count += 1
            sampled = self._random_sample()
            # dict(sampled) is evaluated before the key is added.
            sampled.update({"trainer.codec": dict(sampled)})
            return self.random_count, sampled

        self.ea_epoch += 1
        front = Report().get_pareto_front_records(self.step_name, self.num_individual)
        codes = []
        for item in front:
            codes.append(item.desc.get('backbone').get('encoding'))
        logging.info("codes=%s", codes)

        if len(codes) >= 2:
            parent_a, parent_b = random.sample(codes, 2)
        else:
            # A single code serves as both parents.
            parent_a = parent_b = codes[0]

        if random.randint(0, 1) == 0:
            # mutate (method name kept exactly as the original calls it)
            encoding_new = self.mutatation(parent_a)
        else:
            # crossover: only the first returned encoding is used
            encoding_new, _ = self.crossover(parent_a, parent_b)

        self.ea_count += 1
        if self.ea_count % self.num_individual == 0:
            self.ea_epoch += 1
        decoded = self.codec.decode(encoding_new)
        decoded.update({"trainer.codec": dict(decoded)})
        return self.random_count + self.ea_count, decoded
Exemple #10
0
 def get_pareto_front(self):
     """Return the code of one randomly chosen pareto front record.

     :return: the ``desc['code']`` value of the selected record
     """
     front = Report().get_pareto_front_records()
     codes = [item.desc['code'] for item in front]
     return random.choice(codes)
 def do(self):
     """Start to run fully train with horovod or local trainer.

     When the trainer config is marked distributed, the distributed path
     is taken.  Otherwise the current step's records are trained through a
     new ``Master``, their report entries are updated and the step records
     are output.  The output path is backed up in both cases.
     """
     logger.info("FullyTrainPipeStep started...")
     trainer_cls = ClassFactory.get_cls('trainer')
     if not trainer_cls.config.distributed:
         records = self._get_current_step_records()
         logger.debug("load pipestep records: {}".format(records))
         self.master = Master()
         self._train_multi_models(records)
         for item in records:
             Report().update_report(
                 dict(step_name=item.step_name, worker_id=item.worker_id))
         Report().output_step_all_records(
             step_name=self.task.step_name,
             weights_file=True,
             performance=True,
         )
         self.master.close_client()
     else:
         self._do_distributed_fully_train()
     Report().backup_output_path()
Exemple #12
0
 def __init__(self):
     """Set up the search space, search algorithm, report and base record.

     The record's ``step_name`` is the current step, and it takes over
     ``objective_keys`` from the search algorithm's config when the config
     defines that attribute.
     """
     self.step_name = General.step_name
     self.search_space = SearchSpace()
     self.search_alg = SearchAlgorithm(self.search_space.search_space)
     self.report = Report()

     record = ReportRecord()
     record.step_name = self.step_name
     self.record = record

     alg_config = self.search_alg.config
     if hasattr(alg_config, 'objective_keys'):
         record.objective_keys = alg_config.objective_keys
 def _train_multi_models(self, records):
     """Train the model of every record, then evaluate them if required.

     After each training call one finished worker is popped from the master
     and passed to the report.  Evaluation runs only when
     ``need_evaluate`` is truthy.

     :param records: records providing ``desc`` and ``worker_id``
     """
     for item in records:
         self._train_single_model(item.desc, item.worker_id)
         finished = self.master.pop_finished_worker()
         Report().update_report(finished)
     self.master.join()
     self.master.pop_all_finished_train_worker()

     if self.need_evaluate:
         for item in records:
             self._evaluate_single_model(item)
             self.master.pop_all_finished_evaluate_worker()
         self.master.join()
         self.master.pop_all_finished_evaluate_worker()
 def _train_single_model(self, model_desc=None, model_id=None):
     """Build a trainer for one model and start its fully-train run.

     :param model_desc: model description; when given, a record for it is
         broadcast and the model is built from it.  When ``None`` the
         trainer is created with no model and id 0.
     :param model_id: worker id used for the record and the trainer
     """
     cls_trainer = ClassFactory.get_cls('trainer')
     step_name = self.task.step_name
     if model_desc is None:
         trainer = cls_trainer(None, 0)
     else:
         payload = {
             "worker_id": model_id,
             "desc": model_desc,
             "step_name": step_name,
         }
         record = ReportRecord().load_dict(payload)
         logging.debug("Broadcast Record=%s", str(record))
         Report().broadcast(record)
         model = NetworkDesc(model_desc).to_model()
         trainer = cls_trainer(model, model_id)

     if not cls_trainer.config.distributed:
         self._do_single_fully_train(trainer)
     else:
         # The distributed path does not receive the trainer built above.
         self._do_distributed_fully_train()
Exemple #15
0
    def _init_evaluator(self):
        """Create and register the evaluators for this worker.

        Nothing happens when ``_use_evaluator`` reports that no evaluator
        is used.  Otherwise a model is built from the description stored in
        this worker's report record, and one evaluator per class in the
        returned set is added via ``add_evaluator``.

        """
        use_evaluator, cls_evaluator_set = self._use_evaluator()
        if use_evaluator:
            worker_record = Report().receive(self.step_name, self.worker_id)
            model = NetworkDesc(worker_record.desc).to_model()
            for evaluator_cls in cls_evaluator_set:
                self.add_evaluator(
                    evaluator_cls(worker_info=self.worker_info, model=model))
Exemple #16
0
 def __init__(self, search_space=None, **kwargs):
     """Init SearchAlgorithm.

     :param search_space: search space kept on the instance and handed to
         the codec
     :param kwargs: config overrides; when given (and ``config`` is
         truthy) ``config`` is called and the overrides are loaded into
         the result, which replaces ``self.config`` on this instance
     """
     super(SearchAlgorithm, self).__init__()
     if self.config and kwargs:
         # modify config by kwargs, using local scope
         local_config = self.config()
         self.config = local_config
         load_conf_from_desc(local_config, kwargs)

     self.search_space = search_space
     # A codec is created only when the config names one.
     self.codec = (Codec(search_space, type=self.config.codec)
                   if hasattr(self.config, 'codec') else None)
     logging.debug("Config=%s", obj2config(self.config))
     self.report = Report()
     self.record = ReportRecord()
     self.record.step_name = self.step_name
Exemple #17
0
 def sample(self):
     """Sample a work id and model from search algorithm.

     The search result may be a single sample or a list of samples; tuple
     samples are read as ``(worker_id, desc)``.  Every sample is loaded
     into the record, broadcast, and its description decoded.

     :return: list of ``(worker_id, desc)`` tuples, or ``None`` when the
         search algorithm returns nothing
     """
     found = self.search_alg.search()
     if not found:
         return None
     samples = found if isinstance(found, list) else [found]

     out = []
     for item in samples:
         if isinstance(item, tuple):
             item = {"worker_id": item[0], "desc": item[1]}
         record = self.record.load_dict(item)
         logging.debug("Broadcast Record=%s", str(record))
         Report().broadcast(record)
         decoded = self._decode_hps(record.desc)
         out.append((record.worker_id, decoded))
     return out
 def _get_current_step_records(self):
     """Collect the records to train in the current step.

     Records are loaded from a models folder when one is configured or when
     the current step is not the first pipeline step (the folder then
     defaults to the previous step's directory under the local output
     path).  Otherwise a single ``ReportRecord(step_name, 0)`` is used.
     Every returned record has ``step_name`` set to the current step.

     :return: list of records
     """
     step_name = self.task.step_name
     models_folder = PipeStepConfig.pipe_step.get("models_folder")
     cur_index = PipelineConfig.steps.index(step_name)

     if cur_index < 1 and not models_folder:
         records = [ReportRecord(step_name, 0)]
     else:
         if not models_folder:
             previous_step = PipelineConfig.steps[cur_index - 1]
             models_folder = FileOps.join_path(
                 TaskOps().local_output_path, previous_step)
         # Expand the placeholder a configured folder may contain.
         models_folder = models_folder.replace(
             "{local_base_path}", TaskOps().local_base_path)
         records = Report().load_records_from_model_folder(models_folder)

     logging.debug("Records: {}".format(records))
     for item in records:
         item.step_name = step_name
     return records
Exemple #19
0
    def update(self, step_name, worker_id):
        """Feed a worker's record to the search algorithm and dump it.

        :param step_name: step name
        :param worker_id: current worker id
        :return: None
        """
        report = Report()
        worker_record = report.receive(step_name, worker_id)
        logging.debug("Get Record=%s", str(worker_record))
        serialized = worker_record.serialize()
        self.search_alg.update(serialized)
        report.dump_report(worker_record.step_name, worker_record)
        logging.info(
            "Update Success. step_name=%s, worker_id=%s", step_name, worker_id)
        # The pareto front is read for General.step_name, not the argument.
        best_values = Report().pareto_front(step_name=General.step_name)
        logging.info("Best values: %s", best_values)
Exemple #20
0
 def _broadcast(self, epoch=None):
     """Copy the trainer's current state onto its record and broadcast it.

     Performance keys that have no objective yet get one: 'MIN' for
     'gflops' and 'kparams', 'MAX' for everything else.

     :param epoch: stored on the record only when the trainer config has
         ``report_on_epoch`` set
     """
     trainer = self.trainer
     record = Report().receive(trainer.step_name, trainer.worker_id)
     if trainer.config.report_on_epoch:
         record.epoch = epoch
     # todo: remove in FinedGrainedSpace
     if trainer.config.codec:
         record.desc = trainer.config.codec
     if not record.desc:
         record.desc = trainer.model_desc

     record.performance = trainer.performance
     record.objectives = trainer.valid_metrics.objectives
     if record.performance is not None:
         for key in record.performance:
             if key in record.objectives:
                 continue
             direction = 'MIN' if key in ('gflops', 'kparams') else 'MAX'
             record.objectives.update({key: direction})

     record.model_path = trainer.model_path
     record.checkpoint_path = trainer.checkpoint_file
     record.weights_file = trainer.weights_file
     Report().broadcast(record)
     logging.debug("report_callback record: {}".format(record))
Exemple #21
0
 def _save_best(self, desc):
     """Broadcast ``desc`` on a record with a fixed accuracy of 100.

     The record is received for worker id ``sample_count + 1`` of this
     step.

     :param desc: description stored on the record
     """
     next_worker_id = self.sample_count + 1
     best_record = Report().receive(self.step_name, next_worker_id)
     best_record.performance = dict(accuracy=100)
     best_record.desc = desc
     Report().broadcast(best_record)
Exemple #22
0
 def after_train(self, logs=None):
     """Broadcast the record for ``self.epoch``, then close the report.

     :param logs: not used by this method
     """
     self._broadcast(self.epoch)
     trainer = self.trainer
     Report().close(trainer.step_name, trainer.worker_id)