def do(self):
    """Start to run the benchmark evaluator."""
    logger.info("BenchmarkPipeStep started...")
    records = self._get_current_step_records()
    if not records:
        logger.error("There is no model to evaluate.")
        return
    self.master = Master()
    for record in records:
        _record = ReportRecord(worker_id=record.worker_id, desc=record.desc,
                               step_name=record.step_name)
        Report().broadcast(_record)
        self._evaluate_single_model(record)
        # Drain workers that finished while new evaluators were being dispatched.
        self.master.pop_all_finished_evaluate_worker()
    self.master.join()
    # Drain any workers that finished between the last poll and join().
    self.master.pop_all_finished_evaluate_worker()
    for record in records:
        Report().update_report({
            "step_name": record.step_name,
            "worker_id": record.worker_id})
    Report().output_step_all_records(
        step_name=General.step_name, weights_file=False, performance=True)
    self.master.close_client()
    Report().backup_output_path()

def _evaluate_single_model(self, record):
    try:
        cls_gpu_evaluator = ClassFactory.get_cls(ClassType.GPU_EVALUATOR)
    except Exception:
        logger.error("Failed to create Evaluator, please check the config file.")
        logger.error(traceback.format_exc())
        return
    try:
        worker_info = {"step_name": record.step_name, "worker_id": record.worker_id}
        _record = dict(worker_id=record.worker_id, desc=record.desc,
                       step_name=record.step_name)
        _init_record = ReportRecord().load_dict(_record)
        Report().broadcast(_init_record)
        evaluator = cls_gpu_evaluator(worker_info=worker_info,
                                      model_desc=record.desc,
                                      weights_file=record.weights_file)
        self.master.run(evaluator)
    except Exception:
        logger.error("Failed to evaluate model, worker info={}".format(worker_info))
        logger.error(traceback.format_exc())
        return
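
# Hedged sketch: GPU evaluators are resolved through the class registry, so a
# custom evaluator would be registered roughly like this (the class name below
# is hypothetical; the decorator follows vega's ClassFactory registry pattern,
# and the constructor signature mirrors the call above):
#
# @ClassFactory.register(ClassType.GPU_EVALUATOR)
# class MyGpuEvaluator(object):
#     def __init__(self, worker_info=None, model_desc=None, weights_file=None):
#         ...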

def _train_single_model(self, model_desc=None, model_id=None):
    cls_trainer = ClassFactory.get_cls('trainer')
    step_name = self.task.step_name
    if model_desc is not None:
        sample = dict(worker_id=model_id, desc=model_desc, step_name=step_name)
        record = ReportRecord().load_dict(sample)
        logging.debug("Broadcast Record=%s", str(record))
        Report().broadcast(record)
        model = NetworkDesc(model_desc).to_model()
        trainer = cls_trainer(model, model_id)
    else:
        trainer = cls_trainer(None, 0)
    if cls_trainer.config.distributed:
        self._do_distributed_fully_train()
    else:
        self._do_single_fully_train(trainer)
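
# Note on the dispatch above: with a model_desc, the network is instantiated
# from its description and trained under the given model_id; without one, the
# trainer is created with no model and worker id 0 (presumably restoring its
# model from configuration). cls_trainer.config.distributed then chooses
# distributed fully-train over a single-process run.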

def __init__(self, search_space=None, **kwargs):
    """Init SearchAlgorithm."""
    super(SearchAlgorithm, self).__init__()
    # Modify config by kwargs, using local scope.
    if self.config and kwargs:
        self.config = self.config()
        load_conf_from_desc(self.config, kwargs)
    self.search_space = search_space
    if hasattr(self.config, 'codec'):
        self.codec = Codec(search_space, type=self.config.codec)
    else:
        self.codec = None
    logging.debug("Config=%s", obj2config(self.config))
    self.report = Report()
    self.record = ReportRecord()
    self.record.step_name = self.step_name
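
# Hedged usage sketch: keyword arguments are folded into a fresh instance of
# the class-level config via load_conf_from_desc before the algorithm uses it,
# so callers can override single fields inline. The keyword below is
# illustrative, not a real config field:
#
# search_alg = SearchAlgorithm(search_space.search_space, num_samples=50)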

def _get_current_step_records(self):
    step_name = self.task.step_name
    models_folder = PipeStepConfig.pipe_step.get("models_folder")
    records = []
    cur_index = PipelineConfig.steps.index(step_name)
    if cur_index >= 1 or models_folder:
        # records = Report().get_pareto_front_records(PipelineConfig.steps[cur_index - 1])
        if not models_folder:
            models_folder = FileOps.join_path(
                TaskOps().local_output_path, PipelineConfig.steps[cur_index - 1])
        models_folder = models_folder.replace(
            "{local_base_path}", TaskOps().local_base_path)
        records = Report().load_records_from_model_folder(models_folder)
    else:
        records = [ReportRecord(step_name, 0)]
    logging.debug("Records: {}".format(records))
    for record in records:
        record.step_name = step_name
    return records
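
# Illustrative resolution of the previous-step folder above (step names and
# paths are hypothetical): with local_output_path="/cache/out" and
# steps=["nas", "benchmark"], running step "benchmark" with no explicit
# models_folder loads records from "/cache/out/nas".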

def _load_single_model_records(self):
    model_desc = PipeStepConfig.model.get("model_desc")
    model_desc_file = PipeStepConfig.model.get("model_desc_file")
    if model_desc_file:
        model_desc_file = model_desc_file.replace(
            "{local_base_path}", TaskOps().local_base_path)
        model_desc = Config(model_desc_file)
    if not model_desc:
        logger.error("Model desc or model desc file is None.")
        return []
    model_file = PipeStepConfig.model.get("model_file")
    if not model_file:
        logger.error("Model file is None.")
        return []
    if not os.path.exists(model_file):
        logger.error("Model file does not exist.")
        return []
    # Return a list for consistency with the empty-list error paths above.
    return [ReportRecord().load_dict(
        dict(worker_id="1", desc=model_desc, weights_file=model_file))]
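
# Illustrative PipeStepConfig.model section consumed above (the keys come from
# the code; the values are hypothetical). model_desc_file, when set, takes
# precedence over an inline model_desc, and "{local_base_path}" is expanded
# for it; model_file is used as-is:
#
#   model:
#     model_desc_file: "{local_base_path}/output/nas/model_desc.json"
#     model_file: "/cache/models/model_0.pth"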

class Generator(object):
    """Convert search space and search algorithm, sample a new model."""

    def __init__(self):
        self.step_name = General.step_name
        self.search_space = SearchSpace()
        self.search_alg = SearchAlgorithm(self.search_space.search_space)
        self.report = Report()
        self.record = ReportRecord()
        self.record.step_name = self.step_name
        if hasattr(self.search_alg.config, 'objective_keys'):
            self.record.objective_keys = self.search_alg.config.objective_keys

    @property
    def is_completed(self):
        """Whether the search algorithm has completed."""
        return self.search_alg.is_completed

    def sample(self):
        """Sample a worker id and model description from the search algorithm."""
        res = self.search_alg.search()
        if not res:
            return None
        if not isinstance(res, list):
            res = [res]
        out = []
        for sample in res:
            if isinstance(sample, tuple):
                sample = dict(worker_id=sample[0], desc=sample[1])
            record = self.record.load_dict(sample)
            logging.debug("Broadcast Record=%s", str(record))
            desc = self._decode_hps(record.desc)
            record.desc = desc
            Report().broadcast(record)
            out.append((record.worker_id, desc))
        return out

    def update(self, step_name, worker_id):
        """Update the search algorithm according to the worker's report.

        :param step_name: step name
        :param worker_id: current worker id
        :return:
        """
        report = Report()
        record = report.receive(step_name, worker_id)
        logging.debug("Get Record=%s", str(record))
        self.search_alg.update(record.serialize())
        report.dump_report(record.step_name, record)
        logging.info("Update Success. step_name=%s, worker_id=%s", step_name, worker_id)
        logging.info("Best values: %s",
                     Report().pareto_front(step_name=General.step_name))

    @staticmethod
    def _decode_hps(hps):
        """Decode dotted hyperparameters such as `trainer.optim.lr: 0.1` into nested dict format.

        The result is wrapped in a `vega.core.common.config.Config` object, which
        is later overridden in the Trainer or Dataset classes. Override priority:
        input hps > user configuration > default configuration.

        :param hps: hyper params
        :return: Config
        """
        hps_dict = {}
        if hps is None:
            return None
        if isinstance(hps, tuple):
            return hps
        for hp_name, value in hps.items():
            hp_dict = {}
            for key in list(reversed(hp_name.split('.'))):
                if hp_dict:
                    hp_dict = {key: hp_dict}
                else:
                    hp_dict = {key: value}
            # Merge each expanded hyperparameter into the accumulated dict.
            hps_dict = update_dict(hps_dict, hp_dict, [])
        return Config(hps_dict)
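
# A minimal, self-contained sketch of the dotted-key expansion performed by
# Generator._decode_hps (standalone reimplementation for illustration; it
# returns a plain dict instead of a Config and assumes update_dict does a
# recursive merge, as its use above implies):
def _expand_dotted_keys(hps):
    result = {}
    for name, value in hps.items():
        node = result
        keys = name.split('.')
        # Walk/create intermediate dicts, then set the leaf value.
        for key in keys[:-1]:
            node = node.setdefault(key, {})
        node[keys[-1]] = value
    return result


assert _expand_dotted_keys({"trainer.optim.lr": 0.1, "trainer.optim.momentum": 0.9}) == \
    {"trainer": {"optim": {"lr": 0.1, "momentum": 0.9}}}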