class DartsNetworkTemplateConfig(object):
    """Darts network template config.

    Class attributes hold the built-in network templates, loaded from JSON
    files that ship alongside this module.
    """

    # Built-in template for CIFAR-10.
    cifar10 = Config(
        os.path.join(os.path.dirname(__file__), "darts_cifar10.json"))
    # Built-in template for ImageNet.
    imagenet = Config(
        os.path.join(os.path.dirname(__file__), "darts_imagenet.json"))
def __call__(self, x, training, weights=None):
    """Forward function of MixedOp.

    Rebuilds the candidate op(s) from ``self.ops_cands`` on every call,
    then either blends all candidates with ``weights`` (search phase) or
    applies the op(s) directly.

    :param x: input tensor
    :param training: training-mode flag, forwarded to batch normalization
    :param weights: per-candidate architecture weights; when given, all
        candidate outputs are weighted by ``weights[idx]`` and summed
    :return: output tensor
    """
    if not isinstance(self.ops_cands, list):
        # Single fixed op: built with affine (trainable) batch-norm params.
        op_desc = {
            'C': self.C,
            'stride': self.stride,
            'affine': True,
            'data_format': self.data_format
        }
        class_op = NetworkFactory.get_network(NetTypesMap['block'],
                                              self.ops_cands)
        self._ops = class_op(Config(op_desc))
    else:
        # Candidate list: build each primitive; pooling ops are tagged so an
        # extra non-trainable batch norm is applied after them below.
        self._ops = []
        for primitive in self.ops_cands:
            op_desc = {
                'C': self.C,
                'stride': self.stride,
                'affine': False,
                'data_format': self.data_format
            }
            class_op = NetworkFactory.get_network(NetTypesMap['block'],
                                                  primitive)
            op = class_op(Config(op_desc))
            if 'pool' in primitive:
                self._ops.append((op, True))
            else:
                self._ops.append((op, False))
    if weights is not None:
        # Search phase: weighted sum of every candidate's output.
        result = []
        for idx, (op, pool) in enumerate(self._ops):
            tmp = op(x, training=training)
            if pool:
                tmp = tf.layers.batch_normalization(
                    tmp,
                    axis=1 if self.data_format == 'channels_first' else 3,
                    trainable=False,
                    training=training)
            tmp = weights[idx] * tmp
            result.append(tmp)
        return tf.add_n(result)
    else:
        if isinstance(self._ops, list):
            # NOTE(review): without weights the candidate ops are applied
            # sequentially, each output feeding the next op — confirm this
            # chaining is the intended behavior.
            for idx, (op, pool) in enumerate(self._ops):
                x = op(x, training=training)
                if pool:
                    x = tf.layers.batch_normalization(
                        x,
                        axis=1 if self.data_format == 'channels_first' else 3,
                        trainable=False,
                        training=training)
        else:
            x = self._ops(x, training=training)
        return x
def _init_hps(self, hps):
    """Merge hyperparameters into the trainer config.

    A non-None ``hps_file`` entry in the config takes precedence over the
    *hps* argument; its "{local_base_path}" placeholder is expanded before
    loading.

    :param hps: hyperparameters
    :type hps: dict
    """
    if "hps_file" in self.cfg and self.cfg.hps_file is not None:
        resolved_path = self.cfg.hps_file.replace("{local_base_path}",
                                                  self.local_base_path)
        hps = Config(resolved_path)
    if hps is None:
        return
    self.cfg = Config(update_dict(hps.get('trainer'), self.cfg))
    self.hps = hps
def _pre_desc(self, channel_in, channel_out, kernel_size, stride, padding,
              affine, data_format):
    """Assemble the description of a preprocess operation.

    :param channel_in: input channel count
    :param channel_out: output channel count
    :param kernel_size: convolution kernel size
    :param stride: convolution stride
    :param padding: convolution padding
    :param affine: whether batch norm parameters are trainable
    :param data_format: tensor layout identifier
    :return: description holding all the parameters above
    :rtype: Config
    """
    desc = Config()
    desc.channel_in = channel_in
    desc.channel_out = channel_out
    desc.kernel_size = kernel_size
    desc.stride = stride
    desc.padding = padding
    desc.affine = affine
    desc.data_format = data_format
    return desc
def _save_model_desc(self):
    """Decode every pareto-front code and emit a model description for it."""
    search_space = SearchSpace()
    codec = Codec(self.cfg.codec, search_space)
    frontier = pd.read_csv(
        FileOps.join_path(self.result_path, "pareto_front.csv"))
    codes = frontier['Code']
    for idx in range(len(codes)):
        desc = Config()
        desc.custom = deepcopy(search_space.search_space.custom)
        desc.modules = deepcopy(search_space.search_space.modules)
        desc.custom.code = codes.loc[idx]
        desc.custom.method = 'full'
        codec.decode(desc.custom)
        self.trainer.output_model_desc(idx, desc)
def save_results(self):
    """Save the results of evolution contains the information of pupulation and elitism."""
    step_name = Config(deepcopy(UserConfig().data)).general.step_name
    out_dir = FileOps.join_path(self.local_output_path, step_name)
    FileOps.make_dir(out_dir)
    arch_file = FileOps.join_path(out_dir, 'arch.txt')
    arch_child = FileOps.join_path(out_dir, 'arch_child.txt')
    sel_arch_file = FileOps.join_path(out_dir, 'selected_arch.npy')
    # Both files get the same iteration header row.
    header = ['Population Iteration: ' + str(self.evolution_count + 1)]
    genes = []
    with open(arch_file, 'a') as fw_a, open(arch_child, 'a') as fw_ac:
        writer_a = csv.writer(fw_a, lineterminator='\n')
        writer_ac = csv.writer(fw_ac, lineterminator='\n')
        # Full population goes to arch_child.txt.
        writer_ac.writerow(header)
        for idx in range(self.individual_num):
            individual = self.pop[idx]
            writer_ac.writerow(
                self._log_data(net_info_type='active_only',
                               pop=individual,
                               value=individual.fitness))
        # Elitism goes to arch.txt; genes are collected for the .npy dump.
        writer_a.writerow(header)
        for idx in range(self.elitism_num):
            elite = self.elitism[idx]
            writer_a.writerow(
                self._log_data(net_info_type='active_only',
                               pop=elite,
                               value=self.elit_fitness[idx]))
            genes.append(elite.gene)
    np.save(sel_arch_file, np.stack(genes))
    if self.backup_base_path is not None:
        FileOps.copy_folder(self.local_output_path, self.backup_base_path)
def _evaluate_single_model(self, id=None, desc_file=None,
                           pretrained_model=None):
    """Create the GPU evaluator and run it on one model.

    Failures to create or run the evaluator are logged and swallowed so a
    single bad model does not stop the pipeline.

    :param id: worker/model identifier forwarded to the result output
    :param desc_file: optional model description file to evaluate
    :param pretrained_model: optional pretrained weights file
    """
    try:
        evaluator_cls = ClassFactory.get_cls(ClassType.GPU_EVALUATOR)
    except Exception:
        logger.error(
            "Failed to create Evaluator, please check the config file.")
        logger.error(traceback.format_exc())
        return
    if desc_file and pretrained_model is not None:
        evaluator_cls.cfg.model_desc_file = desc_file
        model_cfg = ClassFactory.__configs__.get('model')
        # Mirror the desc file into the global model config as well.
        if model_cfg:
            setattr(model_cfg, 'model_desc_file', desc_file)
        else:
            setattr(ClassFactory.__configs__, 'model',
                    Config({'model_desc_file': desc_file}))
        evaluator_cls.cfg.pretrained_model_file = pretrained_model
    try:
        evaluator = evaluator_cls()
        evaluator.train_process()
        evaluator.output_evaluate_result(id, evaluator.evaluate_result)
    except Exception:
        logger.error(
            "Failed to evaluate model, id={}, desc_file={}, pretrained_model={}"
            .format(id, desc_file, pretrained_model))
        logger.error(traceback.format_exc())
        return
def build_cell(self, name, C_prev_prev, C_prev, C_curr, reduction,
               reduction_prev):
    """Build cell for Darts Network.

    :param name: cell name
    :type name: str
    :param C_prev_prev: channel of previous of previous cell
    :type C_prev_prev: int
    :param C_prev: channel of previous cell
    :type C_prev: int
    :param C_curr: channel of current cell
    :type C_curr: int
    :param reduction: whether to reduce resolution in this cell
    :type reduction: bool
    :param reduction_prev: whether to reduce resolution in previous cell
    :return: object of cell
    :rtype: class type of cell
    """
    node = self.desc[name]
    cell_desc = {
        'genotype': node['genotype'],
        'steps': node['steps'],
        'concat': node['concat'],
        'C_prev_prev': C_prev_prev,
        'C_prev': C_prev,
        'C': C_curr,
        'reduction': reduction,
        'reduction_prev': reduction_prev,
        'search': self.search,
        'data_format': self.data_format
    }
    cell_class = NetworkFactory.get_network(NetTypesMap[node['type']],
                                            node['name'])
    return cell_class(Config(cell_desc))
def _init_model(self, model=None):
    """Load model desc from save path and parse to model.

    :param model: an already-built model; returned unchanged when given
    :return: the parsed model, or None when no description is available
    """
    if model is not None:
        return model
    model_cfg = ClassFactory.__configs__.get('model')
    if 'model_desc_file' in model_cfg and model_cfg.model_desc_file is not None:
        desc_file = model_cfg.model_desc_file.replace(
            "{model_zoo}", self.model_zoo_path)
        desc_file = desc_file.replace("{local_base_path}",
                                      self.local_base_path)
        # A ":" marks a remote location; plain local paths are absolutized,
        # remote files are copied into the local output directory first.
        if ":" not in desc_file:
            desc_file = os.path.abspath(desc_file)
        if ":" in desc_file:
            local_copy = FileOps.join_path(self.local_output_path,
                                           os.path.basename(desc_file))
            FileOps.copy_file(desc_file, local_copy)
            desc_file = local_copy
        if self.horovod:
            # Keep all horovod workers in sync before reading the file.
            hvd.join()
        model_desc = Config(desc_file)
        logging.info("net_desc:{}".format(model_desc))
    elif 'model_desc' in model_cfg and model_cfg.model_desc is not None:
        model_desc = model_cfg.model_desc
    else:
        return None
    if model_desc is None:
        return None
    self.model_desc = model_desc
    return NetworkDesc(model_desc).to_model()
def _init_model(self):
    """Initialize model if fully training a model.

    :return: config of fully train model
    :rtype: config file
    """
    config = Config(self.cfg.config_template)
    config['total_epochs'] = self.cfg.epoch
    if 'model_desc_file' in self.cfg:
        desc_csv = self.cfg.model_desc_file.replace(
            "{local_base_path}", TaskOps().local_base_path)
        records = ListDict.load_csv(desc_csv)
        best = records.sort('mAP')[0]
        pre_arch = best['arch']
        pretrained = pre_arch.split('_')[1]
        pre_worker_id = best['pre_worker_id']
        model_desc = dict(arch=pre_arch, pre_arch=pretrained,
                          pre_worker_id=-1)
        logging.info(
            "Initialize fully train model from: {}".format(model_desc))
        if self.cfg.regnition:
            # re-write config from previous result
            config['model']['backbone']['reignition'] = True
            config['model']['pretrained'] = os.path.join(
                self.output_path, pretrained + '_imagenet.pth')
        else:
            config['model']['pretrained'] = extract_backbone_from_pth(
                self.output_path, pre_worker_id, pretrained)
    elif 'model_desc' in self.cfg:
        model_desc = self.cfg.model_desc
    else:
        raise ValueError('Missing model description!')
    return update_config(config, model_desc)
def _save_model_desc(self):
    """Save final model desc of NAS."""
    pf_file = FileOps.join_path(self.trainer.local_output_path,
                                self.trainer.step_name, "pareto_front.csv")
    if not FileOps.exists(pf_file):
        return
    with open(pf_file, "r") as handle:
        frontier = pd.read_csv(handle)
    search_space = SearchSpace()
    codec = QuantCodec('QuantCodec', search_space)
    for idx, encoding in enumerate(frontier["encoding"].tolist()):
        # Encodings are serialized as "[1, 0, ...]"; parse back to ints.
        code = [int(token) for token in encoding[1:-1].split(',')]
        model_desc = Config()
        model_desc.modules = search_space.search_space.modules
        model_desc.backbone = codec.decode(code)._desc.backbone
        self.trainer.output_model_desc(idx, model_desc)
def __init__(self, metric_cfg=None):
    """Init Metrics.

    Builds ``self.mdict``, a mapping from metric name to an instantiated
    metric object (or a ``partial`` when the registered metric is a plain
    function), from ``self.config``.

    :param metric_cfg: unused; metrics are read from ``self.config``
    """
    self.mdict = {}
    metric_config = obj2config(self.config)
    if not isinstance(metric_config, list):
        metric_config = [metric_config]
    for metric_item in metric_config:
        # FIX: removed a stray `ClassFactory.get_cls(ClassType.METRIC,
        # self.config.type)` call here whose result was discarded and which
        # looked up the top-level config type instead of the current item.
        metric_name = metric_item.pop('type')
        metric_class = ClassFactory.get_cls(ClassType.METRIC, metric_name)
        if isfunction(metric_class):
            metric_class = partial(metric_class,
                                   **metric_item.get("params", {}))
        else:
            metric_class = metric_class(**metric_item.get("params", {}))
        self.mdict[metric_name] = metric_class
    self.mdict = Config(self.mdict)
    self.metric_results = dict()
def __init__(self, types=None, max_steps=None):
    """Create one runtime estimator per (type, max_step) pair.

    :param types: estimator type or list of types,
        defaults to ['epoch', 'train']
    :param max_steps: max step or list of max steps, defaults to [0, 0]
    :raises ValueError: if the two lists differ in length
    """
    # FIX: the defaults used to be mutable list literals; use None sentinels.
    if types is None:
        types = ['epoch', 'train']
    if max_steps is None:
        max_steps = [0, 0]
    self.estimator = Config()
    if not isinstance(types, list) or not isinstance(max_steps, list):
        types = [types]
        max_steps = [max_steps]
    if len(types) != len(max_steps):
        # FIX: was a bare `Exception` with a misleading message.
        raise ValueError('types length must equal to max_steps length')
    for est_type, max_step in zip(types, max_steps):
        self.add_runtime_est(est_type, max_step)
def _init_lr_scheduler(self):
    """Init lr scheduler from timm according to type in config."""
    scheduler_args = self.cfg.lr_scheduler.copy()
    scheduler_args['epochs'] = self.cfg.epochs
    scheduler, self.epochs = create_scheduler(Config(scheduler_args),
                                              self.optimizer)
    # Advance to the configured starting epoch (0 by default).
    scheduler.step(scheduler_args.get('start_epoch', 0))
    return scheduler
def _code_to_chninfo(self, code):
    """Transform code to channel info.

    The flat binary ``code`` is split into consecutive windows: one window
    per entry of ``base_chn`` (sized by the channel count), followed by one
    per entry of ``base_chn_node``. Each window becomes a channel mask; an
    all-zero window gets a single random 1 so every block keeps at least
    one channel. The resulting channel counts are the mask sums.

    :param code: input code
    :type code: list of int
    :return: channel info
    :rtype: Config
    """
    chn = self.search_space.backbone.base_chn
    chn_node = self.search_space.backbone.base_chn_node
    chninfo = Config()
    if code is None:
        # No code: pass the base channel config through unchanged.
        chninfo['chn'] = chn
        chninfo['chn_node'] = chn_node
        chninfo['encoding'] = code
        return chninfo
    chn_mask = []
    chn_node_mask = []
    # Sliding window [start_id, end_id) over the flat code.
    start_id = 0
    end_id = chn[0]
    for i in range(len(chn)):
        if sum(code[start_id:end_id]) == 0:
            # All-zero mask would drop the block entirely; force one
            # random channel on.
            len_mask = len(code[start_id:end_id])
            tmp_mask = [0] * len_mask
            tmp_mask[random.randint(0, len_mask - 1)] = 1
            chn_mask.append(tmp_mask)
        else:
            chn_mask.append(code[start_id:end_id])
        start_id = end_id
        # Next window: last chn window is followed by the first node window.
        if i + 1 == len(chn):
            end_id += chn_node[0]
        else:
            end_id += chn[i + 1]
    # Channel count of each block = number of selected channels in its mask.
    chn = []
    for single_chn_mask in chn_mask:
        chn.append(sum(single_chn_mask))
    for i in range(len(chn_node)):
        if sum(code[start_id:end_id]) == 0:
            # Same all-zero guard for node masks.
            len_mask = len(code[start_id:end_id])
            tmp_mask = [0] * len_mask
            tmp_mask[random.randint(0, len_mask - 1)] = 1
            chn_node_mask.append(tmp_mask)
        else:
            chn_node_mask.append(code[start_id:end_id])
        start_id = end_id
        if i + 1 < len(chn_node):
            end_id += chn_node[i + 1]
    chn_node = []
    for single_chn_mask in chn_node_mask:
        chn_node.append(sum(single_chn_mask))
    chninfo['chn'] = chn
    chninfo['chn_node'] = chn_node
    chninfo['chn_mask'] = chn_mask
    chninfo['chn_node_mask'] = chn_node_mask
    chninfo['encoding'] = code
    return chninfo
def build_mixedop(self, **kwargs):
    """Build MixedOp.

    :param kwargs: arguments for MixedOp
    :type kwargs: dict
    :return: MixedOp Object
    :rtype: MixedOp
    """
    return MixedOp(Config(**kwargs))
def __init__(self, search_space=None, **kwargs):
    """Init DartsCodec.

    Keeps a deep copy of the search space and exposes the super network's
    normal/reduce genotypes as ``self.super_net``.
    """
    super(DartsCodec, self).__init__(search_space, **kwargs)
    self.darts_cfg = copy.deepcopy(search_space)
    normal_cfg = self.darts_cfg.super_network.normal
    reduce_cfg = self.darts_cfg.super_network.reduce
    self.super_net = Config({
        'normal': normal_cfg.genotype,
        'reduce': reduce_cfg.genotype
    })
    self.steps = normal_cfg.steps
def __init__(self, metric_cfg):
    """Init Metrics.

    Builds ``self.mdict`` mapping metric names to metric instances (or
    partials for function-style metrics) from *metric_cfg*.
    """
    metric_config = deepcopy(metric_cfg)
    if not isinstance(metric_config, list):
        metric_config = [metric_config]
    self.mdict = {}
    for item in metric_config:
        name = item.pop('type')
        if ClassFactory.is_exists(ClassType.METRIC, name):
            metric_cls = ClassFactory.get_cls(ClassType.METRIC, name)
        else:
            # Fall back to the built-in metrics module.
            metric_cls = getattr(
                importlib.import_module('vega.core.metrics'), name)
        if isfunction(metric_cls):
            self.mdict[name] = partial(metric_cls, **item)
        else:
            self.mdict[name] = metric_cls(**item)
    self.mdict = Config(self.mdict)
def __init__(self, desc):
    """Init MixedOp.

    A single candidate yields one op with affine batch norm; a candidate
    list yields a ModuleList where pooling primitives are followed by a
    non-affine BatchNorm2d.
    """
    super(MixedOp, self).__init__()
    channels = desc.C
    stride = desc.stride
    candidates = desc.ops_cands
    if not isinstance(candidates, list):
        op_cls = NetworkFactory.get_network(NetTypesMap['block'], candidates)
        self._ops = op_cls(
            Config({'C': channels, 'stride': stride, 'affine': True}))
        return
    self._ops = nn.ModuleList()
    for primitive in candidates:
        op_cls = NetworkFactory.get_network(NetTypesMap['block'], primitive)
        op = op_cls(
            Config({'C': channels, 'stride': stride, 'affine': False}))
        if 'pool' in primitive:
            op = nn.Sequential(op, nn.BatchNorm2d(channels, affine=False))
        self._ops.append(op)
def _network_stems(self, stem):
    """Build stems part.

    :param stem: stem part of network
    :type stem: torch.nn.Module
    :return: stem's output channel
    :rtype: int
    """
    stem_cls = NetworkFactory.get_network(NetTypesMap['block'], stem)
    desc = {'C': self._C, 'stem_multi': 3, 'data_format': self.data_format}
    self.stem = stem_cls(Config(desc))
    return self.stem.C_curr
def _save_descript(self):
    """Save result descript."""
    template_file = self.config.darts_template_file
    genotypes = self.search_alg.codec.calc_genotype(
        self._get_arch_weights())
    builtin_templates = {
        "{default_darts_cifar10_template}":
            DartsNetworkTemplateConfig.cifar10,
        "{default_darts_imagenet_template}":
            DartsNetworkTemplateConfig.imagenet,
    }
    if template_file in builtin_templates:
        template = builtin_templates[template_file]
    else:
        # Custom template: copy it into the worker directory first.
        dst = FileOps.join_path(self.trainer.get_local_worker_path(),
                                os.path.basename(template_file))
        FileOps.copy_file(template_file, dst)
        template = Config(dst)
    self.trainer.config.codec = self._gen_model_desc(genotypes, template)
def _init_model(self):
    """Initialize the model architecture for full train step.

    :return: train model
    :rtype: class
    """
    self.codec = Codec(self.cfg.codec,
                       Config({"search_space": self.model_desc}))
    self.get_selected_arch()
    decoded = self.codec.decode(self.elitism)
    self.trainer.model_desc = self.elitism.active_net_list()
    # self.output_model_desc()
    return NetworkDesc(decoded).to_model()
def _train_single_model(self, desc_file=None, model_id=None):
    """Train one model, optionally overriding its description file.

    :param desc_file: model description file to train from
    :param model_id: worker id for the trainer
    """
    trainer_cls = ClassFactory.get_cls('trainer')
    if desc_file is not None:
        trainer_cls.cfg.model_desc_file = desc_file
        model_cfg = ClassFactory.__configs__.get('model')
        # Mirror the desc file into the global model config.
        if model_cfg:
            setattr(model_cfg, 'model_desc_file', desc_file)
        else:
            setattr(ClassFactory.__configs__, 'model',
                    Config({'model_desc_file': desc_file}))
    if trainer_cls.cfg.get('horovod', False):
        self._do_horovod_fully_train()
    else:
        self.master.run(trainer_cls(None, id=model_id))
def do(self):
    """Start to run benchmark evaluator."""
    logger.info("BenchmarkPipeStep started...")
    cfg = Config(deepcopy(UserConfig().data))
    pipe_step_cfg = cfg[cfg.general.step_name].pipe_step
    has_esr = ("esr_models_file" in pipe_step_cfg
               and pipe_step_cfg.esr_models_file is not None)
    has_folder = ("models_folder" in pipe_step_cfg
                  and pipe_step_cfg.models_folder is not None)
    if has_esr:
        # TODO: ESR model
        self._evaluate_esr_models(pipe_step_cfg.esr_models_file,
                                  pipe_step_cfg.models_folder)
    elif has_folder:
        self._evaluate_multi_models(pipe_step_cfg.models_folder)
    else:
        self._evaluate_single_model()
    self._backup_output_path()
    logger.info("Complete model evaluation.")
def add_runtime_est(self, type, max_step):
    """Add new type of runtime estimator.

    Registers a fresh, zeroed estimator entry; a duplicate type is ignored
    with a warning.

    :param type: runtime type
    :type type: str
    :param max_step: max step of new type
    :type max_step: int
    """
    if type in self.estimator:
        logging.warning('type %s has already in estimator', type)
        return
    self.estimator[type] = Config()
    # Timing and step counters start unset/zero until the estimator runs.
    self.estimator[type].start_time = None
    self.estimator[type].current_time = None
    self.estimator[type].start_step = 0
    self.estimator[type].current_step = 0
    self.estimator[type].max_step = max_step
def _save_descript(self, descript):
    """Save result descript.

    :param descript: darts search result descript
    :type descript: dict or Config
    """
    template_file = self.cfg.darts_template_file
    genotypes = self.search_alg.codec.calc_genotype(self.model.arch_weights)
    builtin_templates = {
        "{default_darts_cifar10_template}":
            DefaultConfig().data.default_darts_cifar10_template,
        "{default_darts_imagenet_template}":
            DefaultConfig().data.default_darts_imagenet_template,
    }
    if template_file in builtin_templates:
        template = builtin_templates[template_file]
    else:
        # Custom template: copy it into the worker directory first.
        dst = FileOps.join_path(self.trainer.get_local_worker_path(),
                                os.path.basename(template_file))
        FileOps.copy_file(template_file, dst)
        template = Config(dst)
    model_desc = self._gen_model_desc(genotypes, template)
    self.trainer.output_model_desc(self.trainer.worker_id, model_desc)
def genotypes_to_json(self, genotypes):
    """Transfer genotypes to json.

    :param genotypes: Genotype for models
    :type genotypes: namedtuple Genotype
    :return: one model description per genotype
    :rtype: list
    :raises ValueError: if the configured template file is not one of the
        known built-in templates
    """
    template_file = self.trainer.config.darts_template_file
    if template_file == "{default_darts_cifar10_template}":
        template = DartsNetworkTemplateConfig.cifar10
    elif template_file == "{default_darts_imagenet_template}":
        template = DartsNetworkTemplateConfig.imagenet
    else:
        # FIX: `template` used to be left unbound here, producing an obscure
        # UnboundLocalError below; fail fast with a clear message instead.
        raise ValueError(
            "Unknown darts template file: {}".format(template_file))
    desc_list = []
    for genotype in genotypes:
        template_cfg = Config(template)
        template_cfg.super_network.normal.genotype = genotype.normal
        template_cfg.super_network.reduce.genotype = genotype.reduce
        desc_list.append(template_cfg)
    return desc_list
def _train_esr_models(self, esr_models_file):
    """Train every architecture listed in an ESR models file.

    :param esr_models_file: path to a .npy file of architectures; the
        "{local_base_path}" placeholder is expanded before loading
    """
    resolved = esr_models_file.replace("{local_base_path}",
                                       self.task.local_base_path)
    archs = np.load(os.path.abspath(resolved))
    for model_id, arch in enumerate(archs):
        trainer_cls = ClassFactory.get_cls('trainer')
        trainer_cls.cfg.model_arch = arch
        model_cfg = ClassFactory.__configs__.get('model')
        # Mirror the architecture into the global model config.
        if model_cfg:
            setattr(model_cfg, 'model_arch', arch)
        else:
            setattr(ClassFactory.__configs__, 'model',
                    Config({'model_arch': arch}))
        if trainer_cls.cfg.get('horovod', False):
            self._do_horovod_fully_train()
        else:
            self.master.run(trainer_cls(None, id=model_id))
def _load_single_model_records(self):
    """Build the report record for the single configured model.

    :return: a one-element record list, or [] when desc/weights are missing
    """
    model_desc = PipeStepConfig.model.get("model_desc")
    desc_file = PipeStepConfig.model.get("model_desc_file")
    if desc_file:
        # A desc file, when present, overrides the inline description.
        desc_file = desc_file.replace("{local_base_path}",
                                      TaskOps().local_base_path)
        model_desc = Config(desc_file)
    if not model_desc:
        logger.error("Model desc or Model desc file is None.")
        return []
    model_file = PipeStepConfig.model.get("model_file")
    if not model_file:
        logger.error("Model file is None.")
        return []
    if not os.path.exists(model_file):
        logger.error("Model file is not existed.")
        return []
    return ReportRecord().load_dict(
        dict(worker_id="1", desc=model_desc, weights_file=model_file))
def _init_model(self):
    """Initialize the model architecture for full train step.

    :return: train model
    :rtype: class
    """
    model_cfg = ClassFactory.__configs__.get('model')
    if 'model_desc' not in model_cfg or model_cfg.model_desc is None:
        raise ValueError('Model_desc is None for evaluator')
    model_desc = model_cfg.model_desc
    self.codec = Codec(self.cfg.codec,
                       Config({"search_space": model_desc}))
    self._get_selected_arch()
    decoded = self.codec.decode(self.elitism)
    logger.info('Model arch:{}'.format(self.elitism.active_net_list()))
    self.model_desc = self.elitism.active_net_list()
    return NetworkDesc(decoded).to_model()