def _init_transforms(self):
    """Initialize transforms from the configuration.

    :return: a list of transform objects
    :rtype: list
    """
    if "transforms" in self.args.keys():
        transforms = list()
        if not isinstance(self.args.transforms, list):
            self.args.transforms = [self.args.transforms]
        for i in range(len(self.args.transforms)):
            transform_name = self.args.transforms[i].pop("type")
            kwargs = self.args.transforms[i]
            if ClassFactory.is_exists(ClassType.TRANSFORM, transform_name):
                transforms.append(
                    ClassFactory.get_cls(ClassType.TRANSFORM, transform_name)(**kwargs))
            else:
                # Fall back to torchvision when the transform is not registered.
                transforms.append(
                    getattr(importlib.import_module('torchvision.transforms'),
                            transform_name)(**kwargs))
        return transforms
    else:
        return list()
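# Usage sketch for the fallback path above (config values are illustrative,
# not from the source; assumes torchvision is installed): each entry names a
# transform class plus its keyword arguments, and "type" is popped before
# instantiation.
import importlib

example_transforms = [
    {"type": "RandomCrop", "size": 32, "padding": 4},
    {"type": "RandomHorizontalFlip"},
]
built = []
for cfg in example_transforms:
    cfg = dict(cfg)  # copy so the original config is not mutated
    cls = getattr(importlib.import_module('torchvision.transforms'), cfg.pop("type"))
    built.append(cls(**cfg))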
def _use_evaluator(self):
    """Check whether an evaluator is configured and collect the evaluator classes.

    :return: whether an evaluator is used, and the evaluator classes
    :rtype: bool, (Evaluator, GpuEvaluator, DloopEvaluator)
    """
    use_evaluator = False
    cls_evaluator_set = []
    try:
        cls_gpu_evaluator = ClassFactory.get_cls(ClassType.GPU_EVALUATOR)
        use_evaluator = True
        cls_evaluator_set.append(cls_gpu_evaluator)
    except Exception as e:
        logger.warning("GPU evaluator is not set. {}".format(str(e)))
    try:
        cls_hava_d_evaluator = ClassFactory.get_cls(ClassType.HAVA_D_EVALUATOR)
        use_evaluator = True
        cls_evaluator_set.append(cls_hava_d_evaluator)
    except Exception:
        pass
    try:
        cls_davinci_mobile_evaluator = ClassFactory.get_cls(
            ClassType.DAVINCI_MOBILE_EVALUATOR)
        use_evaluator = True
        cls_evaluator_set.append(cls_davinci_mobile_evaluator)
    except Exception:
        pass
    return use_evaluator, cls_evaluator_set
def __new__(cls, hps=None, **kwargs):
    """Construct method."""
    if "dataset_type" in kwargs.keys():
        t_cls = ClassFactory.get_cls(ClassType.DATASET, kwargs["dataset_type"])
    else:
        t_cls = ClassFactory.get_cls(ClassType.DATASET)
    return super(Dataset, cls).__new__(t_cls)
def __new__(cls, *args, **kwargs):
    """Create a subclass instance of dataset."""
    if Dataset in cls.__bases__:
        # A concrete subclass is being constructed directly; no dispatch needed.
        return super().__new__(cls)
    if kwargs.get('type'):
        t_cls = ClassFactory.get_cls(ClassType.DATASET, kwargs.pop('type'))
    else:
        t_cls = ClassFactory.get_cls(ClassType.DATASET)
    return super().__new__(t_cls)
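# Self-contained toy mirroring the dispatch above (all names illustrative,
# not from the source): __new__ on the base class looks up a registered
# subclass by the ``type`` keyword and instantiates it instead of the base
# class, while direct subclass construction stays untouched.
_REGISTRY = {}

class Base:
    def __new__(cls, *args, **kwargs):
        if Base in cls.__bases__:
            return super().__new__(cls)
        t_cls = _REGISTRY[kwargs.pop('type')]
        return super().__new__(t_cls)

class Cifar(Base):
    pass

_REGISTRY['Cifar'] = Cifar
assert type(Base(type='Cifar')) is Cifar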
def _init_callbacks(self, callbacks):
    # Initialize callbacks from configuration or from the given parameter
    if callbacks is None:
        _callbacks = []
        callbacks_config = self.cfg.callbacks.copy()
        for callback_config in callbacks_config.values():
            callback_name = callback_config.pop('type')
            if ClassFactory.is_exists(ClassType.CALLBACK, callback_name):
                callback_class = ClassFactory.get_cls(
                    ClassType.CALLBACK, callback_name)
                callback = callback_class(**callback_config)
                _callbacks.append(callback)
            else:
                raise ValueError(
                    "Undefined callback {}".format(callback_name))
    else:
        _callbacks = callbacks
    # Sort the callbacks: pull out the predefined ones with special roles
    # (elif avoids registering the same callback twice)
    metrics_evaluator = None
    model_checkpoint = None
    model_statistics = None
    predefined_callbacks = []
    customized_callbacks = []
    for callback in _callbacks:
        if isinstance(callback, self._predefined_callbacks()):
            if isinstance(callback, MetricsEvaluator):
                metrics_evaluator = callback
            elif isinstance(callback, ModelStatistics):
                model_statistics = callback
            elif isinstance(callback, ModelCheckpoint):
                model_checkpoint = callback
            else:
                predefined_callbacks.append(callback)
        else:
            customized_callbacks.append(callback)
    if metrics_evaluator is None:
        metrics_evaluator = MetricsEvaluator()
    if model_checkpoint is None:
        model_checkpoint = ModelCheckpoint()
    _callbacks = [metrics_evaluator, model_checkpoint] + \
        customized_callbacks + predefined_callbacks
    if 'model_statistic' in self.cfg and self.cfg.model_statistic:
        if model_statistics is None:
            model_statistics = ModelStatistics()
        _callbacks = [model_statistics] + _callbacks
    # Create the CallbackList and set its trainer and parameters
    self.callbacks = CallbackList(_callbacks)
    _callbacks_params = {
        'epochs': self.epochs,
        'is_chief': self.is_chief,
        'use_cuda': self.use_cuda,
        'do_validation': self.do_validation,
        'is_detection_trainer': self.cfg.is_detection_trainer
    }
    self.callbacks.set_params(_callbacks_params)
    self.callbacks.set_trainer(self)
def register_network_cls(t_cls):
    """Register a network class under the given net_type."""
    if t_cls.__name__ in cls.__network_registry__[net_type]:
        raise ValueError(
            "Cannot register duplicate network ({})".format(t_cls.__name__))
    # TODO: unify NetworkFactory and ClassFactory
    if net_type == NetTypes.LOSS:
        ClassFactory.register_cls(t_cls, ClassType.LOSS)
    cls.__network_registry__[net_type][t_cls.__name__] = t_cls
    return t_cls
def __new__(cls, *args, **kwargs):
    """Create search algorithm instance by ClassFactory."""
    if cls.__name__ != 'SearchAlgorithm':
        return super().__new__(cls)
    if kwargs.get('type'):
        t_cls = ClassFactory.get_cls(ClassType.SEARCH_ALGORITHM, kwargs.pop('type'))
    else:
        t_cls = ClassFactory.get_cls(ClassType.SEARCH_ALGORITHM)
    return super().__new__(t_cls)
def __init__(self, aux_weight, loss_base):
    """Init MixAuxiliaryLoss."""
    self.aux_weight = aux_weight
    loss_base_cp = loss_base.copy()
    loss_base_name = loss_base_cp.pop('type')
    if ClassFactory.is_exists('trainer.loss', loss_base_name):
        loss_class = ClassFactory.get_cls('trainer.loss', loss_base_name)
    else:
        loss_class = getattr(importlib.import_module('tensorflow.losses'),
                             loss_base_name)
    self.loss_fn = loss_class(**loss_base_cp)
def _init_lr_scheduler(self, scheduler=None):
    """Init lr scheduler from torch.optim.lr_scheduler according to type in config."""
    if scheduler is not None:
        return scheduler
    scheduler_config = self.cfg.lr_scheduler.copy()
    scheduler_name = scheduler_config.pop('type')
    if ClassFactory.is_exists(ClassType.LR_SCHEDULER, scheduler_name):
        scheduler_class = ClassFactory.get_cls(ClassType.LR_SCHEDULER,
                                               scheduler_name)
    else:
        scheduler_class = getattr(
            importlib.import_module('torch.optim.lr_scheduler'), scheduler_name)
    return scheduler_class(self.optimizer, **scheduler_config)
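# The registry-first / stdlib-fallback lookup above recurs across the loss,
# optimizer, and scheduler initializers; a minimal sketch of the helper they
# could share (the name resolve_class and its signature are hypothetical):
import importlib

def resolve_class(class_type, name, fallback_module):
    """Return a registered class if present, else look it up in fallback_module."""
    if ClassFactory.is_exists(class_type, name):
        return ClassFactory.get_cls(class_type, name)
    return getattr(importlib.import_module(fallback_module), name)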
def _init_after_scheduler(self):
    """Init after_scheduler with after_scheduler_config."""
    if isinstance(self.after_scheduler_config, dict):
        scheduler_config = copy.deepcopy(self.after_scheduler_config)
        logging.debug("after_scheduler_config: {}".format(scheduler_config))
        scheduler_name = scheduler_config.pop('type')
        if ClassFactory.is_exists(ClassType.LR_SCHEDULER, scheduler_name):
            scheduler_class = ClassFactory.get_cls(ClassType.LR_SCHEDULER,
                                                   scheduler_name)
        else:
            scheduler_class = getattr(
                importlib.import_module('torch.optim.lr_scheduler'),
                scheduler_name)
        self.after_scheduler = scheduler_class(self.optimizer, **scheduler_config)
def _init_dataloader(self, mode, loader=None):
    """Init dataloader."""
    if loader is not None:
        return loader
    dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
    dataset = dataset_cls(mode=mode)
    return dataset.dataloader
def decorator(cls):
    """Provide input param to decorator.

    :param cls: the rules class to wrap
    :return: decorator
    """
    # TODO: the referenced package needs to be imported
    if isinstance(class_name, str):
        need_validate_cls = ClassFactory.get_cls(ClassType.CONFIG, class_name)
    else:
        need_validate_cls = class_name

    @wraps(cls)
    def wrapper(*args, **kwargs):
        """Validate the target config attributes, then construct the rules class."""
        valid_attrs = {
            key: item for key, item in cls.__dict__.items()
            if not key.startswith('_')
        }
        for attr_name, rules in valid_attrs.items():
            attr_value = getattr(need_validate_cls, attr_name)
            if isinstance(rules, (list, tuple)):
                for _rule in rules:
                    _rule(attr_value)
            else:
                rules(attr_value)
        return cls(*args, **kwargs)
    return wrapper
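# Self-contained toy of the same validate-on-construct pattern (decorator
# name, rule, and config class are all hypothetical): each public attribute
# of the rules class is a callable applied to the same-named attribute of
# the target config when the rules class is instantiated.
from functools import wraps

def validates(target):
    def decorator(cls):
        @wraps(cls)
        def wrapper(*args, **kwargs):
            rules = {k: v for k, v in cls.__dict__.items() if not k.startswith('_')}
            for name, rule in rules.items():
                value = getattr(target, name)
                for check in (rule if isinstance(rule, (list, tuple)) else [rule]):
                    check(value)
            return cls(*args, **kwargs)
        return wrapper
    return decorator

def positive(value):
    if value <= 0:
        raise ValueError("must be positive")

class Cfg:
    lr = 0.1

@validates(Cfg)
class CfgRules:
    lr = positive

CfgRules()  # runs the rule against Cfg.lr; raises ValueError if lr <= 0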
def run(self):
    """Execute the whole pipeline."""
    def _shutdown_cluster(signum, frame):
        logging.info("Shutdown urgently.")
        Master.shutdown()
        os._exit(0)
    try:
        signal.signal(signal.SIGINT, _shutdown_cluster)
        signal.signal(signal.SIGTERM, _shutdown_cluster)
        for step_name in PipelineConfig.steps:
            step_cfg = UserConfig().data.get(step_name)
            General.step_name = step_name
            ClassFactory().set_current_step(step_cfg)
            # load the Config object from the step description
            load_conf_from_desc(PipeStepConfig, step_cfg)
            logger.info("Start pipeline step: [{}]".format(step_name))
            PipeStep().do()
    except Exception:
        logger.error("Failed to run pipeline.")
        logger.error(traceback.format_exc())
    try:
        Master.shutdown()
    except Exception:
        logger.error("Failed to shutdown dask cluster.")
        logger.error(traceback.format_exc())
def _evaluate_single_model(self, id=None, desc_file=None, pretrained_model=None):
    try:
        cls_gpu_evaluator = ClassFactory.get_cls(ClassType.GPU_EVALUATOR)
    except Exception:
        logger.error("Failed to create Evaluator, please check the config file.")
        logger.error(traceback.format_exc())
        return
    if desc_file is not None and pretrained_model is not None:
        cls_gpu_evaluator.cfg.model_desc_file = desc_file
        model_cfg = ClassFactory.__configs__.get('model')
        if model_cfg:
            setattr(model_cfg, 'model_desc_file', desc_file)
        else:
            setattr(ClassFactory.__configs__, 'model',
                    Config({'model_desc_file': desc_file}))
        cls_gpu_evaluator.cfg.pretrained_model_file = pretrained_model
    try:
        evaluator = cls_gpu_evaluator()
        evaluator.train_process()
        evaluator.output_evaluate_result(id, evaluator.evaluate_result)
    except Exception:
        logger.error(
            "Failed to evaluate model, id={}, desc_file={}, pretrained_model={}".format(
                id, desc_file, pretrained_model))
        logger.error(traceback.format_exc())
        return
def _evaluate_esr_models(self, esr_models_file, models_folder):
    models_folder = models_folder.replace("{local_base_path}",
                                          self.task.local_base_path)
    models_folder = os.path.abspath(models_folder)
    esr_models_file = esr_models_file.replace("{local_base_path}",
                                              self.task.local_base_path)
    esr_models_file = os.path.abspath(esr_models_file)
    archs = np.load(esr_models_file)
    for i, arch in enumerate(archs):
        try:
            cls_gpu_evaluator = ClassFactory.get_cls(ClassType.GPU_EVALUATOR)
        except Exception:
            logger.error("Failed to create Evaluator, please check the config file.")
            logger.error(traceback.format_exc())
            return
        pretrained_model = FileOps.join_path(models_folder,
                                             "model_{}.pth".format(i))
        if not os.path.exists(pretrained_model):
            logger.error("Failed to find model file, file={}".format(pretrained_model))
            continue  # skip this model rather than evaluating a missing file
        cls_gpu_evaluator.cfg.model_arch = arch
        cls_gpu_evaluator.cfg.pretrained_model_file = pretrained_model
        try:
            evaluator = cls_gpu_evaluator()
            evaluator.train_process()
            evaluator.output_evaluate_result(i, evaluator.evaluate_result)
        except Exception:
            logger.error("Failed to evaluate model, id={}, pretrained_model={}".format(
                i, pretrained_model))
            logger.error(traceback.format_exc())
            return
def _evaluate_single_model(self, record):
    try:
        cls_gpu_evaluator = ClassFactory.get_cls(ClassType.GPU_EVALUATOR)
    except Exception:
        logger.error("Failed to create Evaluator, please check the config file.")
        logger.error(traceback.format_exc())
        return
    # Built outside the try block so the except handler can always log it.
    worker_info = {
        "step_name": record.step_name,
        "worker_id": record.worker_id
    }
    try:
        _record = dict(worker_id=record.worker_id, desc=record.desc,
                       step_name=record.step_name)
        _init_record = ReportRecord().load_dict(_record)
        Report().broadcast(_init_record)
        evaluator = cls_gpu_evaluator(worker_info=worker_info,
                                      model_desc=record.desc,
                                      weights_file=record.weights_file)
        self.master.run(evaluator)
    except Exception:
        logger.error("Failed to evaluate model, worker info={}".format(worker_info))
        logger.error(traceback.format_exc())
        return
def create_search_space(desc):
    """Create one search space from desc."""
    param = deepcopy(desc)
    module_type = param.pop('type')
    if module_type == 'FineGrainedSpace':
        return FineGrainedSpaceFactory.from_desc(param)
    module = ClassFactory.get_cls(ClassType.SEARCH_SPACE, module_type)
    return module(**param) if param else module()
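# Hypothetical desc (the class name and keys are illustrative, not from the
# source): 'type' selects either the fine-grained space factory or a
# registered SEARCH_SPACE class, and the remaining keys become constructor
# kwargs.
desc = {"type": "ResNetVariantSpace", "depth": 20, "base_channel": 16}
# space = create_search_space(desc)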
def _init_dataloader(self):
    """Init dataloader."""
    data_cls = ClassFactory.get_cls(ClassType.DATASET)
    data_cfg = copy.deepcopy(ClassFactory.__configs__.get(ClassType.DATASET))
    data_cfg.pop('type')
    self.train_data, self.valid_data = [
        data_cls(**data_cfg, mode=mode) for mode in ['train', 'val']
    ]
def __init__(self, metric_cfg=None):
    """Init Metrics."""
    self.mdict = {}
    metric_config = obj2config(self.config)
    if not isinstance(metric_config, list):
        metric_config = [metric_config]
    for metric_item in metric_config:
        metric_name = metric_item.pop('type')
        metric_class = ClassFactory.get_cls(ClassType.METRIC, metric_name)
        if isfunction(metric_class):
            # Metric functions are bound to their params via partial.
            metric_class = partial(metric_class, **metric_item.get("params", {}))
        else:
            metric_class = metric_class(**metric_item.get("params", {}))
        self.mdict[metric_name] = metric_class
    self.mdict = Config(self.mdict)
    self.metric_results = dict()
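# Hypothetical configuration shape accepted above (metric names are
# illustrative): each entry carries a 'type' plus optional 'params' that are
# forwarded to the metric class or function.
metric_cfg = [
    {"type": "accuracy", "params": {"topk": (1, 5)}},
    {"type": "psnr", "params": {}},
]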
def _init_loss(self, loss_fn=None):
    """Init loss function from torch according to type in config."""
    if loss_fn is not None:
        return loss_fn
    loss_config = self.cfg.loss.copy()
    loss_name = loss_config.pop('type')
    if NetworkFactory.is_exists(NetTypes.LOSS, loss_name):
        loss_class = NetworkFactory.get_network(NetTypes.LOSS, loss_name)
    elif ClassFactory.is_exists('trainer.loss', loss_name):
        loss_class = ClassFactory.get_cls('trainer.loss', loss_name)
    else:
        loss_class = getattr(importlib.import_module('torch.nn'), loss_name)
    loss_fn = loss_class(**loss_config)
    if self.cfg.cuda:
        loss_fn = loss_fn.cuda()
    return loss_fn
def get_search_space(cls_name):
    """Get Search Space by class name.

    :param cls_name: class name
    :return: Search Space cls
    """
    return ClassFactory.get_cls(ClassType.SEARCH_SPACE, cls_name, bing_cfg=False)
def __init__(self, metric_cfg):
    """Init Metrics."""
    metric_config = deepcopy(metric_cfg)
    self.mdict = {}
    if not isinstance(metric_config, list):
        metric_config = [metric_config]
    for metric_item in metric_config:
        metric_name = metric_item.pop('type')
        if ClassFactory.is_exists(ClassType.METRIC, metric_name):
            metric_class = ClassFactory.get_cls(ClassType.METRIC, metric_name)
        else:
            metric_class = getattr(
                importlib.import_module('vega.core.metrics'), metric_name)
        if isfunction(metric_class):
            metric_class = partial(metric_class, **metric_item)
        else:
            metric_class = metric_class(**metric_item)
        self.mdict[metric_name] = metric_class
    self.mdict = Config(self.mdict)
def _init_optimizer(self, optimizer=None):
    """Init optimizer from torch.optim according to optim type in config."""
    if optimizer is not None:
        return optimizer
    optim_config = self.cfg.optim.copy()
    optim_name = optim_config.pop('type')
    if ClassFactory.is_exists(ClassType.OPTIM, optim_name):
        optim_class = ClassFactory.get_cls(ClassType.OPTIM, optim_name)
    else:
        optim_class = getattr(importlib.import_module('torch.optim'), optim_name)
    learnable_params = [
        param for param in self.model.parameters() if param.requires_grad
    ]
    optimizer = optim_class(learnable_params, **optim_config)
    if self.horovod:
        # Wrap for distributed training so gradients are averaged across workers.
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=self.model.named_parameters(),
            compression=hvd.Compression.none)
    return optimizer
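# Typical config consumed above (values illustrative): 'type' is resolved
# against the OPTIM registry first, then against torch.optim, and the
# remaining keys are passed straight to the optimizer constructor.
optim_cfg = {"type": "SGD", "lr": 0.1, "momentum": 0.9, "weight_decay": 1e-4}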
def import_torch_operators():
    """Import search space operators from torch."""
    ops = Config()
    for _name in dir(nn):
        if _name.startswith("_"):
            continue
        _cls = getattr(nn, _name)
        if not isclass(_cls):
            continue
        ops[_name] = ClassFactory.register_cls(_cls, ClassType.SEARCH_SPACE)
    return ops
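# Sketch of the result (assuming register_cls returns the class it was
# given): every public torch.nn class becomes a SEARCH_SPACE entry, so it
# can be referenced by name in a space description.
# ops = import_torch_operators()
# conv = ops["Conv2d"](3, 16, kernel_size=3)   # same class as torch.nn.Conv2d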
def _init_dataloader(self, mode, loader=None):
    """Init dataloader."""
    if loader is not None:
        return loader
    dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
    if mode == "train" and self.hps is not None and self.hps.get("dataset") is not None:
        dataset = dataset_cls(mode=mode, hps=self.hps.get("dataset"))
    else:
        dataset = dataset_cls(mode=mode)
    if vega.is_torch_backend():
        if self.distributed:
            sampler = torch.utils.data.distributed.DistributedSampler(
                dataset, num_replicas=hvd.size(), rank=hvd.rank())
            dataset.sampler = sampler
        return dataset.dataloader
    elif vega.is_tf_backend():
        if self.distributed:
            dataset.set_distributed(self._world_size, self._rank_id)
        return dataset
def append(self, *args, **kwargs):
    """Append a transform to the end of the list.

    :param *args: positional arguments
    :type *args: tuple
    :param **kwargs: keyword arguments
    :type **kwargs: dict
    """
    if isinstance(args[0], str):
        transform = ClassFactory.get_cls(ClassType.TRANSFORM, args[0])
        self.__transform__.append(transform(**kwargs))
    else:
        self.__transform__.append(args[0])
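# Both call styles accepted above (transform name and kwargs illustrative):
# transforms.append("RandomCrop", size=32)   # name resolved via ClassFactory
# transforms.append(my_transform_instance)   # instance appended as-is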
def _init_loss(self):
    """Init loss."""
    if vega.is_torch_backend():
        loss_config = self.criterion.copy()
        loss_name = loss_config.pop('type')
        loss_class = getattr(importlib.import_module('torch.nn'), loss_name)
        return loss_class(**loss_config)
    elif vega.is_tf_backend():
        from inspect import isclass
        loss_config = self.config.tf_criterion.copy()
        loss_name = loss_config.pop('type')
        if ClassFactory.is_exists('trainer.loss', loss_name):
            loss_class = ClassFactory.get_cls('trainer.loss', loss_name)
            if isclass(loss_class):
                return loss_class(**loss_config)
            else:
                return partial(loss_class, **loss_config)
        else:
            loss_class = getattr(
                importlib.import_module('tensorflow.losses'), loss_name)
            return partial(loss_class, **loss_config)
def _get_callbacks(self, customs, disables):
    defaults = []
    if vega.is_torch_backend():
        defaults = ["ModelStatistics", "MetricsEvaluator", "ModelCheckpoint",
                    "PerformanceSaver", "LearningRateScheduler",
                    "ProgressLogger", "ReportCallback"]
    elif vega.is_tf_backend():
        defaults = ["ModelStatistics", "MetricsEvaluator", "PerformanceSaver",
                    "ProgressLogger", "ReportCallback"]
    custom_disables = []
    disables = disables if disables else []
    customs = customs if customs else []
    if customs:
        if isinstance(customs, str):
            customs = [customs]
        for customs_name in customs:
            callback_class = ClassFactory.get_cls(ClassType.CALLBACK,
                                                  customs_name)
            # A custom callback may declare default callbacks to disable.
            if hasattr(callback_class, "disable_callbacks"):
                _disables = callback_class.disable_callbacks
                if not isinstance(_disables, list):
                    _disables = [_disables]
                custom_disables = _disables
    callbacks = set([
        _cls for _cls in defaults + customs
        if _cls not in disables + custom_disables
    ])
    callbacks = [
        ClassFactory.get_cls(ClassType.CALLBACK, _cls)() for _cls in callbacks
    ]
    # Sort the callbacks by their declared priority.
    callbacks = sorted(callbacks, key=lambda callback: callback.priority)
    return callbacks
def _init_after_scheduler(self):
    """Init after_scheduler with after_scheduler_config."""
    if isinstance(self.after_scheduler_config, dict):
        scheduler_config = copy.deepcopy(self.after_scheduler_config)
        logging.debug("after_scheduler_config: {}".format(scheduler_config))
        scheduler_name = scheduler_config.pop('type')
        if ClassFactory.is_exists(ClassType.LR_SCHEDULER, scheduler_name):
            scheduler_class = ClassFactory.get_cls(ClassType.LR_SCHEDULER,
                                                   scheduler_name)
        else:
            scheduler_class = getattr(
                importlib.import_module('torch.optim.lr_scheduler'),
                scheduler_name)
        if scheduler_class.__name__ == "CosineAnnealingLR":
            # Default T_max to the full schedule length when not configured.
            if scheduler_config.get("T_max", -1) == -1:
                if scheduler_config.get("by_epoch", True):
                    scheduler_config["T_max"] = self.epochs
                else:
                    scheduler_config["T_max"] = self.epochs * self.steps
        self.after_scheduler = scheduler_class(self.optimizer, **scheduler_config)
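# Worked example of the T_max default (values illustrative): with
# epochs=100, by_epoch=False and steps=391 batches per epoch, an unset
# T_max becomes 100 * 391 = 39100 scheduler steps; with by_epoch=True it
# would be 100.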
def do(self):
    """Do the main task in this pipe step."""
    logger.info("SpNasPipeStep started")
    while not self.generator.is_completed:
        id, spnas_sample = self.generator.search_alg.search()
        cls_trainer = ClassFactory.get_cls('trainer')
        trainer = cls_trainer(spnas_sample=spnas_sample, id=id)
        logger.info("submit trainer(id={})!".format(id))
        self.master.run(trainer)
        finished_trainer_info = self.master.pop_finished_worker()
        logger.debug(finished_trainer_info)
        self.update_generator(self.generator, finished_trainer_info)
    self.master.join()
    finished_trainer_info = self.master.pop_all_finished_train_worker()
    self.update_generator(self.generator, finished_trainer_info)