def fit_weights(module, x): """Fit weight.""" arch_params = module.module_arch_params if not arch_params: return None weights = module.get_weights() if arch_params.get('out_channels'): out_channels_idx = [idx for idx, value in enumerate(arch_params.out_channels) if value == 1] for name, weight in weights.items(): if weight is None: continue if 'BatchNorm' in name: module.set_weights(name, weight[out_channels_idx]) else: if is_torch_backend(): module.set_weights(name, weight[out_channels_idx, :, :, :]) else: module.set_weights(name, weight[:, :, :, out_channels_idx]) if arch_params.get('in_channels'): in_channels_idx = [idx for idx, value in enumerate(arch_params.in_channels) if value == 1] for name, weight in weights.items(): if weight is None or 'BatchNorm' in name: continue if weight is not None: if is_torch_backend(): module.set_weights(name, weight[:, in_channels_idx, :, :]) else: module.set_weights(name, weight[:, :, in_channels_idx, :]) return None
def before_train(self, logs=None):
    """Be called before the train process."""
    self.config = self.trainer.config
    self.device = vega.is_gpu_device() if vega.is_gpu_device() is not True else 0
    self.base_net_desc = self.trainer.model.desc
    sess_config = None
    if vega.is_torch_backend():
        if vega.is_npu_device():
            count_input = torch.FloatTensor(1, 3, 32, 32).npu()
        elif vega.is_gpu_device():
            count_input = torch.FloatTensor(1, 3, 32, 32).to(self.device)
    elif vega.is_tf_backend():
        count_input = tf.random.uniform([1, 3, 32, 32], dtype=tf.float32)
        sess_config = self.trainer._init_session_config()
    elif vega.is_ms_backend():
        count_input = mindspore.Tensor(np.random.randn(1, 3, 32, 32).astype(np.float32))
    self.flops_count, self.params_count = calc_model_flops_params(self.trainer.model, count_input)
    self.latency_count = calc_forward_latency(self.trainer.model, count_input, sess_config)
    logging.info("after prune model flops=%sM, params=%sK, latency=%sms",
                 self.flops_count * 1e-6, self.params_count * 1e-3, self.latency_count * 1000)
    self.trainer.model = self._generate_init_model()
    if vega.is_torch_backend():
        self.trainer.optimizer = Optimizer()(model=self.trainer.model, distributed=self.trainer.distributed)
        self.trainer.lr_scheduler = LrScheduler()(self.trainer.optimizer)

def fit_weights(module, x): """Fit weight.""" inputs = x[0] if isinstance(x, tuple) else x for name, weight in module.get_weights().items(): if weight is None: continue in_channels_axis = 1 if is_torch_backend() else 2 out_channels_axis = 0 if is_torch_backend() else 3 if 'BatchNorm' in name: out_channels_diff = int(module.out_channels) - int( weight.shape[0]) if out_channels_diff == 0: continue padding = [0, out_channels_diff] else: in_channels_diff = int(inputs.shape[1]) - int( weight.shape[in_channels_axis]) out_channels_diff = int(module.out_channels) - int( weight.shape[out_channels_axis]) if in_channels_diff == 0 and out_channels_diff == 0: continue padding = [0, 0, 0, 0, 0, 0, 0, 0] if in_channels_diff != 0: padding[5] = in_channels_diff module.in_channels += in_channels_diff if out_channels_diff != 0: padding[-1] = out_channels_diff module.set_weights(name, ops.pad(weight, padding)) return None
def _get_data_format():
    """Return the data format used by the current backend."""
    if vega.is_torch_backend() or vega.is_ms_backend():
        return 'channels_first'
    elif vega.is_tf_backend():
        return 'channels_last'
    else:
        return None

def filter(self):
    """Apply mask to linear."""
    if sum(self.mask_code) == 0:
        self.mask_code[0] = 1
    mask_code = np.asarray(self.mask_code)
    idx_in = np.squeeze(np.argwhere(mask_code)).tolist()
    idx_in = [idx_in] if not isinstance(idx_in, list) else idx_in
    self.layer.in_features = sum(mask_code)
    weights = self.layer.get_weights()
    out_size = self.layer.out_features
    for name, weight in weights.items():
        if 'kernel' in name or 'weight' in name:
            if is_torch_backend():
                self.layer.set_weights(name, weight[:, idx_in])
                out_size = weight.shape[0]
            else:
                self.layer.set_weights(name, weight[idx_in, :])
                out_size = weight.shape[1]
    # fine-tune out_features value
    if self.layer.out_features == out_size:
        return
    idx_out = list(np.random.permutation(out_size)[:self.layer.out_features])
    for name, weight in self.layer.get_weights().items():
        if 'kernel' in name:
            self.layer.set_weights(name, weight[:, idx_out])
        else:
            self.layer.set_weights(name, weight[idx_out])
    self.layer.out_features = out_size

def _init_model(self):
    """Load model desc from save path and parse to model."""
    model = self.trainer.model
    if self.trainer.config.is_detection_trainer:
        model_desc = self.trainer.model_desc or self._get_model_desc()
    else:
        model_desc = self._get_model_desc()
    pretrained_model_file = self._get_pretrained_model_file()
    if not model:
        if not model_desc:
            raise Exception("Failed to init model, can not get model description.")
        model = ModelZoo.get_model(model_desc, pretrained_model_file, ModelConfig.head)
    if model:
        if hasattr(model, "desc"):
            self.trainer.model_desc = model.desc
        if vega.is_torch_backend():
            import torch
            if vega.is_gpu_device():
                model = model.cuda()
                if General._parallel and General.devices_per_trainer > 1:
                    model = torch.nn.DataParallel(model)
            elif vega.is_npu_device():
                model = model.npu()
                if General._parallel and General.devices_per_trainer > 1:
                    import torch.distributed as dist
                    dist.init_process_group(
                        backend='hccl',
                        world_size=int(os.environ['WORLD_SIZE']),
                        rank=int(os.environ['RANK_ID']))
                    model = torch.nn.parallel.DistributedDataParallel(
                        model, device_ids=[int(os.environ['DEVICE_ID'])])
    return model

def _train_single_model(self, model_desc=None, model_id=None, weights_file=None):
    """Train a single model."""
    cls_trainer = ClassFactory.get_cls(ClassType.TRAINER, PipeStepConfig.trainer.type)
    step_name = self.task.step_name
    if model_desc is not None:
        sample = dict(worker_id=model_id, desc=model_desc, step_name=step_name)
        record = ReportRecord().load_dict(sample)
        logging.debug("update record=%s", str(record))
        trainer = cls_trainer(model_desc=model_desc, id=model_id, pretrained_model_file=weights_file)
    else:
        trainer = cls_trainer(None, 0)
        record = ReportRecord(trainer.step_name, trainer.worker_id, desc=trainer.model_desc)
    ReportClient().update(**record.to_dict())
    # resume training
    if vega.is_torch_backend() and General._resume:
        trainer.load_checkpoint = True
        trainer._resume_training = True
    if self._distributed_training:
        self._do_distributed_fully_train(trainer)
    else:
        self._do_single_fully_train(trainer)

def __call__(self, model=None, distributed=False, **kwargs):
    """Call Optimizer class.

    :param model: model, used in torch case
    :param distributed: use distributed
    :return: optimizer
    """
    params = self.map_config.get("params", {})
    logging.debug("Call Optimizer. name={}, params={}".format(self.optim_cls.__name__, params))
    optimizer = None
    try:
        if vega.is_torch_backend():
            learnable_params = [param for param in model.parameters() if param.requires_grad]
            optimizer = self.optim_cls(learnable_params, **params)
            if distributed:
                optimizer = self.set_distributed(optimizer, model)
        elif vega.is_tf_backend():
            optimizer = dynamic_optimizer(self.optim_cls, **params)
        elif vega.is_ms_backend():
            if "dynamic_lr" in kwargs:
                params.update({"learning_rate": kwargs["dynamic_lr"]})
            learnable_params = [param for param in model.trainable_params() if param.requires_grad]
            optimizer = self.optim_cls(learnable_params, **params)
        return optimizer
    except Exception as ex:
        logging.error("Failed to call Optimizer name={}, params={}".format(self.optim_cls.__name__, params))
        raise ex

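# Hedged usage sketch, not a definitive API reference: this mirrors how the
# trainer callbacks in this file rebuild the optimizer and LR scheduler after
# the model is replaced (see before_train); `trainer` is assumed to expose
# `model` and `distributed` as those callbacks do.
trainer.optimizer = Optimizer()(model=trainer.model, distributed=trainer.distributed)
trainer.lr_scheduler = LrScheduler()(trainer.optimizer)
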
def step(self, train_x=None, train_y=None, valid_x=None, valid_y=None,
         lr=None, w_optimizer=None, w_loss=None, unrolled=None, scope_name=None):
    """Compute one step."""
    if vega.is_torch_backend():
        self.optimizer.zero_grad()
        loss = w_loss(self.model(valid_x), valid_y)
        loss.backward()
        self.optimizer.step()
        return
    elif vega.is_tf_backend():
        self.lr = lr
        global_step = tf.compat.v1.train.get_global_step()
        loss_fn = self._init_loss()
        self.optimizer = self._init_arch_optimizer()
        logits = self.model(valid_x)
        logits = tf.cast(logits, tf.float32)
        loss = loss_fn(logits, valid_y)
        loss_scale = self.trainer_config.loss_scale if self.trainer_config.amp else 1.
        arch_op = self.model.get_weight_ops()
        if loss_scale != 1:
            scaled_grad_vars = self.optimizer.compute_gradients(loss * loss_scale, var_list=arch_op)
            unscaled_grad_vars = [(grad / loss_scale, var) for grad, var in scaled_grad_vars]
            minimize_op = self.optimizer.apply_gradients(unscaled_grad_vars, global_step)
        else:
            grad_vars = self.optimizer.compute_gradients(loss, var_list=arch_op)
            minimize_op = self.optimizer.apply_gradients(grad_vars, global_step)
        return minimize_op

def _reset_classifier_model(self):
    """Reset the classifier head to match the configured number of classes."""
    if vega.is_torch_backend():
        # num_classes = ModelConfig.model_desc.backbone.n_class
        num_classes = ModelConfig.num_classes
        model = self.trainer.model
        out_features = num_classes
        # fix layers
        # for param in model.parameters():
        #     param.requires_grad = False
        # change head
        if "torch_vision_model" in ModelConfig.model_desc["modules"]:
            # torchvision
            import torch.nn as nn
            in_features = model.fc.in_features
            model.fc = nn.Linear(in_features, out_features).cuda()
        else:
            # vega
            in_features = model.fc.in_features
            from vega.modules.operators import ops
            model.fc = ops.Linear(in_features=in_features, out_features=out_features).cuda()
        # TODO n_class
        ModelConfig.model_desc.backbone.n_class = num_classes
        logging.info("Model fine-tuned successfully.")

def after_valid(self, logs=None):
    """Be called after validation."""
    if self.do_validation and self.valid_metrics is not None:
        # get the summary of valid metrics
        metrics_results = self.valid_metrics.results
        if vega.is_torch_backend() and self.trainer.distributed:
            for key, value in metrics_results.items():
                metrics_results[key] = self.trainer._metric_average(value, key)
        if 'loss' in metrics_results:
            metrics_results.pop('loss')
        if 'global_step' in metrics_results:
            metrics_results.pop('global_step')
        self.cur_valid_perfs = metrics_results
        logs.update({'cur_valid_perfs': self.cur_valid_perfs})
        # update the best valid perfs based on the current valid perfs
        if self.best_valid_perfs is None:
            self.best_valid_changed = True
            self.best_valid_perfs = self.cur_valid_perfs
        else:
            self.best_valid_changed = self._update_best_perfs(self.cur_valid_perfs, self.best_valid_perfs)
        logs.update({
            'cur_valid_perfs': self.cur_valid_perfs,
            'best_valid_perfs': self.best_valid_perfs,
            'best_valid_perfs_changed': self.best_valid_changed
        })

def __init__(self, load_path=None):
    """Construct MobileNetV3Tiny class.

    :param load_path: path for saved model
    """
    super(MobileNetV3Tiny, self).__init__()
    input_channel = 9
    features = [conv_bn_relu6(inchannel=3, outchannel=input_channel, kernel=3, stride=2)]
    for _, lst in enumerate(self.inverted_residual_setting):
        output_channel = lst[1]
        features.append(InvertedResidual(inp=input_channel, oup=output_channel,
                                         stride=lst[2], expand_ratio=lst[0]))
        input_channel = output_channel
    self.block = OutlistSequential(*features, out_list=[3, 6, 13, 17])
    if load_path is not None and is_torch_backend():
        import torch
        self.load_state_dict(torch.load(load_path), strict=False)

def _generate_init_model(self):
    """Generate init model by loading pretrained model.

    :return: initial model after loading pretrained model
    :rtype: torch.nn.Module
    """
    model_init = self._new_model_init()
    chn_mask = self._init_chn_node_mask()
    if vega.is_torch_backend():
        checkpoint = torch.load(self.config.init_model_file + '.pth')
        model_init.load_state_dict(checkpoint)
        model = PruneMobileNet(model_init).apply(chn_mask)
        model.to(self.device)
    elif vega.is_tf_backend():
        model = model_init
        with tf.compat.v1.Session(config=self.trainer._init_session_config()) as sess:
            saver = tf.compat.v1.train.import_meta_graph("{}.meta".format(self.config.init_model_file))
            saver.restore(sess, self.config.init_model_file)
            all_weight = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.VARIABLES)
            all_weight = [t for t in all_weight if not t.name.endswith('Momentum:0')]
            PruneMobileNet(all_weight).apply(chn_mask)
            save_file = FileOps.join_path(self.trainer.get_local_worker_path(), 'prune_model')
            saver.save(sess, save_file)
    elif vega.is_ms_backend():
        parameter_dict = load_checkpoint(self.config.init_model_file)
        load_param_into_net(model_init, parameter_dict)
        model = PruneMobileNet(model_init).apply(chn_mask)
    return model

def load_checkpoint(self, worker_id=None, step_name=None, saved_folder=None):
    """Load checkpoint."""
    if saved_folder is None:
        if worker_id is None:
            worker_id = self.worker_id
        if step_name is None:
            step_name = self.step_name
        saved_folder = self.get_local_worker_path(step_name, worker_id)
    checkpoint_file = FileOps.join_path(saved_folder, self.checkpoint_file_name)
    model_pickle_file = FileOps.join_path(saved_folder, self.model_pickle_file_name)
    try:
        with open(model_pickle_file, 'rb') as f:
            model = pickle.load(f)
            if vega.is_torch_backend():
                ckpt = torch.load(checkpoint_file, map_location=torch.device('cpu'))
                model.load_state_dict(ckpt['weight'])
                if self.config.cuda:
                    model = model.cuda()
            elif vega.is_tf_backend():
                FileOps.copy_folder(saved_folder, self.get_local_worker_path())
            self.model = model
    except Exception:
        logging.info('Checkpoint file does not exist, using default model now.')
    return

def __init__(self, genotype, steps, concat, reduction, reduction_prev=None,
             C_prev_prev=None, C_prev=None, C=None):
    """Init Cell."""
    super(Cell, self).__init__()
    self.genotype = genotype
    self.steps = steps
    self.concat = concat
    self.reduction = reduction
    self.reduction_prev = reduction_prev
    self.C_prev_prev = C_prev_prev
    self.C_prev = C_prev
    self.C = C
    self.concat_size = 0
    affine = True
    if isinstance(self.genotype[0][0], list):
        affine = False
    if self.reduction_prev:
        self.preprocess0 = FactorizedReduce(self.C_prev_prev, self.C, affine)
    else:
        self.preprocess0 = ReLUConvBN(self.C_prev_prev, self.C, 1, 1, 0, affine)
    self.preprocess1 = ReLUConvBN(self.C_prev, self.C, 1, 1, 0, affine)
    op_names, indices_out, indices_inp = zip(*self.genotype)
    self.build_ops(self.C, op_names, indices_out, indices_inp, self.concat, self.reduction)
    self.concat_size = len(self.concat)
    self.torch_flag = vega.is_torch_backend()

def before_train(self, logs=None):
    """Be called before the train process."""
    self.config = self.trainer.config
    model_code = copy.deepcopy(self.trainer.model.desc)
    model = self.trainer.model
    logging.info('current code: %s, %s', model_code.nbit_w_list, model_code.nbit_a_list)
    quantizer = Quantizer(model, model_code.nbit_w_list, model_code.nbit_a_list)
    model = quantizer()
    self.trainer.model = model
    count_input = [1, 3, 32, 32]
    sess_config = None
    if vega.is_torch_backend():
        model = model.cuda()
        self.trainer.optimizer = Optimizer()(model=self.trainer.model, distributed=self.trainer.distributed)
        self.trainer.lr_scheduler = LrScheduler()(self.trainer.optimizer)
        count_input = torch.FloatTensor(*count_input).cuda()
    elif vega.is_tf_backend():
        tf.compat.v1.reset_default_graph()
        count_input = tf.random.uniform(count_input, dtype=tf.float32)
        sess_config = self.trainer._init_session_config()
    self.flops_count, self.params_count = calc_model_flops_params(
        model, count_input, custom_hooks=quantizer.custom_hooks())
    self.latency_count = calc_forward_latency(model, count_input, sess_config)
    logging.info("after quant model flops=%sM, params=%sK, latency=%sms",
                 self.flops_count * 1e-6, self.params_count * 1e-3, self.latency_count * 1000)
    self.validate()

def before_train(self, logs=None):
    """Be called before the training process."""
    self.input = None
    self.flops = None
    self.params = None
    self.latency = None
    self.calc_params_each_epoch = self.trainer.config.calc_params_each_epoch
    self.calc_latency = self.trainer.config.calc_latency
    if vega.is_tf_backend():
        import tensorflow as tf
        datasets = self.trainer.valid_input_fn()
        data_iter = tf.compat.v1.data.make_one_shot_iterator(datasets)
        # data_iter = self.trainer.valid_input_fn().make_one_shot_iterator()
        input_data, _ = data_iter.get_next()
        self.input = input_data[:1]
    elif vega.is_torch_backend():
        for batch in self.trainer.valid_loader:
            batch = self.trainer._set_device(batch)
            if isinstance(batch, dict):
                self.input = batch
            elif isinstance(batch, list) and isinstance(batch[0], dict):
                self.input = batch[:1]
            else:
                # classification
                self.input = batch[0][:1]
            break
    self.update_flops_params(logs=logs)

def load_records_from_model_folder(cls, model_folder):
    """Transfer json_file to records."""
    if not model_folder or not os.path.exists(model_folder):
        logging.error("Failed to load records from model folder, folder={}".format(model_folder))
        return []
    records = []
    pattern = FileOps.join_path(model_folder, "desc_*.json")
    files = glob.glob(pattern)
    for _file in files:
        try:
            with open(_file) as f:
                worker_id = _file.split(".")[-2].split("_")[-1]
                weights_file = os.path.join(os.path.dirname(_file), "model_{}".format(worker_id))
                if vega.is_torch_backend():
                    weights_file = '{}.pth'.format(weights_file)
                elif vega.is_ms_backend():
                    weights_file = '{}.ckpt'.format(weights_file)
                if not os.path.exists(weights_file):
                    weights_file = None
                sample = dict(worker_id=worker_id, desc=json.load(f), weights_file=weights_file)
                record = ReportRecord().load_dict(sample)
                records.append(record)
        except Exception as ex:
            logging.info('Can not read records from json because {}'.format(ex))
    return records

def _train_single_model(self, model_desc=None, model_id=None, weights_file=None):
    """Train a single model."""
    cls_trainer = ClassFactory.get_cls('trainer')
    step_name = self.task.step_name
    if model_desc is not None:
        sample = dict(worker_id=model_id, desc=model_desc, step_name=step_name)
        record = ReportRecord().load_dict(sample)
        logging.debug("Broadcast Record=%s", str(record))
        trainer = cls_trainer(model_desc=model_desc, id=model_id, pretrained_model_file=weights_file)
    else:
        trainer = cls_trainer(None, 0)
        record = ReportRecord(trainer.step_name, trainer.worker_id, desc=trainer.model_desc)
    ReportClient.broadcast(record)
    ReportServer.add_watched_var(trainer.step_name, trainer.worker_id)
    # resume training
    if vega.is_torch_backend() and General._resume:
        trainer.load_checkpoint = True
        trainer._resume_training = True
    if self._distributed_training:
        self._do_distributed_fully_train(trainer)
    else:
        self._do_single_fully_train(trainer)

def eval_model_parameters(model):
    """Calculate number of parameters in million (M) for a model.

    :param model: A model
    :type model: nn.Module
    :return: The number of parameters
    :rtype: Float
    """
    if vega.is_torch_backend():
        # count learnable parameters, excluding auxiliary heads
        return sum(v.numel() for name, v in model.named_parameters()
                   if "auxiliary" not in name) / 1e6
    else:
        import tensorflow as tf
        tf.compat.v1.reset_default_graph()
        dummy_input = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[1, 32, 32, 3] if model.data_format == 'channels_last' else [1, 3, 32, 32])
        model.training = True
        model(dummy_input)
        all_weight = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
        weight_op = [t for t in all_weight if "auxiliary" not in t.name]
        return np.sum([np.prod(t.get_shape().as_list()) for t in weight_op]) * 1e-6

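# Hedged usage sketch: report the trainable parameter count (in millions) for
# the current backend's model; `model` is assumed to be the model object the
# trainer already holds, as in the callbacks above.
num_params_m = eval_model_parameters(model)
logging.info("model size: %sM parameters", num_params_m)
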
def _to_tensor(self, data):
    """Convert data to a backend tensor."""
    if vega.is_torch_backend():
        import torch
        return torch.tensor(data)
    elif vega.is_tf_backend():
        import tensorflow as tf
        return tf.convert_to_tensor(data)

def fit_weights(module, x):
    """Fit weights shape."""
    inputs = x[0] if isinstance(x, tuple) else x
    in_features_diff = 0
    for name, weight in module.get_weights().items():
        if 'kernel' in name or 'weight' in name:
            in_features_diff = int(inputs.shape[1]) - int(weight.shape[1 if is_torch_backend() else 0])
            if in_features_diff == 0:
                continue
            padding = [0, in_features_diff] if is_torch_backend() else [0, in_features_diff, 0, 0]
            module.set_weights(name, ops.pad(weight, padding))
    if module.in_features:
        module.in_features += in_features_diff
    return None

def get_named_modules(layer):
    """Get named modules."""
    if vega.is_tf_backend():
        return [(op.name, op) for op in layer]
    elif vega.is_torch_backend():
        return layer.named_modules()
    elif vega.is_ms_backend():
        return layer._children_scope_recursive()

def _infer(args, loader, model=None):
    """Dispatch inference to the backend-specific implementation."""
    if vega.is_torch_backend():
        return _infer_pytorch(model, loader)
    elif vega.is_tf_backend():
        return _infer_tf(args, model, loader)
    elif vega.is_ms_backend():
        return _infer_ms(args, model, loader)

def set_model(self, model):
    """Set model."""
    self.model = model
    if vega.is_torch_backend():
        if self.parallel:
            self.model = self.model.module
        self.loss = self._init_loss().cuda()
        self.optimizer = self._init_arch_optimizer(self.model)

def _get_model(args):
    """Get model."""
    from vega.model_zoo import ModelZoo
    model = ModelZoo.get_model(args.model_desc, args.model)
    if vega.is_torch_backend():
        if args.device == "GPU":
            model = model.cuda()
        model.eval()
    return model

def get_shape(layer):
    """Get weight shape."""
    if vega.is_tf_backend():
        return layer.get_shape()
    elif vega.is_torch_backend():
        return layer.weight.data.shape
    elif vega.is_ms_backend():
        para_name = list(layer._params)[0]
        return getattr(layer, para_name).default_input.shape

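# Hedged sketch, assuming `network` is a backend model object: walk its layers
# with get_named_modules above and print each weight shape via get_shape; the
# hasattr guard for weight-less PyTorch modules is an added assumption, not
# part of the original helpers.
for name, layer in get_named_modules(network):
    if vega.is_torch_backend() and not hasattr(layer, 'weight'):
        continue
    print(name, get_shape(layer))
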
def _init_horovod_setting(self):
    """Init horovod setting."""
    self.is_chief = True
    if self.distributed and vega.is_torch_backend():
        hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(self.optimizer, root_rank=0)
        if hvd.rank() != 0:
            self.is_chief = False
        else:
            self.is_chief = True

def filter_in_channels(self, mask_code):
    """Mask in channels."""
    filter_idx = self._make_mask(mask_code)
    weights = self.layer.get_weights()
    self.layer.in_channels = sum(mask_code)
    for name, weight in weights.items():
        if weight is not None:
            if is_torch_backend():
                self.layer.set_weights(name, weight[:, filter_idx, :, :])
            else:
                self.layer.set_weights(name, weight[:, :, filter_idx, :])

def Adapter(dataset):
    """Adapter of dataset."""
    if vega.is_torch_backend():
        from .pytorch.adapter import TorchAdapter as Adapter
    elif vega.is_tf_backend():
        from .tensorflow.adapter import TfAdapter as Adapter
    elif vega.is_ms_backend():
        from .mindspore.adapter import MsAdapter as Adapter
    else:
        raise ValueError("Adapter does not support the current backend.")
    return Adapter(dataset)

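# Hedged usage sketch: Adapter returns the backend-specific dataset adapter;
# `dataset` is assumed to be a vega Dataset instance, and the attributes of
# the returned adapter are backend dependent.
adapter = Adapter(dataset)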