class TestInputDict(unittest.TestCase):
    def setUp(self):
        model = ModelInputDict()

        sp_net_config = supernet(expand_ratio=[0.5, 1.0])
        self.model = Convert(sp_net_config).convert(model)
        self.images = paddle.randn(shape=[2, 3, 32, 32], dtype='float32')
        self.images2 = {
            'data': paddle.randn(shape=[2, 12, 32, 32], dtype='float32')
        }
        default_run_config = {'skip_layers': ['conv1.0', 'conv2.0']}
        self.run_config = RunConfig(**default_run_config)

        self.ofa_model = OFA(self.model, run_config=self.run_config)
        self.ofa_model._clear_search_space(self.images, data=self.images2)

    def test_export(self):
        config = self.ofa_model._sample_config(
            task="expand_ratio", sample_type="smallest")
        self.ofa_model.export(
            config,
            input_shapes=[[1, 3, 32, 32], {'data': [1, 12, 32, 32]}],
            input_dtypes=['float32', 'float32'])

class TestExport(unittest.TestCase):
    def setUp(self):
        self._init_model()

    def _init_model(self):
        self.origin_model = ModelOriginLinear()
        model = ModelLinear()
        self.ofa_model = OFA(model)

    def test_ofa(self):
        config = {
            'embedding_1': {'expand_ratio': 2.0},
            'linear_3': {'expand_ratio': 2.0},
            'linear_4': {},
            'linear_5': {}
        }
        origin_dict = {}
        for name, param in self.origin_model.named_parameters():
            origin_dict[name] = param.shape
        self.ofa_model.export(
            self.origin_model,
            config,
            input_shapes=[[1, 64]],
            input_dtypes=['int64'])
        for name, param in self.origin_model.named_parameters():
            if name in config.keys():
                if 'expand_ratio' in config[name]:
                    assert origin_dict[name][-1] == param.shape[-1] * config[
                        name]['expand_ratio']

class TestExportCase1(unittest.TestCase):
    def setUp(self):
        model = ModelLinear1()
        data_np = np.random.random((3, 64)).astype(np.int64)
        self.data = paddle.to_tensor(data_np)
        self.ofa_model = OFA(model)
        self.ofa_model.set_epoch(0)
        outs, _ = self.ofa_model(self.data)
        self.config = self.ofa_model.current_config

    def test_export_model(self):
        self.ofa_model.export(
            self.config, input_shapes=[[3, 64]], input_dtypes=['int64'])
        assert len(self.ofa_model.ofa_layers) == 4

class TestOFAV2Export(unittest.TestCase):
    def setUp(self):
        model = ModelV1(name='export')
        sp_net_config = supernet(expand_ratio=[0.25, 0.5, 1.0])
        self.model = Convert(sp_net_config).convert(model)
        self.images = paddle.randn(shape=[2, 3, 32, 32], dtype='float32')
        self.ofa_model = OFA(self.model)

    def test_export(self):
        origin_model = ModelV1(name='origin')
        net_config = {'model.0': {}}
        self.ofa_model.export(
            net_config,
            input_shapes=[1, 3, 32, 32],
            input_dtypes=['float32'],
            origin_model=origin_model)

def do_train(args):
    paddle.set_device("gpu" if args.n_gpu else "cpu")
    args.model_type = args.model_type.lower()
    model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    config_path = os.path.join(args.model_name_or_path, 'model_config.json')
    with open(config_path) as f:
        cfg_dict = dict(json.loads(f.read()))
    num_labels = cfg_dict['num_classes']

    model = model_class.from_pretrained(
        args.model_name_or_path, num_classes=num_labels)
    origin_model = model_class.from_pretrained(
        args.model_name_or_path, num_classes=num_labels)

    # Wrap the fine-tuned model into a width-adaptive supernet and reload its weights.
    sp_config = supernet(expand_ratio=[1.0, args.width_mult])
    model = Convert(sp_config).convert(model)
    ofa_model = OFA(model)

    sd = paddle.load(
        os.path.join(args.model_name_or_path, 'model_state.pdparams'))
    ofa_model.model.set_state_dict(sd)

    # Export the sub-model selected by width_mult into the original (static-width) architecture.
    best_config = utils.dynabert_config(ofa_model, args.width_mult)
    ofa_model.export(
        best_config,
        input_shapes=[[1, args.max_seq_length], [1, args.max_seq_length]],
        input_dtypes=['int64', 'int64'],
        origin_model=origin_model)
    for name, sublayer in origin_model.named_sublayers():
        if isinstance(sublayer, paddle.nn.MultiHeadAttention):
            sublayer.num_heads = int(args.width_mult * sublayer.num_heads)

    output_dir = os.path.join(args.sub_model_output_dir,
                              "model_width_%.5f" % args.width_mult)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    model_to_save = origin_model
    model_to_save.save_pretrained(output_dir)

    if args.static_sub_model is not None:
        export_static_model(origin_model, args.static_sub_model,
                            args.max_seq_length)

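# Illustrative only: a minimal sketch of loading the exported static sub-model for
# inference with paddle.jit.load. The path prefix, the two int64 inputs
# (input_ids, token_type_ids) and the vocab size 30522 are assumptions about how
# export_static_model saved the graph, not guarantees made by the script above.
def _load_static_sub_model_example():
    static_model = paddle.jit.load('./static_sub_model/model')  # hypothetical prefix
    static_model.eval()
    input_ids = paddle.randint(low=0, high=30522, shape=[1, 128], dtype='int64')
    token_type_ids = paddle.zeros(shape=[1, 128], dtype='int64')
    logits = static_model(input_ids, token_type_ids)
    return logits
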
class TestShortCut(unittest.TestCase):
    def setUp(self):
        model = resnet50()
        sp_net_config = supernet(expand_ratio=[0.25, 0.5, 1.0])
        self.model = Convert(sp_net_config).convert(model)
        self.images = paddle.randn(shape=[2, 3, 224, 224], dtype='float32')
        self._test_clear_search_space()

    def _test_clear_search_space(self):
        self.ofa_model = OFA(self.model)
        self.ofa_model.set_epoch(0)
        outs, _ = self.ofa_model(self.images)
        self.config = self.ofa_model.current_config

    def test_export_model(self):
        self.ofa_model.export(
            self.config,
            input_shapes=[[2, 3, 224, 224]],
            input_dtypes=['float32'])
        assert len(self.ofa_model.ofa_layers) == 37

class TestExport(unittest.TestCase):
    def setUp(self):
        self._init_model()

    def _init_model(self):
        self.origin_model = ModelOriginLinear()
        model = ModelLinear()
        self.ofa_model = OFA(model)

    def test_ofa(self):
        config = self.ofa_model._sample_config(task='expand_ratio', phase=None)
        origin_dict = {}
        for name, param in self.origin_model.named_parameters():
            origin_dict[name] = param.shape
        self.ofa_model.export(
            config,
            input_shapes=[[1, 64]],
            input_dtypes=['int64'],
            origin_model=self.origin_model)
        for name, param in self.origin_model.named_parameters():
            if name in config.keys():
                if 'expand_ratio' in config[name]:
                    assert origin_dict[name][-1] == param.shape[-1] * config[
                        name]['expand_ratio']

class TestExportCase2(unittest.TestCase):
    def setUp(self):
        model = ModelLinear()
        data_np = np.random.random((3, 64)).astype(np.int64)
        self.data = paddle.to_tensor(data_np)
        self.ofa_model = OFA(model)
        self.ofa_model.set_epoch(0)
        outs, _ = self.ofa_model(self.data)
        self.config = self.ofa_model.current_config

    def test_export_model_linear2(self):
        config = self.ofa_model._sample_config(
            task='expand_ratio', phase=None, sample_type='smallest')
        ex_model = self.ofa_model.export(
            config, input_shapes=[[3, 64]], input_dtypes=['int64'])
        ex_model(self.data)
        assert len(self.ofa_model.ofa_layers) == 3

def do_train(args):
    paddle.set_device("gpu" if args.n_gpu else "cpu")
    args.model_type = args.model_type.lower()
    model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    config_path = os.path.join(args.model_name_or_path, 'model_config.json')
    with open(config_path) as f:
        cfg_dict = dict(json.loads(f.read()))

    # If depth_mult < 1.0, shrink num_hidden_layers in the config and record which
    # original encoder layer each kept layer maps to.
    kept_layers_index = {}
    if args.depth_mult < 1.0:
        depth = round(cfg_dict["init_args"][0]['num_hidden_layers'] *
                      args.depth_mult)
        cfg_dict["init_args"][0]['num_hidden_layers'] = depth
        for idx, i in enumerate(range(1, depth + 1)):
            kept_layers_index[idx] = math.floor(i / args.depth_mult) - 1

    # Temporarily replace the config so the shallower model is instantiated,
    # then restore the original file afterwards.
    os.rename(config_path, config_path + '_bak')
    with open(config_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(cfg_dict, ensure_ascii=False))

    num_labels = cfg_dict['num_classes']

    model = model_class.from_pretrained(
        args.model_name_or_path, num_classes=num_labels)
    origin_model = model_class.from_pretrained(
        args.model_name_or_path, num_classes=num_labels)

    os.rename(config_path + '_bak', config_path)

    sp_config = supernet(expand_ratio=[1.0, args.width_mult])
    model = Convert(sp_config).convert(model)
    ofa_model = OFA(model)

    sd = paddle.load(
        os.path.join(args.model_name_or_path, 'model_state.pdparams'))

    if len(kept_layers_index) == 0:
        ofa_model.model.set_state_dict(sd)
    else:
        # Copy weights layer by layer, remapping encoder layers onto the kept indices.
        for name, params in ofa_model.model.named_parameters():
            if 'encoder' not in name:
                params.set_value(sd[name])
            else:
                idx = int(name.strip().split('.')[3])
                mapping_name = name.replace(
                    '.' + str(idx) + '.',
                    '.' + str(kept_layers_index[idx]) + '.')
                params.set_value(sd[mapping_name])

    best_config = utils.dynabert_config(ofa_model, args.width_mult)
    for name, sublayer in ofa_model.model.named_sublayers():
        if isinstance(sublayer, paddle.nn.MultiHeadAttention):
            sublayer.num_heads = int(args.width_mult * sublayer.num_heads)
    ofa_model.export(
        best_config,
        input_shapes=[[1, args.max_seq_length], [1, args.max_seq_length]],
        input_dtypes=['int64', 'int64'],
        origin_model=origin_model)
    for name, sublayer in origin_model.named_sublayers():
        if isinstance(sublayer, paddle.nn.MultiHeadAttention):
            sublayer.num_heads = int(args.width_mult * sublayer.num_heads)

    output_dir = os.path.join(args.sub_model_output_dir,
                              "model_width_%.5f" % args.width_mult)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    model_to_save = origin_model
    model_to_save.save_pretrained(output_dir)

    if args.static_sub_model is not None:
        export_static_model(origin_model, args.static_sub_model,
                            args.max_seq_length)

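# A minimal driver sketch (not part of the original script): it assumes MODEL_CLASSES
# has a 'bert' entry and that the attribute names below mirror the argparse flags this
# script defines; all paths and values are hypothetical.
from types import SimpleNamespace


def _example_export_run():
    example_args = SimpleNamespace(
        model_type='bert',                           # assumed MODEL_CLASSES key
        model_name_or_path='./best_dynabert_model',  # hypothetical checkpoint dir
        n_gpu=1,
        width_mult=0.75,   # keep 75% of the width (heads / FFN neurons)
        depth_mult=0.5,    # keep half of the encoder layers
        max_seq_length=128,
        sub_model_output_dir='./sub_models',
        static_sub_model='./static_sub_model',
    )
    do_train(example_args)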