Beispiel #1
0
class TestInputDict(unittest.TestCase):
    """Exercise ``OFA.export`` on a model fed a tensor plus a dict input."""

    def setUp(self):
        """Build the converted supernet, sample inputs and the OFA wrapper."""
        base_model = ModelInputDict()

        supernet_cfg = supernet(expand_ratio=[0.5, 1.0])
        self.model = Convert(supernet_cfg).convert(base_model)

        # One positional image batch and one dict that is later passed
        # through the ``data=`` keyword.
        self.images = paddle.randn(shape=[2, 3, 32, 32], dtype='float32')
        self.images2 = dict(
            data=paddle.randn(shape=[2, 12, 32, 32], dtype='float32'))

        self.run_config = RunConfig(skip_layers=['conv1.0', 'conv2.0'])

        self.ofa_model = OFA(self.model, run_config=self.run_config)
        self.ofa_model._clear_search_space(self.images, data=self.images2)

    def test_export(self):
        # Sample the "smallest" expand_ratio configuration, then export it
        # while describing the inputs as a shape list plus a dict of shapes.
        smallest_cfg = self.ofa_model._sample_config(
            task="expand_ratio", sample_type="smallest")
        shapes = [[1, 3, 32, 32], {'data': [1, 12, 32, 32]}]
        dtypes = ['float32', 'float32']
        self.ofa_model.export(
            smallest_cfg, input_shapes=shapes, input_dtypes=dtypes)
Beispiel #2
0
class TestExport(unittest.TestCase):
    """Export an OFA-wrapped ``ModelLinear`` onto a ``ModelOriginLinear``."""

    def setUp(self):
        """Create the models used by the test."""
        self._init_model()

    def _init_model(self):
        """Instantiate the origin model and the OFA wrapper around ModelLinear."""
        self.origin_model = ModelOriginLinear()
        self.ofa_model = OFA(ModelLinear())

    def test_ofa(self):
        # Hand-written export config: two layers carry an expand ratio,
        # the other two have empty settings.
        ratio = 2.0
        config = {
            'embedding_1': {'expand_ratio': ratio},
            'linear_3': {'expand_ratio': ratio},
            'linear_4': {},
            'linear_5': {},
        }

        # Record every parameter shape before the export call.
        shapes_before = {
            name: param.shape
            for name, param in self.origin_model.named_parameters()
        }

        self.ofa_model.export(
            self.origin_model,
            config,
            input_shapes=[[1, 64]],
            input_dtypes=['int64'])

        # NOTE(review): the check only fires when a parameter name equals a
        # config key exactly -- confirm that such names actually occur.
        for name, param in self.origin_model.named_parameters():
            layer_cfg = config.get(name)
            if layer_cfg is None or 'expand_ratio' not in layer_cfg:
                continue
            expected_last_dim = param.shape[-1] * layer_cfg['expand_ratio']
            assert shapes_before[name][-1] == expected_last_dim
Beispiel #3
0
class TestExportCase1(unittest.TestCase):
    """Export ``ModelLinear1`` using the config left by one forward pass."""

    def setUp(self):
        """Run the OFA model once at epoch 0 and keep its current config."""
        linear_model = ModelLinear1()

        # int64 batch of shape (3, 64) built from uniform random samples.
        raw_batch = np.random.random((3, 64)).astype(np.int64)
        self.data = paddle.to_tensor(raw_batch)

        self.ofa_model = OFA(linear_model)
        self.ofa_model.set_epoch(0)

        # The call must yield exactly two values; neither is used here.
        _outputs, _extra = self.ofa_model(self.data)
        self.config = self.ofa_model.current_config

    def test_export_model(self):
        # Export with the recorded config, then check the OFA layer count.
        self.ofa_model.export(
            self.config,
            input_shapes=[[3, 64]],
            input_dtypes=['int64'])
        layer_count = len(self.ofa_model.ofa_layers)
        assert layer_count == 4
Beispiel #4
0
class TestOFAV2Export(unittest.TestCase):
    """Export a converted ``ModelV1`` supernet onto a fresh origin model."""

    def setUp(self):
        """Convert ``ModelV1`` with three expand ratios and wrap it in OFA."""
        export_model = ModelV1(name='export')
        supernet_cfg = supernet(expand_ratio=[0.25, 0.5, 1.0])
        self.model = Convert(supernet_cfg).convert(export_model)
        self.images = paddle.randn(shape=[2, 3, 32, 32], dtype='float32')
        self.ofa_model = OFA(self.model)

    def test_export(self):
        # The config names a single layer with empty settings; the input
        # shape is given as one flat list rather than a list of shapes.
        target_model = ModelV1(name='origin')
        export_kwargs = dict(
            input_shapes=[1, 3, 32, 32],
            input_dtypes=['float32'],
            origin_model=target_model)
        self.ofa_model.export({'model.0': {}}, **export_kwargs)
def do_train(args):
    """Export a width-reduced sub-model from a pretrained checkpoint.

    Loads the model twice (one copy is converted into an OFA supernet, the
    other is the export target), restores the saved weights into the
    supernet, exports the configuration returned by
    ``utils.dynabert_config`` onto the target model and saves it.

    Args:
        args: Namespace-like object. The attributes read here are
            ``n_gpu``, ``model_type``, ``model_name_or_path``,
            ``width_mult``, ``max_seq_length``, ``sub_model_output_dir``
            and ``static_sub_model``. ``args.model_type`` is lower-cased
            in place.
    """
    paddle.set_device("gpu" if args.n_gpu else "cpu")
    args.model_type = args.model_type.lower()
    # Only the model class is needed; the tokenizer class is ignored.
    model_class, _ = MODEL_CLASSES[args.model_type]

    config_path = os.path.join(args.model_name_or_path, 'model_config.json')
    # Use a context manager so the config file handle is always closed.
    with open(config_path) as config_file:
        cfg_dict = dict(json.load(config_file))
    num_labels = cfg_dict['num_classes']

    model = model_class.from_pretrained(args.model_name_or_path,
                                        num_classes=num_labels)

    origin_model = model_class.from_pretrained(args.model_name_or_path,
                                               num_classes=num_labels)

    sp_config = supernet(expand_ratio=[1.0, args.width_mult])
    model = Convert(sp_config).convert(model)

    ofa_model = OFA(model)

    sd = paddle.load(
        os.path.join(args.model_name_or_path, 'model_state.pdparams'))
    ofa_model.model.set_state_dict(sd)
    best_config = utils.dynabert_config(ofa_model, args.width_mult)
    ofa_model.export(best_config,
                     input_shapes=[[1, args.max_seq_length],
                                   [1, args.max_seq_length]],
                     input_dtypes=['int64', 'int64'],
                     origin_model=origin_model)

    # Scale the head count of every attention layer in the exported model
    # by the width multiplier.
    for _, sublayer in origin_model.named_sublayers():
        if isinstance(sublayer, paddle.nn.MultiHeadAttention):
            sublayer.num_heads = int(args.width_mult * sublayer.num_heads)

    output_dir = os.path.join(args.sub_model_output_dir,
                              "model_width_%.5f" % args.width_mult)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)
    origin_model.save_pretrained(output_dir)

    if args.static_sub_model is not None:
        export_static_model(origin_model, args.static_sub_model,
                            args.max_seq_length)
Beispiel #6
0
class TestShortCut(unittest.TestCase):
    """Export a converted ``resnet50`` supernet and check its OFA layer count."""

    def setUp(self):
        """Convert ``resnet50`` with three expand ratios and run it once."""
        backbone = resnet50()
        supernet_cfg = supernet(expand_ratio=[0.25, 0.5, 1.0])
        self.model = Convert(supernet_cfg).convert(backbone)
        self.images = paddle.randn(shape=[2, 3, 224, 224], dtype='float32')
        self._test_clear_search_space()

    def _test_clear_search_space(self):
        """Wrap the model in OFA, run one forward pass and store the config."""
        self.ofa_model = OFA(self.model)
        self.ofa_model.set_epoch(0)
        # The call must yield exactly two values; neither is used here.
        _outputs, _extra = self.ofa_model(self.images)
        self.config = self.ofa_model.current_config

    def test_export_model(self):
        # Export with the recorded config, then check the OFA layer count.
        export_kwargs = dict(
            input_shapes=[[2, 3, 224, 224]],
            input_dtypes=['float32'])
        self.ofa_model.export(self.config, **export_kwargs)
        layer_count = len(self.ofa_model.ofa_layers)
        assert layer_count == 37
Beispiel #7
0
class TestExport(unittest.TestCase):
    """Export a sampled OFA config of ``ModelLinear`` onto an origin model."""

    def setUp(self):
        """Create the models used by the test."""
        self._init_model()

    def _init_model(self):
        """Instantiate the origin model and the OFA wrapper around ModelLinear."""
        self.origin_model = ModelOriginLinear()
        self.ofa_model = OFA(ModelLinear())

    def test_ofa(self):
        sampled_cfg = self.ofa_model._sample_config(
            task='expand_ratio', phase=None)

        # Record every parameter shape before the export call.
        shapes_before = {
            name: param.shape
            for name, param in self.origin_model.named_parameters()
        }

        self.ofa_model.export(
            sampled_cfg,
            input_shapes=[[1, 64]],
            input_dtypes=['int64'],
            origin_model=self.origin_model)

        # NOTE(review): the check only fires when a parameter name is itself
        # a key of the sampled config -- confirm that such names occur.
        for name, param in self.origin_model.named_parameters():
            if name not in sampled_cfg.keys():
                continue
            layer_cfg = sampled_cfg[name]
            if 'expand_ratio' not in layer_cfg:
                continue
            expected_last_dim = param.shape[-1] * layer_cfg['expand_ratio']
            assert shapes_before[name][-1] == expected_last_dim
Beispiel #8
0
class TestExportCase2(unittest.TestCase):
    """Export the "smallest" sampled config of ``ModelLinear`` and run it."""

    def setUp(self):
        """Run the OFA model once at epoch 0 and keep its current config."""
        linear_model = ModelLinear()

        # int64 batch of shape (3, 64) built from uniform random samples.
        raw_batch = np.random.random((3, 64)).astype(np.int64)
        self.data = paddle.to_tensor(raw_batch)

        self.ofa_model = OFA(linear_model)
        self.ofa_model.set_epoch(0)

        # The call must yield exactly two values; neither is used here.
        _outputs, _extra = self.ofa_model(self.data)
        self.config = self.ofa_model.current_config

    def test_export_model_linear2(self):
        # Export the smallest expand_ratio sample, call the exported model
        # on the stored batch, then check the OFA layer count.
        smallest_cfg = self.ofa_model._sample_config(
            task='expand_ratio', phase=None, sample_type='smallest')
        exported = self.ofa_model.export(
            smallest_cfg,
            input_shapes=[[3, 64]],
            input_dtypes=['int64'])
        exported(self.data)
        layer_count = len(self.ofa_model.ofa_layers)
        assert layer_count == 3
Beispiel #9
0
def do_train(args):
    """Export a depth- and width-reduced sub-model from a checkpoint.

    When ``args.depth_mult < 1.0`` the number of hidden layers in the model
    config is reduced and a mapping from new layer index to source layer
    index is built. The on-disk ``model_config.json`` is temporarily
    replaced with the adjusted config while the two model copies are
    loaded, then restored. The saved weights are copied into the OFA
    supernet (remapping encoder layers if depth was reduced), the config
    from ``utils.dynabert_config`` is exported onto the origin model, and
    the result is saved.

    Args:
        args: Namespace-like object. The attributes read here are
            ``n_gpu``, ``model_type``, ``model_name_or_path``,
            ``depth_mult``, ``width_mult``, ``max_seq_length``,
            ``sub_model_output_dir`` and ``static_sub_model``.
            ``args.model_type`` is lower-cased in place.
    """
    paddle.set_device("gpu" if args.n_gpu else "cpu")
    args.model_type = args.model_type.lower()
    # Only the model class is needed; the tokenizer class is ignored.
    model_class, _ = MODEL_CLASSES[args.model_type]

    config_path = os.path.join(args.model_name_or_path, 'model_config.json')
    # Read as UTF-8 to match the encoding used when the adjusted config is
    # written back below; the context manager closes the handle.
    with open(config_path, encoding="utf-8") as config_file:
        cfg_dict = dict(json.load(config_file))

    # Read this before touching the file so a missing key cannot leave the
    # config renamed on disk.
    num_labels = cfg_dict['num_classes']

    # Maps each kept (new) layer index to the source layer it copies from.
    kept_layers_index = {}
    if args.depth_mult < 1.0:
        init_args = cfg_dict["init_args"][0]
        depth = round(init_args['num_hidden_layers'] * args.depth_mult)
        init_args['num_hidden_layers'] = depth
        kept_layers_index = {
            idx: math.floor((idx + 1) / args.depth_mult) - 1
            for idx in range(depth)
        }

    # Temporarily swap in the adjusted config so from_pretrained builds the
    # reduced architecture. os.replace overwrites an existing destination
    # on every platform (os.rename fails on Windows in that case), and the
    # finally clause restores the original config even if loading raises.
    backup_path = config_path + '_bak'
    os.replace(config_path, backup_path)
    try:
        with open(config_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(cfg_dict, ensure_ascii=False))

        model = model_class.from_pretrained(
            args.model_name_or_path, num_classes=num_labels)

        origin_model = model_class.from_pretrained(
            args.model_name_or_path, num_classes=num_labels)
    finally:
        os.replace(backup_path, config_path)

    sp_config = supernet(expand_ratio=[1.0, args.width_mult])
    model = Convert(sp_config).convert(model)

    ofa_model = OFA(model)

    sd = paddle.load(
        os.path.join(args.model_name_or_path, 'model_state.pdparams'))

    if not kept_layers_index:
        ofa_model.model.set_state_dict(sd)
    else:
        for name, params in ofa_model.model.named_parameters():
            if 'encoder' not in name:
                params.set_value(sd[name])
            else:
                # The fourth dot-separated component of the parameter name
                # is taken as the layer index and remapped to the source
                # layer whose weights should be copied.
                idx = int(name.strip().split('.')[3])
                mapping_name = name.replace(
                    '.' + str(idx) + '.',
                    '.' + str(kept_layers_index[idx]) + '.')
                params.set_value(sd[mapping_name])

    best_config = utils.dynabert_config(ofa_model, args.width_mult)
    # Scale attention head counts in the supernet before exporting ...
    for _, sublayer in ofa_model.model.named_sublayers():
        if isinstance(sublayer, paddle.nn.MultiHeadAttention):
            sublayer.num_heads = int(args.width_mult * sublayer.num_heads)

    ofa_model.export(
        best_config,
        input_shapes=[[1, args.max_seq_length], [1, args.max_seq_length]],
        input_dtypes=['int64', 'int64'],
        origin_model=origin_model)
    # ... and in the exported origin model afterwards.
    for _, sublayer in origin_model.named_sublayers():
        if isinstance(sublayer, paddle.nn.MultiHeadAttention):
            sublayer.num_heads = int(args.width_mult * sublayer.num_heads)

    output_dir = os.path.join(args.sub_model_output_dir,
                              "model_width_%.5f" % args.width_mult)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)
    origin_model.save_pretrained(output_dir)

    if args.static_sub_model is not None:
        export_static_model(origin_model, args.static_sub_model,
                            args.max_seq_length)