# Imports needed to run these tests. The two methods below come from a
# unittest.TestCase-based FP16-optimizer test suite for fairseq; the
# surrounding scaffolding is reconstructed here so the snippet is
# self-contained.
import argparse
import copy
import unittest

import torch
from omegaconf import OmegaConf

from fairseq.optim.adam import FairseqAdam
from fairseq.optim.fp16_optimizer import MemoryEfficientFP16Optimizer


class TestFP16Optimizer(unittest.TestCase):
    # self.model, self.cfg_dls and self.run_iter() are provided by this
    # class's setUp() and helper methods, which are not part of this excerpt.

    def test_memory_efficient(self):
        model = copy.deepcopy(self.model)
        params = list(model.parameters())
        optimizer = MemoryEfficientFP16Optimizer.build_optimizer(
            self.cfg_dls, params)

        self.run_iter(model, params, optimizer)
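
    # For context, a minimal sketch of what a run_iter-style helper might do.
    # The real helper is defined elsewhere in this class, so the name, input
    # shape and exact steps below are assumptions, not fairseq's actual code.
    def _run_iter_sketch(self, model, params, optimizer):
        loss = model(torch.rand(5).cuda().half()).sum()  # fp16 forward pass
        optimizer.backward(loss)  # FairseqOptimizer API: scales loss, then backprops
        optimizer.step()          # unscales/checks grads, then updates params
        optimizer.zero_grad()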


class TestMemoryEfficientFP16(unittest.TestCase):
    def test_load_state_dict(self):
        # define simple FP16 model
        model = torch.nn.Linear(5, 5).cuda().half()
        params = list(model.parameters())

        # initialize memory efficient FP16 optimizer
        # with pseudo DictConfigs
        optimizer = FairseqAdam(
            cfg=OmegaConf.create(
                vars(
                    argparse.Namespace(
                        adam_betas="(0.9, 0.999)",
                        adam_eps=1e-8,
                        weight_decay=0.0,
                        lr=[0.00001],
                    )
                )
            ),
            params=params,
        )
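
        # Wrap the Adam instance with the memory-efficient FP16 optimizer,
        # which adds dynamic loss scaling (configured via cfg.common) without
        # keeping a separate fp32 copy of the model parameters.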
        me_optimizer = MemoryEfficientFP16Optimizer(
            cfg=OmegaConf.create(
                {
                    "common": vars(
                        argparse.Namespace(
                            fp16_init_scale=1,
                            fp16_scale_window=1,
                            fp16_scale_tolerance=1,
                            threshold_loss_scale=1,
                            min_loss_scale=1e-4,
                        )
                    )
                }
            ),
            params=params,
            optimizer=optimizer,
        )

        # optimizer state is created in the first step
        loss = model(torch.rand(5).cuda().half()).sum()
        me_optimizer.backward(loss)
        me_optimizer.step()

        # reload state
        state = me_optimizer.state_dict()
        me_optimizer.load_state_dict(state)
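
        # After the round-trip, the param keys should still be fp16
        # (memory-efficient mode trains the fp16 weights directly) while the
        # optimizer state tensors (e.g. Adam's exp_avg/exp_avg_sq) stay fp32.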
        for k, v in me_optimizer.optimizer.state.items():
            self.assertTrue(k.dtype == torch.float16)
            for v_i in v.values():
                if torch.is_tensor(v_i):
                    self.assertTrue(v_i.dtype == torch.float32)
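
        # A further (hypothetical) sanity check, not in the original test:
        # one more update after the reload should still run end to end.
        loss = model(torch.rand(5).cuda().half()).sum()
        me_optimizer.backward(loss)
        me_optimizer.step()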