Example #1: counting parameters with total_parameters
    def test_total_parameters(self):
        ident = IdentityLayer()
        emb = torch.nn.Embedding(32, 8)  # 32 * 8 = 256 parameters
        emb2 = torch.nn.Embedding(32, 16)  # 32 * 16 = 512 parameters
        emb2.weight.requires_grad = False  # frozen, but still counted
        assert total_parameters(emb) == 256
        assert total_parameters(ident) == 0
        assert total_parameters(emb2) == 512
        assert total_parameters(torch.nn.ModuleList([ident, emb, emb2])) == 768
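Note that the frozen emb2 still contributes all 512 of its parameters, so total_parameters evidently counts every parameter regardless of requires_grad. A minimal sketch consistent with these assertions (the real implementation is not shown here):

import torch

def total_parameters(model: torch.nn.Module) -> int:
    # count all parameters, trainable and frozen alike
    return sum(p.numel() for p in model.parameters())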
Example #2: GPT-2-style decoder-only model __init__
    def __init__(self, opt, dict):
        self.add_start_token = opt["add_start_token"]
        super().__init__(*self._get_special_tokens(opt, dict))

        # init the model
        self.encoder = IdentityLayer()
        self.decoder = self._get_decoder(opt, dict)
        self.config = self.decoder.transformer.config
        self.lm_head = torch.nn.Linear(self.config.n_embd,
                                       self.config.vocab_size,
                                       bias=False)
        self._tie_weights(self.lm_head, self.decoder.transformer.wte)
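_tie_weights is not shown, but given the GPT-2-style pairing of lm_head with decoder.transformer.wte, it presumably makes the output projection share the token-embedding matrix. A minimal sketch, assuming standard weight tying:

def _tie_weights(self, output_embeddings, input_embeddings):
    # point the LM head at the embedding's weight tensor so they stay in sync
    output_embeddings.weight = input_embeddings.weight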
Example #3: pipeline work-item scheduling
    def test_schedule_work_items(self):
        # test that we schedule things correctly
        # pretend we have 8 layers and 4 gpus, and they are unevenly distributed
        model = torch.nn.ModuleList()
        for i in range(8):
            layer = IdentityLayer()
            if i == 0:
                layer._mp_gpu = 'cuda:0'
            elif i in (1, 2, 3):
                layer._mp_gpu = 'cuda:1'
            elif i in (4, 5):
                layer._mp_gpu = 'cuda:2'
            elif i in (6, 7):
                layer._mp_gpu = 'cuda:3'
            model.append(layer)

        # there are 2 chunks, each 16 x 7 in size
        chunks = PipelineHelper.split(torch.randn(32, 7), 16)

        work_items = list(PipelineHelper.schedule_work_items(model, chunks))
        assert len(work_items) == 8
        assert work_items[0].layer_nos == [0] and work_items[0].chunk_idx == 0
        assert work_items[1].layer_nos == [1, 2, 3] and work_items[1].chunk_idx == 0
        assert work_items[2].layer_nos == [0] and work_items[2].chunk_idx == 1
        assert work_items[3].layer_nos == [4, 5] and work_items[3].chunk_idx == 0
        assert work_items[4].layer_nos == [1, 2, 3] and work_items[4].chunk_idx == 1
        assert work_items[5].layer_nos == [6, 7] and work_items[5].chunk_idx == 0
        assert work_items[6].layer_nos == [4, 5] and work_items[6].chunk_idx == 1
        assert work_items[7].layer_nos == [6, 7] and work_items[7].chunk_idx == 1
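The expected order is a pipeline wavefront: contiguous layers sharing a _mp_gpu form one stage ([0], [1, 2, 3], [4, 5], [6, 7] here), and items interleave so that chunk 1 can enter cuda:0 while chunk 0 advances to cuda:1. A rough sketch of the grouping step this implies (a hypothetical helper, not the actual PipelineHelper internals):

import itertools

def group_layers_by_device(model):
    # contiguous runs of layers on the same GPU form one pipeline stage
    for gpu, run in itertools.groupby(enumerate(model), key=lambda kv: kv[1]._mp_gpu):
        yield gpu, [idx for idx, _ in run]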
Example #4: decoder-only model __init__ with special tokens
    def __init__(self, opt, dict):
        self.null_idx, self.start_idx, self.end_idx = self._get_special_tokens(
            opt, dict)
        super().__init__(self.null_idx, self.start_idx, self.end_idx)

        # init the model
        self.encoder = IdentityLayer()
        self.decoder = self._get_decoder(opt, dict)
        self.config = self.decoder.transformer.config
        self.lm_head = torch.nn.Linear(self.config.n_embd,
                                       self.config.vocab_size,
                                       bias=False)
        self._tie_weights(self.lm_head, self.decoder.transformer.wte)
        # add start token
        self.add_start_token = (
            opt["add_special_tokens"] and opt["add_start_token"]
        )
        # used to reverse concatenation of context and labels
        self.text_lengths = None
Example #5: building a toy 8-layer model
def _get_model():
    model = torch.nn.Module()
    model.layers = torch.nn.ModuleList([IdentityLayer() for _ in range(8)])
    return model
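All five examples lean on IdentityLayer as a parameter-free stand-in module. Assuming it is the usual pass-through wrapper (consistent with total_parameters(ident) == 0 in Example #1), a minimal sketch:

import torch

class IdentityLayer(torch.nn.Module):
    # no-op module: returns its input unchanged and owns no parameters
    def forward(self, x):
        return x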