Example #1
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AlbertConfig, AlbertModel, AlbertPreTrainedModel


class AlbertLSTM(AlbertPreTrainedModel):
    def __init__(self, config: AlbertConfig):
        super().__init__(config)

        self.albert = AlbertModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.lstm = nn.LSTM(input_size=config.hidden_size,
                            hidden_size=config.hidden_size,
                            num_layers=1,
                            dropout=0,
                            batch_first=True,
                            bidirectional=False)

        self.fc = nn.Linear(config.hidden_size, config.num_labels)
        self.fc_bn = nn.BatchNorm1d(config.num_labels)
        self.tanh = nn.Tanh()
        self.init_weights()

        # Default: freeze albert, then unfreeze the parameter groups selected
        # by config.unfreeze (matched as substrings of the parameter names).
        for param in self.albert.parameters():
            param.requires_grad = False

        unfreeze_patterns = {
            "embed": ("embeddings",),
            "embed_enc0": ("embeddings", "encoder"),
            "embed_enc0_pooler": ("",),  # "" matches every parameter name
            "enc0": ("encoder",),
            "enc0_pooler": ("encoder", "pooler"),
            "embed_pooler": ("embed", "pooler"),
            "pooler": ("pooler",),
        }
        for name, param in self.albert.named_parameters():
            if any(p in name for p in unfreeze_patterns.get(config.unfreeze, ())):
                param.requires_grad = True

    def forward(self, doc):
        """
        Input:
            doc: [batch_size, num_chunks, 3, max_chunk_len]            
        Returns:
            out: [batch_size, output_dim]       
            
        """
        batch_size = doc.shape[0]

        # AlbertModel output: (last_hidden_state, pooler_output, hidden_states, attentions);
        # [1] is the pooled last-layer hidden state of the first token
        # (the classification token) for each chunk.
        pooled = torch.stack(
            [self.albert(input_ids=doc[i, :, 0],
                         attention_mask=doc[i, :, 1],
                         token_type_ids=doc[i, :, 2])[1]
             for i in range(batch_size)],
            dim=0)  # [batch_size, num_chunks, hidden_size]

        dp = self.dropout(pooled)  # [batch_size, num_chunks, hidden_size]
        # output: [batch_size, num_chunks, n_directions*hidden_size], output features from the last layer for each t
        # h_n: [n_layers*n_directions, batch_size, hidden_size], hidden state for t=seq_len
        # c_n: [n_layers*n_directions, batch_size, hidden_size], cell state for t=seq_len
        output, (h_n, c_n) = self.lstm(dp)

        # h_n = output[:,-1,].squeeze(1)  # [batch_size, hidden_size]
        h_n = h_n.squeeze(0)  # [batch_size, hidden_size]

        out = self.fc(h_n)  # [batch_size, num_labels]
        out = self.fc_bn(out)
        out = F.softmax(out, dim=1)  # [batch_size, num_labels]
        # out = self.tanh(out)   # [batch_size, num_labels]

        return out
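
A minimal usage sketch for the class above (an illustration, not part of the original source): it assumes the custom unfreeze attribute is attached to the config, as the constructor expects, and feeds a dummy all-zeros doc tensor shaped as the docstring describes.

# Usage sketch (assumption: config carries the custom `unfreeze` attribute).
config = AlbertConfig.from_pretrained("albert-base-v2", num_labels=2)
config.unfreeze = "pooler"  # custom attribute read in __init__

model = AlbertLSTM.from_pretrained("albert-base-v2", config=config)
model.eval()

# doc packs (input_ids, attention_mask, token_type_ids) for each chunk:
# [batch_size, num_chunks, 3, max_chunk_len]; zeros act as padding ids here.
doc = torch.zeros(2, 4, 3, 128, dtype=torch.long)

with torch.no_grad():
    probs = model(doc)  # [2, num_labels]; rows sum to 1 after the softmax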
Example #2
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AlbertConfig, AlbertModel, AlbertPreTrainedModel


class AlbertLinear(AlbertPreTrainedModel):
    def __init__(self, config: AlbertConfig):
        super().__init__(config)

        self.albert = AlbertModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.fc = nn.Linear(config.hidden_size, config.num_labels)
        self.fc_bn = nn.BatchNorm1d(config.num_labels)
        # self.fc = nn.Linear(config.hidden_size * config.n_chunks, config.num_labels)
        self.init_weights()

        # Default: freeze albert, then unfreeze the parameter groups selected
        # by config.unfreeze (matched as substrings of the parameter names).
        for param in self.albert.parameters():
            param.requires_grad = False

        unfreeze_patterns = {
            "embed": ("embeddings",),
            "embed_enc0": ("embeddings", "encoder"),
            "embed_enc0_pooler": ("",),  # "" matches every parameter name
            "enc0": ("encoder",),
            "enc0_pooler": ("encoder", "pooler"),
            "embed_pooler": ("embed", "pooler"),
            "pooler": ("pooler",),
        }
        for name, param in self.albert.named_parameters():
            if any(p in name for p in unfreeze_patterns.get(config.unfreeze, ())):
                param.requires_grad = True

    def forward(self, doc):
        """
        Input:
            doc: [batch_size, num_chunks, 3, max_chunk_len]            
        Returns:
            out: [batch_size, output_dim]       
            
        """
        batch_size = doc.shape[0]

        # Pooled [CLS] output for every chunk of every document.
        pooled = torch.stack(
            [self.albert(input_ids=doc[i, :, 0],
                         attention_mask=doc[i, :, 1],
                         token_type_ids=doc[i, :, 2])[1]
             for i in range(batch_size)],
            dim=0)  # [batch_size, num_chunks, hidden_size]

        dp = self.dropout(pooled)  # [batch_size, num_chunks, hidden_size]
        # concat = dp.view(batch_size, -1)  # [batch_size, num_chunks*hidden_size]
        if self.albert.config.linear_max:
            dp = torch.max(dp, dim=1).values  # [batch_size, hidden_size]
        else:
            dp = torch.mean(dp, dim=1)  # [batch_size, hidden_size]
        # dp = dp.sum(dim=1) # [batch_size, hidden_size]

        out = self.fc(dp)  # [batch_size, num_labels]
        out = self.fc_bn(out)
        out = F.softmax(out, dim=1)  # [batch_size, num_labels]

        return out
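
Again a minimal usage sketch (an illustration, not part of the original source): AlbertLinear additionally reads a custom linear_max flag from the config to choose between max- and mean-pooling over the chunk dimension.

# Usage sketch (assumption: config carries the custom `unfreeze` and
# `linear_max` attributes the class reads).
config = AlbertConfig.from_pretrained("albert-base-v2", num_labels=2)
config.unfreeze = "enc0_pooler"  # custom attribute read in __init__
config.linear_max = False        # False -> mean-pool over chunks

model = AlbertLinear.from_pretrained("albert-base-v2", config=config)
model.eval()

doc = torch.zeros(2, 4, 3, 128, dtype=torch.long)  # [batch, chunks, 3, len]
with torch.no_grad():
    probs = model(doc)  # [2, num_labels]; rows sum to 1 after the softmax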