Example #1
    def __init__(self, config):
        super(PoolerEndLogits, self).__init__()
        # Two-layer head: reduce the 2 * hidden_size input to hidden_size,
        # normalize, then project to a single logit.
        self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
        self.activation = nn.Tanh()
        self.LayerNorm = nn.LayerNorm(config.hidden_size,
                                      eps=config.layer_norm_eps)
        self.dense_1 = nn.Linear(config.hidden_size, 1)
Example #2
    def __init__(self, config, num_labels):
        super(BertTokenHead, self).__init__()
        dropout_classifier = config.dropout_classifier
        self.num_labels = num_labels
        # Per-token classifier with optional dropout in front of it.
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)
        self.dropout = (nn.Dropout(dropout_classifier)
                        if dropout_classifier is not None else None)
Example #3
def prune_linear_layer(layer, index, dim=0):
    """Prune a linear layer (a model parameter) to keep only the entries in index.
       Returns the pruned layer as a new layer with requires_grad=True.
       Used to remove attention heads.
    """
    index = index.to(layer.weight.device)
    # Select the rows (dim=0) or columns (dim=1) of the weight matrix to keep.
    W = layer.weight.index_select(dim, index).clone().detach()
    if layer.bias is not None:
        if dim == 1:
            b = layer.bias.clone().detach()
        else:
            b = layer.bias[index].clone().detach()
    new_size = list(layer.weight.size())
    new_size[dim] = len(index)
    new_layer = nn.Linear(new_size[1],
                          new_size[0],
                          bias=layer.bias is not None).to(layer.weight.device)
    # Copy the selected weights/bias into the new layer without tracking gradients.
    new_layer.weight.requires_grad = False
    new_layer.weight.copy_(W.contiguous())
    new_layer.weight.requires_grad = True
    if layer.bias is not None:
        new_layer.bias.requires_grad = False
        new_layer.bias.copy_(b.contiguous())
        new_layer.bias.requires_grad = True
    return new_layer
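A minimal usage sketch (the layer sizes and kept indices below are illustrative, assuming torch is imported and prune_linear_layer is in scope): pruning along dim=0 keeps only the selected output units.

import torch
import torch.nn as nn

layer = nn.Linear(16, 8)
keep = torch.tensor([0, 2, 5])  # output units to keep (illustrative)
pruned = prune_linear_layer(layer, keep, dim=0)
print(pruned)  # Linear(in_features=16, out_features=3, bias=True)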
Example #4
    def __init__(self, n_in, n_hidden, dropout=0):
        super(MLP, self).__init__()

        # Single affine projection followed by LeakyReLU and (shared) dropout.
        self.linear = nn.Linear(n_in, n_hidden)
        self.activation = nn.LeakyReLU(negative_slope=0.1)
        self.dropout = SharedDropout(p=dropout)

        self.reset_parameters()
Example #5
    def __init__(self, config):
        super(SequenceSummary, self).__init__()

        self.summary_type = config.summary_type if hasattr(config, 'summary_type') else 'last'
        if self.summary_type == 'attn':
            # We should use a standard multi-head attention module with absolute positional embedding for that.
            # Cf. https://github.com/zihangdai/xlnet/blob/master/modeling.py#L253-L276
            # We can probably just use the multi-head attention module of PyTorch >=1.1.0
            raise NotImplementedError

        # Optional projection of the summary vector, either to num_labels or back to hidden_size.
        self.summary = Identity()
        if hasattr(config, 'summary_use_proj') and config.summary_use_proj:
            if hasattr(config, 'summary_proj_to_labels') and config.summary_proj_to_labels and config.num_labels > 0:
                num_classes = config.num_labels
            else:
                num_classes = config.hidden_size
            self.summary = nn.Linear(config.hidden_size, num_classes)

        self.activation = Identity()
        if hasattr(config, 'summary_activation') and config.summary_activation == 'tanh':
            self.activation = nn.Tanh()

        self.first_dropout = Identity()
        if hasattr(config, 'summary_first_dropout') and config.summary_first_dropout > 0:
            self.first_dropout = nn.Dropout(config.summary_first_dropout)

        self.last_dropout = Identity()
        if hasattr(config, 'summary_last_dropout') and config.summary_last_dropout > 0:
            self.last_dropout = nn.Dropout(config.summary_last_dropout)
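A minimal construction sketch, assuming the SequenceSummary class above and an Identity module are in scope; the SimpleNamespace config is only a stand-in for a real model config, and the attribute values are illustrative.

from types import SimpleNamespace

config = SimpleNamespace(summary_type='last', summary_use_proj=True,
                         summary_proj_to_labels=True, num_labels=3,
                         hidden_size=768, summary_activation='tanh',
                         summary_first_dropout=0.0, summary_last_dropout=0.1)
head = SequenceSummary(config)  # summary: Linear(768 -> 3), activation: Tanh, last_dropout: p=0.1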
Example #6
    def __init__(self, config):
        super(PoolerAnswerClass, self).__init__()
        # Reduce the 2 * hidden_size input to hidden_size, then score it with a single bias-free logit.
        self.dense_0 = nn.Linear(config.hidden_size * 2, config.hidden_size)
        self.activation = nn.Tanh()
        self.dense_1 = nn.Linear(config.hidden_size, 1, bias=False)
Example #7
    def __init__(self, config):
        super(PoolerStartLogits, self).__init__()
        # Single projection from hidden_size to one logit per position.
        self.dense = nn.Linear(config.hidden_size, 1)