Example #1
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int,
                 n_highway: int,
                 use_position: bool = False,
                 dropout: float = 0.0):
        super(LBLHighwayBiLmV2, self).__init__()
        self.use_position = use_position
        self.n_layers = n_layers
        self.n_highway = n_highway
        self.dropout = torch.nn.Dropout(p=dropout)

        self.width = width
        self.input_size = input_size
        self.hidden_size = hidden_size

        forward_scores, backward_scores = [], []
        forward_blocks, backward_blocks = [], []

        for _ in range(n_layers):
            forward_scores.append(torch.nn.Parameter(torch.randn(width + 1)))
            backward_scores.append(torch.nn.Parameter(torch.randn(width + 1)))

            forward_blocks.append(Highway(hidden_size, num_layers=n_highway))
            backward_blocks.append(Highway(hidden_size, num_layers=n_highway))

        self.forward_weights = torch.nn.ParameterList(forward_scores)
        self.backward_weights = torch.nn.ParameterList(backward_scores)
        self.forward_blocks = torch.nn.ModuleList(forward_blocks)
        self.backward_blocks = torch.nn.ModuleList(backward_blocks)

        if self.use_position:
            self.position = PositionalEncoding(hidden_size)
Example #2
    def define_embedding(self, H, char_arr, rel_arr, def_arr):
        """
            Define the embedding for different methods.
        """
        if H.joint_emb is not None:
            self._jdrop = nn.Dropout(H.joint_dropout if self.use_dropout else 0)

        if H.char_emb or H.cnnsoftmax:
            self.char_arr = torch.LongTensor(char_arr).cuda()
            self.rel_arr, self.def_arr = None, None
            self._char_emb = nn.Embedding(262, H.char_emsize).cuda()
            self._char_network = nn.ModuleList()
            self._char_network.append(nn.Conv1d(H.char_emsize, 32, 1, stride=(1,)).cuda())
            self._char_network.append(nn.Conv1d(H.char_emsize, 32, 2, stride=(1,)).cuda())
            self._char_network.append(nn.Conv1d(H.char_emsize, 64, 3, stride=(2,)).cuda())
            self._char_network.append(nn.Conv1d(H.char_emsize, 128, 4, stride=(3,)).cuda())
            self._char_network.append(nn.Conv1d(H.char_emsize, 256, 5, stride=(4,)).cuda())
            self._char_network.append(nn.Conv1d(H.char_emsize, 512, 6, stride=(5,)).cuda())
            self._conv_activation = getattr(torch.nn.functional, H.char_activation)
            if not H.char_nohighways:
                self._char_highways = Highway(1024, H.hdepth, activation=self._conv_activation)
            self._char_linear = nn.Linear(1024, H.emsize, bias=False)
            nforms = 1

            if rel_arr:
                self.rel_arr = self.coverage_filter(torch.LongTensor(rel_arr).cuda())
                nforms += 1
            if def_arr:
                self.def_arr = self.coverage_filter(torch.LongTensor(def_arr).cuda())
                nforms += 1

            self.rel_exist = self.rel_arr is not None
            self.def_exist = self.def_arr is not None
            self.nforms = H.nforms = nforms
            if H.defenc == "lstm":
                if def_arr:
                    defsize = self.def_arr.shape[1]
                    def_h = torch.zeros(H.hdepth, defsize, H.emsize).cuda()
                    self.def_hid = (def_h, def_h)
                if rel_arr:
                    relsize = self.rel_arr.shape[1]
                    rel_h = torch.zeros(H.hdepth, relsize, H.emsize).cuda()
                    self.rel_hid = (rel_h, rel_h)
                self._definition_network = torch.nn.LSTM(H.emsize, H.emsize, num_layers=H.hdepth)
            elif H.defenc == "highway":
                self._definition_network = Highway(H.emsize, H.hdepth, activation=self._conv_activation)

            if H.combine == "concat":
                self._comb_lin = nn.Linear(H.emsize * H.nforms, H.emsize, bias=True)

            if H.cnnsoftmax or H.char_emb:
                if H.cnnsoftmax:
                    self._lookup = nn.Embedding(H.ntoken, H.emsize)
                if H.cnncorr:
                    self._cnnsoftmax_correction = nn.Linear(H.cnncorr, H.ntoken, bias=False)
                    self._cnnsoftmax_M = nn.Linear(H.cnncorr, H.emsize, bias=False)
        else:
            self._lookup = nn.Embedding(H.ntoken, H.emsize)
Example #3
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_heads: int,
                 n_layers: int,
                 n_highway: int,
                 use_position: bool = False,
                 use_relative_position: bool = False,
                 dropout: float = 0.0):
        super(SelfAttentiveLBLBiLMV3, self).__init__()
        self.use_position = use_position
        self.use_relative_position_weights = use_relative_position
        self.n_layers = n_layers
        self.n_highway = n_highway
        self.n_heads = n_heads
        self.input_size = input_size
        self.width = width
        self.hidden_size = hidden_size

        forward_attns, backward_attns = [], []
        forward_blocks, backward_blocks = [], []

        for _ in range(n_layers):
            if self.use_relative_position_weights:
                forward_attn = MultiHeadedAttentionWithRelativePositionEmbeddings(
                    n_heads,
                    hidden_size,
                    width=width + 1,
                    left_to_right=True,
                    dropout=dropout)
                backward_attn = MultiHeadedAttentionWithRelativePositionEmbeddings(
                    n_heads,
                    hidden_size,
                    width=width + 1,
                    left_to_right=False,
                    dropout=dropout)
            else:
                forward_attn = MultiHeadedAttention(n_heads, hidden_size,
                                                    dropout)
                backward_attn = MultiHeadedAttention(n_heads, hidden_size,
                                                     dropout)

            forward_attns.append(forward_attn)
            backward_attns.append(backward_attn)
            forward_blocks.append(Highway(hidden_size, n_highway))
            backward_blocks.append(Highway(hidden_size, n_highway))

        self.forward_attns = torch.nn.ModuleList(forward_attns)
        self.backward_attns = torch.nn.ModuleList(backward_attns)

        self.forward_blocks = torch.nn.ModuleList(forward_blocks)
        self.backward_blocks = torch.nn.ModuleList(backward_blocks)

        if self.use_position:
            self.position = PositionalEncoding(hidden_size)
Example #4
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int,
                 n_highway: int,
                 use_position: bool = False,
                 dropout: float = 0.0):
        super(Bengio03HighwayBiLmV2, self).__init__()
        self.use_position = use_position
        self.n_layers = n_layers
        self.n_highway = n_highway

        self.dropout = torch.nn.Dropout(p=dropout)
        self.activation = torch.nn.ReLU()

        self.width = width
        self.input_size = input_size
        self.context_input_size = input_size * (width + 1)
        self.hidden_size = hidden_size

        self.forward_paddings = torch.nn.ModuleList([
            torch.nn.ConstantPad2d((0, 0, length, 0), 0)
            for length in range(width + 1)
        ])
        self.backward_paddings = torch.nn.ModuleList([
            torch.nn.ConstantPad2d((0, 0, 0, length), 0)
            for length in range(width + 1)
        ])

        forward_blocks = []
        backward_blocks = []
        for layer_index in range(self.n_layers):
            forward_layer = torch.nn.ModuleList([
                torch.nn.Linear(input_size, hidden_size, bias=False)
                for _ in range(width + 1)
            ])
            backward_layer = torch.nn.ModuleList([
                torch.nn.Linear(input_size, hidden_size, bias=False)
                for _ in range(width + 1)
            ])
            self.add_module('forward_layer_{}'.format(layer_index),
                            forward_layer)
            self.add_module('backward_layer_{}'.format(layer_index),
                            backward_layer)

            forward_blocks.append(Highway(hidden_size, num_layers=n_highway))
            backward_blocks.append(Highway(hidden_size, num_layers=n_highway))

        self.forward_blocks = torch.nn.ModuleList(forward_blocks)
        self.backward_blocks = torch.nn.ModuleList(backward_blocks)

        if self.use_position:
            self.position = PositionalEncoding(hidden_size)
Example #5
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int,
                 n_highway: int,
                 use_position: bool = False,
                 dropout: float = 0.0):
        super(Bengio03HighwayBiLm, self).__init__()
        self.use_position = use_position
        self.n_layers = n_layers
        self.n_highway = n_highway

        self.dropout = torch.nn.Dropout(p=dropout)
        self.activation = torch.nn.ReLU()

        self.width = width
        self.input_size = input_size
        self.context_input_size = input_size * (width + 1)
        self.hidden_size = hidden_size

        forward_paddings, backward_paddings = [], []
        forward_blocks, backward_blocks = [], []
        forward_projects, backward_projects = [], []
        for i in range(n_layers):
            forward_paddings.append(
                torch.nn.Parameter(torch.randn(width, hidden_size)))
            backward_paddings.append(
                torch.nn.Parameter(torch.randn(width, hidden_size)))

            forward_blocks.append(Highway(hidden_size, num_layers=n_highway))
            backward_blocks.append(Highway(hidden_size, num_layers=n_highway))

            forward_projects.append(
                torch.nn.Linear(self.context_input_size, hidden_size))
            backward_projects.append(
                torch.nn.Linear(self.context_input_size, hidden_size))

        self.forward_projects = torch.nn.ModuleList(forward_projects)
        self.backward_projects = torch.nn.ModuleList(backward_projects)
        self.forward_paddings = torch.nn.ParameterList(forward_paddings)
        self.backward_paddings = torch.nn.ParameterList(backward_paddings)
        self.forward_blocks = torch.nn.ModuleList(forward_blocks)
        self.backward_blocks = torch.nn.ModuleList(backward_blocks)

        if self.use_position:
            self.position = PositionalEncoding(hidden_size)

        self.reset_parameters()
Example #6
    def _load_highway(self):
        # pylint: disable=protected-access
        # the highway layers have same dimensionality as the number of cnn filters
        cnn_options = self._options['char_cnn']
        filters = cnn_options['filters']
        n_filters = sum(f[1] for f in filters)
        n_highway = cnn_options['n_highway']

        # create the layers, and load the weights
        self._highways = Highway(n_filters,
                                 n_highway,
                                 activation=torch.nn.functional.relu)
        for k in range(n_highway):
            # The AllenNLP highway is one matrix multiplication with concatenation of
            # transform and carry weights.
            with h5py.File(cached_path(self._weight_file), 'r') as fin:
                # The weights are transposed due to multiplication order assumptions in tf
                # vs pytorch (tf.matmul(X, W) vs pytorch.matmul(W, X))
                w_transform = numpy.transpose(
                    fin['CNN_high_{}'.format(k)]['W_transform'][...])
                # -1.0 since AllenNLP is g * x + (1 - g) * f(x) but tf is (1 - g) * x + g * f(x)
                w_carry = -1.0 * numpy.transpose(
                    fin['CNN_high_{}'.format(k)]['W_carry'][...])
                weight = numpy.concatenate([w_transform, w_carry], axis=0)
                self._highways._layers[k].weight.data.copy_(
                    torch.FloatTensor(weight))
                self._highways._layers[k].weight.requires_grad = False

                b_transform = fin['CNN_high_{}'.format(k)]['b_transform'][...]
                b_carry = -1.0 * fin['CNN_high_{}'.format(k)]['b_carry'][...]
                bias = numpy.concatenate([b_transform, b_carry], axis=0)
                self._highways._layers[k].bias.data.copy_(
                    torch.FloatTensor(bias))
                self._highways._layers[k].bias.requires_grad = False
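A note on the loading code above: each AllenNLP Highway layer is a single linear map whose output is the transform pre-activation concatenated with the carry-gate logits, which is why W_transform/W_carry and b_transform/b_carry are concatenated before copying. The snippet below is a minimal sketch (not part of the source) checking the sign flip: negating the carry logits turns the TensorFlow convention (1 - g) * x + g * f(x) into AllenNLP's g * x + (1 - g) * f(x), because sigmoid(-z) = 1 - sigmoid(z).

import torch

x = torch.randn(4)   # layer input
fx = torch.relu(x)   # stand-in for the transform branch f(x)
z = torch.randn(4)   # carry logits as produced by the original TF weights

tf_style = (1 - torch.sigmoid(z)) * x + torch.sigmoid(z) * fx
allennlp_style = torch.sigmoid(-z) * x + (1 - torch.sigmoid(-z)) * fx
assert torch.allclose(tf_style, allennlp_style)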
Example #7
    def __init__(self, output_dim: int, word_embedder: Embeddings,
                 char_embedder: Embeddings, filters: List[Tuple[int, int]],
                 n_highway: int, activation: str):
        super(ConvTokenEmbedder, self).__init__(output_dim, word_embedder,
                                                char_embedder)

        self.emb_dim = 0
        if word_embedder is not None:
            self.emb_dim += word_embedder.n_d

        if char_embedder is not None:
            self.convolutions = []
            char_embed_dim = char_embedder.n_d

            for i, (width, num) in enumerate(filters):
                conv = torch.nn.Conv1d(in_channels=char_embed_dim,
                                       out_channels=num,
                                       kernel_size=width,
                                       bias=True)
                self.convolutions.append(conv)

            self.convolutions = torch.nn.ModuleList(self.convolutions)

            self.n_filters = sum(f[1] for f in filters)
            self.n_highway = n_highway

            self.highways = Highway(self.n_filters,
                                    self.n_highway,
                                    activation=Activation.by_name("relu")())
            self.emb_dim += self.n_filters
            self.activation = Activation.by_name(activation)()

        self.projection = torch.nn.Linear(self.emb_dim,
                                          self.output_dim,
                                          bias=True)
Example #8
class CNNClassifier(nn.Module):
    """Encodes a sequence of word embeddings"""
    def __init__(self,
                 num_class,
                 input_dim,
                 kernel_nums,
                 kernel_sizes: list,
                 max_kernel_size=50,
                 dropout_rate=0.5):

        super().__init__()

        self.convs = nn.ModuleList([
            nn.Conv2d(in_channels=1,
                      out_channels=num,
                      kernel_size=(width, input_dim))
            for (num, width) in zip(kernel_nums, kernel_sizes)
        ])

        # self.bias = [nn.Parameter(torch.zeros())]
        self.highway_layer = Highway(input_dim=sum(kernel_nums), num_layers=1)

        self.dropout_layer = nn.Dropout(dropout_rate)

        self.feedforward_layer = nn.Linear(sum(kernel_nums), num_class)

        self.max_kernel_size = max_kernel_size

    def forward(self, x):
        # x : [batch size, seq len, input dim]
        if x.size(1) < self.max_kernel_size:
            pd = [0, 0, 0, self.max_kernel_size - x.size(1)]

            # [batch size, max seq len, input dim]
            x = f.pad(x, pd, 'constant', 0)

        # x : [batch size, 1, max seq len, input dim]
        x = x.unsqueeze(1)

        # x : list of [batch size, kernel num, max seq len - width + 1]
        x = [torch.relu(conv(x).squeeze(-1)) for conv in self.convs]

        # x = [torch.max_pool1d(x_, x_.size(-1)).squeeze(-1) for x_ in x]
        x = [torch.avg_pool1d(x_, x_.size(-1)).squeeze(-1) for x_ in x]

        # [batch size, sum(kernel_num)]
        x = torch.cat(x, dim=-1)

        x = self.highway_layer(x)

        # [batch size, num_class]
        logit = torch.log_softmax(
            self.feedforward_layer(self.dropout_layer(x)), -1)

        return logit
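A minimal usage sketch for the classifier above, with made-up hyperparameters. It assumes the Highway module referenced in __init__ is importable and that f in forward refers to torch.nn.functional.

import torch

model = CNNClassifier(num_class=5,
                      input_dim=300,
                      kernel_nums=[100, 100, 100],
                      kernel_sizes=[3, 4, 5])

embeddings = torch.randn(8, 20, 300)   # [batch size, seq len, input dim]
log_probs = model(embeddings)          # [batch size, num_class] log-probabilities
print(log_probs.shape)                 # torch.Size([8, 5])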
Example #9
    def __init__(self,
                 output_dim: int,
                 embeddings: Embeddings,
                 filters: List[Tuple[int, int]],
                 n_highway: int,
                 activation: str,
                 use_cuda: bool,
                 input_field_name: str = None):
        super(ConvTokenEmbedder, self).__init__(input_field_name)
        self.embeddings = embeddings
        self.output_dim = output_dim
        self.use_cuda = use_cuda
        self.filters = filters

        convolutions = []
        for i, (width, num) in enumerate(filters):
            conv = torch.nn.Conv1d(in_channels=embeddings.n_d,
                                   out_channels=num,
                                   kernel_size=width,
                                   bias=True)
            convolutions.append(conv)

        self.convolutions = torch.nn.ModuleList(convolutions)

        self.n_filters = sum(f[1] for f in filters)
        self.n_highway = n_highway

        self.highways = Highway(self.n_filters,
                                self.n_highway,
                                activation=torch.nn.functional.relu)

        self.activation = Activation.by_name(activation)()
        self.projection = torch.nn.Linear(self.n_filters,
                                          output_dim,
                                          bias=True)
        self.reset_parameters()
Example #10
    def __init__(
        self,
        embedding_dim: int,
        filters: Sequence[Sequence[int]],
        num_highway: int,
        projection_dim: int,
        activation: str = "relu",
        projection_location: str = "after_highway",
        do_layer_norm: bool = False,
    ) -> None:
        super().__init__()

        if projection_location not in _VALID_PROJECTION_LOCATIONS:
            raise ConfigurationError(
                f"unknown projection location: {projection_location}")

        self.input_dim = embedding_dim
        self.output_dim = projection_dim
        self._projection_location = projection_location

        if activation == "tanh":
            self._activation = torch.nn.functional.tanh
        elif activation == "relu":
            self._activation = torch.nn.functional.relu
        else:
            raise ConfigurationError(f"unknown activation {activation}")

        # Create the convolutions
        self._convolutions: List[torch.nn.Module] = []
        for i, (width, num) in enumerate(filters):
            conv = torch.nn.Conv1d(in_channels=embedding_dim,
                                   out_channels=num,
                                   kernel_size=width,
                                   bias=True)
            conv.weight.data.uniform_(-0.05, 0.05)
            conv.bias.data.fill_(0.0)
            self.add_module(f"char_conv_{i}",
                            conv)  # needs to match the old ELMo name
            self._convolutions.append(conv)

        # Create the highway layers
        num_filters = sum(num for _, num in filters)
        if projection_location == "after_cnn":
            highway_dim = projection_dim
        else:
            # highway_dim is the number of cnn filters
            highway_dim = num_filters
        self._highways = Highway(highway_dim,
                                 num_highway,
                                 activation=torch.nn.functional.relu)
        for highway_layer in self._highways._layers:
            # highway is a linear layer for each highway layer
            # with fused W and b weights
            highway_layer.weight.data.normal_(mean=0.0,
                                              std=np.sqrt(1.0 / highway_dim))
            highway_layer.bias[:highway_dim].data.fill_(0.0)
            highway_layer.bias[highway_dim:].data.fill_(2.0)

        # Projection layer: always num_filters -> projection_dim
        self._projection = torch.nn.Linear(num_filters,
                                           projection_dim,
                                           bias=True)
        self._projection.weight.data.normal_(mean=0.0,
                                             std=np.sqrt(1.0 / num_filters))
        self._projection.bias.data.fill_(0.0)

        # And add a layer norm
        if do_layer_norm:
            self._layer_norm: Callable = LayerNorm(self.output_dim)
        else:
            self._layer_norm = lambda tensor: tensor
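The highway initialization above depends on the fused layout of AllenNLP's Highway layers: each layer is one Linear of size highway_dim -> 2 * highway_dim, with the transform pre-activation in the first half and the carry gate in the second half. The sketch below (an illustration, not part of the source) shows why filling the second half of the bias with 2.0 makes each layer start out close to an identity mapping.

import torch

highway_dim = 4
layer = torch.nn.Linear(highway_dim, 2 * highway_dim)    # fused transform + gate
layer.bias.data[:highway_dim].fill_(0.0)                 # transform bias
layer.bias.data[highway_dim:].fill_(2.0)                 # carry/gate bias

gate = torch.sigmoid(layer.bias.data[highway_dim:])
print(gate)   # ~0.88 everywhere, so the output starts near 0.88 * x + 0.12 * f(x)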
Example #11
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_heads: int,
                 n_layers: int,
                 n_highway: int,
                 use_position: bool = False,
                 use_relative_position: bool = False,
                 dropout: float = 0.0):
        super(SelfAttentiveLBLBiLM, self).__init__()
        self.use_position = use_position
        self.use_relative_position_weights = use_relative_position
        self.n_layers = n_layers
        self.n_highway = n_highway
        self.n_heads = n_heads
        self.input_size = input_size
        self.width = width
        self.hidden_size = hidden_size

        forward_attns, backward_attns = [], []
        forward_paddings, backward_paddings = [], []
        forward_blocks, backward_blocks = [], []
        forward_weights, backward_weights = [], []

        for _ in range(n_layers):
            forward_attns.append(
                MultiHeadedAttention(n_heads, hidden_size, dropout))
            backward_attns.append(
                MultiHeadedAttention(n_heads, hidden_size, dropout))

            forward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))
            backward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))

            forward_blocks.append(Highway(hidden_size, n_highway))
            backward_blocks.append(Highway(hidden_size, n_highway))

            if self.use_relative_position_weights:
                forward_weights.append(
                    torch.nn.Parameter(torch.randn(width + 1)))
                backward_weights.append(
                    torch.nn.Parameter(torch.randn(width + 1)))

        self.forward_attns = torch.nn.ModuleList(forward_attns)
        self.backward_attns = torch.nn.ModuleList(backward_attns)

        self.forward_paddings = torch.nn.ParameterList(forward_paddings)
        self.backward_paddings = torch.nn.ParameterList(backward_paddings)

        self.forward_blocks = torch.nn.ModuleList(forward_blocks)
        self.backward_blocks = torch.nn.ModuleList(backward_blocks)

        if self.use_relative_position_weights:
            self.forward_weights = torch.nn.ParameterList(forward_weights)
            self.backward_weights = torch.nn.ParameterList(backward_weights)

        if self.use_position:
            self.position = PositionalEncoding(hidden_size)