Example #1
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob)

            # Note: Please use `ModuleList` in new code. It provides better
            # support for running on multiple GPUs. We've kept `add_module` here
            # solely for backwards compatibility with existing serialized models.
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
                attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
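
Every snippet on this page uses the same lookup pattern: Activation.by_name(name) returns the activation class registered under that name, the trailing () instantiates it, and the instance is then applied to a tensor. A minimal sketch of that pattern, assuming AllenNLP and PyTorch are installed (the shapes are arbitrary):

    import torch
    from allennlp.nn import Activation

    relu = Activation.by_name("relu")()        # look up the registered class, then instantiate
    identity = Activation.by_name("linear")()  # "linear" is registered as the identity

    x = torch.randn(2, 5)
    assert torch.equal(identity(x), x)                  # passes the input through unchanged
    assert torch.equal(relu(x), torch.clamp(x, min=0))  # standard ReLU
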
Example #2
    def __init__(self, word_embeddings: TextFieldEmbedder, bin_count: int):

        super(DRMM, self).__init__()

        self.word_embeddings = word_embeddings
        self.cosine_module = CosineMatrixAttention()

        self.bin_count = bin_count
        self.matching_classifier = FeedForward(
            input_dim=bin_count,
            num_layers=2,
            hidden_dims=[bin_count, 1],
            activations=[
                Activation.by_name('tanh')(),
                Activation.by_name('tanh')()
            ])
        self.query_gate = FeedForward(
            input_dim=self.word_embeddings.get_output_dim(),
            num_layers=2,
            hidden_dims=[self.word_embeddings.get_output_dim(), 1],
            activations=[
                Activation.by_name('tanh')(),
                Activation.by_name('tanh')()
            ])
        self.query_softmax = MaskedSoftmax()
Example #3
    def __init__(self, output_dim: int, word_embedder: Embeddings,
                 char_embedder: Embeddings, filters: List[Tuple[int, int]],
                 n_highway: int, activation: str):
        super(ConvTokenEmbedder, self).__init__(output_dim, word_embedder,
                                                char_embedder)

        self.emb_dim = 0
        if word_embedder is not None:
            self.emb_dim += word_embedder.n_d

        if char_embedder is not None:
            self.convolutions = []
            char_embed_dim = char_embedder.n_d

            for i, (width, num) in enumerate(filters):
                conv = torch.nn.Conv1d(in_channels=char_embed_dim,
                                       out_channels=num,
                                       kernel_size=width,
                                       bias=True)
                self.convolutions.append(conv)

            self.convolutions = torch.nn.ModuleList(self.convolutions)

            self.n_filters = sum(f[1] for f in filters)
            self.n_highway = n_highway

            self.highways = Highway(self.n_filters,
                                    self.n_highway,
                                    activation=Activation.by_name("relu")())
            self.emb_dim += self.n_filters
            self.activation = Activation.by_name(activation)()

        self.projection = torch.nn.Linear(self.emb_dim,
                                          self.output_dim,
                                          bias=True)
Example #4
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder, dropout_p: float,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        self.embedding2input = FeedForward(
            input_dim=word_embeddings.get_output_dim(),
            num_layers=1,
            hidden_dims=encoder.get_input_dim(),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.encoder = encoder

        self.hidden2intermediate = FeedForward(
            input_dim=encoder.get_output_dim(),
            num_layers=1,
            hidden_dims=int(encoder.get_output_dim() / 2),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.intermediate2tag = nn.Linear(
            in_features=int(encoder.get_output_dim() / 2),
            out_features=vocab.get_vocab_size('labels'))

        self.accuracy = CategoricalAccuracy()
        self.loss_function = torch.nn.CrossEntropyLoss()
Example #5
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        attention_projection_dim: int,
        feedforward_hidden_dim: int,
        num_convs: int,
        conv_kernel_size: int,
        num_attention_heads: int,
        use_positional_encoding: bool = True,
        dropout_prob: float = 0.1,
        layer_dropout_undecayed_prob: float = 0.1,
        attention_dropout_prob: float = 0,
    ) -> None:
        super().__init__()

        check_dimensions_match(input_dim, hidden_dim, "input_dim", "hidden_dim")

        self._use_positional_encoding = use_positional_encoding

        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)]
        )
        self._conv_layers = torch.nn.ModuleList()
        for _ in range(num_convs):
            padding = torch.nn.ConstantPad1d(
                (conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0
            )
            depthwise_conv = torch.nn.Conv1d(
                hidden_dim, hidden_dim, conv_kernel_size, groups=hidden_dim
            )
            pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
            self._conv_layers.append(
                torch.nn.Sequential(
                    padding, depthwise_conv, pointwise_conv, Activation.by_name("relu")()
                )
            )

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.attention_layer = MultiHeadSelfAttention(
            num_heads=num_attention_heads,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob,
        )
        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob,
        )

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
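
In the block above, ConstantPad1d((conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0) adds a total of conv_kernel_size - 1 zeros, so the depthwise convolution (stride 1, no padding of its own) leaves the sequence length unchanged before the pointwise 1x1 convolution mixes channels. A small check of that length bookkeeping with plain PyTorch (the dimensions are illustrative, and the tensors are channel-first as Conv1d expects):

    import torch

    hidden_dim, kernel_size, seq_len = 8, 5, 13
    block = torch.nn.Sequential(
        torch.nn.ConstantPad1d((kernel_size // 2, (kernel_size - 1) // 2), 0),
        torch.nn.Conv1d(hidden_dim, hidden_dim, kernel_size, groups=hidden_dim),  # depthwise
        torch.nn.Conv1d(hidden_dim, hidden_dim, 1),                               # pointwise 1x1
        torch.nn.ReLU(),
    )
    out = block(torch.randn(2, hidden_dim, seq_len))
    assert out.shape == (2, hidden_dim, seq_len)  # total padding of kernel_size - 1 preserves length
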
Example #6
    def __init__(
        self,
        input_dim,
        hidden_dim,
        projection_dim,
        feedforward_hidden_dim,
        num_layers,
        num_attention_heads,
        use_positional_encoding=True,
        dropout_prob=0.2,
    ):
        super(MaskedStackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers = []
        self._feedfoward_layers = []
        self._layer_norm_layers = []
        self._feed_forward_layer_norm_layers = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob,
            )

            self.add_module("feedforward_{i}".format(feedfoward))
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_input_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MaskedMultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
            )
            self.add_module("self_attention_{i}".format(self_attention))
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_input_dim())
            self.add_module("layer_norm_{i}".format(layer_norm))
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = torch.nn.Dropout(dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
        self._output_layer_norm = LayerNorm(self._output_dim)
Example #7
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob)

            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
                attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
Example #8
 def __init__(self,
              in_channels: int,
              out_channels: int,
              kernel_size: int,
              activation: str = 'relu',
              dim: int = 1,
              bias: bool = True) -> None:
     super().__init__()
     if dim == 1:
         padding_left = kernel_size // 2
         padding_right = padding_left if kernel_size % 2 != 0 else padding_left - 1
         self.depthwise_conv = torch.nn.Sequential(
             torch.nn.ReflectionPad1d((padding_left, padding_right)),
             torch.nn.Conv1d(in_channels=in_channels,
                             out_channels=in_channels,
                             kernel_size=kernel_size,
                             groups=in_channels,
                             bias=bias))
         self.pointwise_conv = torch.nn.Conv1d(in_channels=in_channels,
                                               out_channels=out_channels,
                                               kernel_size=1,
                                               bias=bias)
     elif dim == 2:
          if isinstance(kernel_size, collections.abc.Iterable):
             kernel_1, kernel_2 = kernel_size
             padding_1_left = kernel_1 // 2
             padding_1_right = padding_1_left if kernel_1 % 2 != 0 else padding_1_left - 1
             padding_2_left = kernel_2 // 2
             padding_2_right = padding_2_left if kernel_2 % 2 != 0 else padding_2_left - 1
             padding = (padding_1_left, padding_1_right, padding_2_left,
                        padding_2_right)
         else:
             padding_left = kernel_size // 2
             padding_right = padding_left if kernel_size % 2 != 0 else padding_left - 1
             padding = (padding_left, padding_right, padding_left,
                        padding_right)
         self.depthwise_conv = torch.nn.Sequential(
             torch.nn.ReflectionPad2d(padding),
             torch.nn.Conv2d(in_channels=in_channels,
                             out_channels=in_channels,
                             kernel_size=kernel_size,
                             groups=in_channels,
                             bias=bias))
         self.pointwise_conv = torch.nn.Conv2d(in_channels=in_channels,
                                               out_channels=out_channels,
                                               kernel_size=1,
                                               bias=bias)
     else:
         raise Exception(
             f"We currently only handle 1 and 2 dimensional convolutions here. You gave {dim}."
         )
     if activation is not None:
         self._activation = Activation.by_name(activation)()
     else:
         self._activation = Activation.by_name("linear")()
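
The depthwise/pointwise split in the class above is the usual depthwise separable factorization: roughly in_channels * kernel_size weights for the depthwise step plus in_channels * out_channels for the 1x1 pointwise step, instead of in_channels * out_channels * kernel_size for a full convolution. A quick parameter count with PyTorch (illustrative sizes, not taken from any of the snippets):

    import torch

    c_in, c_out, k = 128, 128, 7
    full = torch.nn.Conv1d(c_in, c_out, k, bias=True)
    depthwise = torch.nn.Conv1d(c_in, c_in, k, groups=c_in, bias=True)
    pointwise = torch.nn.Conv1d(c_in, c_out, 1, bias=True)

    count = lambda m: sum(p.numel() for p in m.parameters())
    print(count(full))                          # 128*128*7 + 128 = 114816
    print(count(depthwise) + count(pointwise))  # (128*7 + 128) + (128*128 + 128) = 17536
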
Example #9
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 attention_projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_convs: int,
                 conv_kernel_size: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 layer_dropout_undecayed_prob: float = 0.1,
                 attention_dropout_prob: float = 0) -> None:
        super().__init__()

        check_dimensions_match(input_dim, hidden_dim, 'input_dim',
                               'hidden_dim')

        self._use_positional_encoding = use_positional_encoding

        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)])
        self._conv_layers = torch.nn.ModuleList([
            DepthwiseSeparableConv(hidden_dim,
                                   hidden_dim,
                                   conv_kernel_size,
                                   activation="relu",
                                   dim=1) for _ in range(num_convs)
        ])

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.attention_layer = MemoryEfficientMultiHeadSelfAttention(
            num_heads=num_attention_heads,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob)
        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[
                Activation.by_name('relu')(),
                Activation.by_name('linear')()
            ],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob)

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(
            layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
Example #10
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(feedfoward_input_dim,
                                     activations=[Activation.by_name('relu')(),
                                                  Activation.by_name('linear')()],
                                     hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                     num_layers=2,
                                     dropout=dropout_prob)

            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(num_heads=num_attention_heads,
                                                    input_dim=hidden_dim,
                                                    attention_dim=projection_dim,
                                                    values_dim=projection_dim,
                                                    attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
Example #11
 def __init__(self,
              iterations: int = 1,
              dropout: float = 0.3,
              stright_through: bool = False,
              hidden_dim: int = 40,
              corruption_rate: float = 0.1,
              corruption_iterations: int = 1,
              testing_iterations: int = 5,
              gumbel_t: float = 0.0,
              weight_tie: bool = False,
              activation: str = "sigmoid",
              use_predicate_rep: bool = False,
              subtract_gold: bool = False,
              fw_update: float = 0,
              graph_type: int = 0) -> None:
     super(SRLRefiner, self).__init__()
     self.gumbel_t = gumbel_t
     self.stright_through = stright_through
     self.iterations = iterations
     self.hidden_dim = hidden_dim
     self._dropout = Dropout(dropout)
     self.testing_iterations = testing_iterations
     self.dropout = dropout
     self.corruption_rate = corruption_rate
     self._corrupt_mask = lambda x: torch.bernoulli(
         x.data.new(x.data.size()[:-1]).fill_(1 - self.corruption_rate)
     ).unsqueeze(-1)
     self.corruption_iterations = corruption_iterations
     self.weight_tie = weight_tie
     self.activation = Activation.by_name(activation)()
     self.use_predicate_rep = use_predicate_rep
     self.subtract_gold = subtract_gold
     self.graph_type = graph_type
     self.fw_update = fw_update
Example #12
 def __init__(self,
              input_dim: int,
              hidden_dim: int,
              activation: Activation = None) -> None:
     super().__init__()
     self._weight_matrix = nn.Linear(input_dim, hidden_dim)
     self._activation = activation or Activation.by_name('relu')()
Example #13
    def __init__(self,
                 vocab_size,
                 embed_size,
                 projection,
                 hidden_size=None,
                 activation: Activation = Activation.by_name('linear')(),
                 pre_embed=None):
        super(EncoderAverage, self).__init__()
        self.vocab_size = vocab_size
        self.embed_size = embed_size

        if pre_embed is not None:
            print("Setting Embedding")
            weight = torch.Tensor(pre_embed)
            weight[0, :].zero_()

            self.embedding = nn.Embedding(vocab_size,
                                          embed_size,
                                          _weight=weight,
                                          padding_idx=0)
        else:
            self.embedding = nn.Embedding(vocab_size,
                                          embed_size,
                                          padding_idx=0)

        if projection:
            self.projection = nn.Linear(embed_size, hidden_size)
            self.output_size = hidden_size
        else:
            self.projection = lambda s: s
            self.output_size = embed_size

        self.activation = activation
Example #14
 def __init__(self,
              num_layers: int,
              in_channel: int,
              hidden_channel: int,
              kernel_size: int = 3,
              stride: int = 2,
              nonlinearity: Activation = Activation.by_name('linear')()):
     super(CNN, self).__init__()
     self._in_channel = in_channel
     self._hidden_channel = hidden_channel
     self._kernel_size = kernel_size
     self._stride = stride
     self._num_layers = num_layers
     layers = []
     for l in range(num_layers):
         in_channel = self._in_channel if l == 0 else self._hidden_channel
         conv = LengthAwareWrapper(
             nn.Conv2d(in_channel,
                       self._hidden_channel,
                       self._kernel_size,
                       stride=self._stride,
                       padding=1))
         bn = LengthAwareWrapper(nn.BatchNorm2d(self._hidden_channel),
                                 pass_through=True)
         layers.append((f"conv{l}", conv))
         layers.append((f"bn{l}", bn))
         layers.append((f"nonlinear{l}",
                        LengthAwareWrapper(nonlinearity,
                                           pass_through=True)))
     self.module = nn.Sequential(OrderedDict(layers))
     strides = [
         self.module[idx].stride for idx in range(len(self.module))
         if hasattr(self.module[idx], "stride")
     ]
     self._downsample_rate = reduce(lambda x, y: x * y, strides)
Example #15
 def __init__(self,
              input_dim: int,
              activation: Optional[Activation] = None,
              normalise: bool = True) -> None:
     super().__init__(normalise)
     self._weights = torch.nn.Linear(in_features=input_dim,
                                     out_features=input_dim)
     self._activation = activation or Activation.by_name('relu')()
Example #16
 def from_params(cls, params: Params) -> 'LinearMatrixAttention':
     tensor_1_dim = params.pop_int("tensor_1_dim")
     tensor_2_dim = params.pop_int("tensor_2_dim")
     combination = params.pop("combination", "x,y")
     activation = Activation.by_name(params.pop("activation", "linear"))()
     params.assert_empty(cls.__name__)
     return cls(tensor_1_dim=tensor_1_dim,
                tensor_2_dim=tensor_2_dim,
                combination=combination,
                activation=activation)
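
from_params constructors like the one above are normally driven by a JSON-style configuration wrapped in AllenNLP's Params; each pop_* call consumes a key and assert_empty then complains about anything left over (e.g. a typo). A hedged sketch of how this particular constructor might be invoked, with made-up dimensions:

    from allennlp.common import Params

    config = Params({"tensor_1_dim": 200, "tensor_2_dim": 200, "activation": "tanh"})
    attention = LinearMatrixAttention.from_params(config)  # "combination" falls back to "x,y"
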
Example #17
    def initialize_network(self, n_tags: int, sense_dim: int, rep_dim: int):
        self.n_tags = n_tags

        self._arc_tag_arg_enc = Linear(rep_dim, self.hidden_dim)

        if self.use_predicate_rep:
            self._arc_tag_pred_enc = Linear(rep_dim, self.hidden_dim)

        if self.graph_type != 2:
            self._arc_tag_sense_enc = Linear(sense_dim, self.hidden_dim)

        if self.graph_type == 1:
            self._arc_tag_tags_enc = Linear(n_tags + 1, self.hidden_dim)
        elif self.graph_type == 2:
            self._arc_tag_tags_enc = Linear(n_tags + 1, self.hidden_dim)
        else:
            self._arc_tag_tags_enc = Linear(2 * n_tags + 1, self.hidden_dim)

        if self.weight_tie:
            self.arc_tag_refiner = lambda x: x.matmul(self._arc_tag_tags_enc.
                                                      weight[:, :n_tags + 1])

            if self.graph_type != 2:
                self.predicate_linear = Linear(rep_dim + n_tags + sense_dim,
                                               self.hidden_dim)
            else:
                self.predicate_linear = Linear(rep_dim + sense_dim,
                                               self.hidden_dim)

            self.predicte_refiner = lambda x: self._dropout(self.activation(self.predicate_linear(x)))\
                    .matmul(self.predicate_linear.weight[:,:sense_dim])
        else:
            self.arc_tag_refiner = FeedForward(self.hidden_dim,
                                               1,
                                               n_tags + 1,
                                               Activation.by_name("linear")(),
                                               dropout=self.dropout)

            self.predicte_refiner = FeedForward(
                rep_dim + n_tags + sense_dim,
                2, [self.hidden_dim] + [sense_dim],
                [self.activation] + [Activation.by_name("linear")()],
                dropout=self.dropout)
Example #18
 def __init__(self,
              tensor_1_dim: int,
              tensor_2_dim: int,
              combination: str = 'x,y',
              activation: Activation = None) -> None:
     super().__init__()
     self._combination = combination
     combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
     self._weight_vector = Parameter(torch.Tensor(combined_dim))
     self._bias = Parameter(torch.Tensor(1))
     self._activation = activation or Activation.by_name('linear')()
     self.reset_parameters()
Example #19
 def from_params(cls, params: Params) -> 'Attention':
     tensor_1_dim = params.pop_int("tensor_1_dim")
     tensor_2_dim = params.pop_int("tensor_2_dim")
     combination = params.pop("combination", "x,y")
     activation = Activation.by_name(params.pop("activation", "linear"))()
     normalize = params.pop_bool('normalize', True)
     params.assert_empty(cls.__name__)
     return cls(normalize=normalize,
                tensor_1_dim=tensor_1_dim,
                tensor_2_dim=tensor_2_dim,
                combination=combination,
                activation=activation)
Example #20
    def __init__(self,
                 vocab: Vocabulary,
                 encoder_dim: int,
                 label_dim: int,
                 edge_dim: int,
                 activation: Activation = None) -> None:
        """
            Parameters
            ----------
            vocab : ``Vocabulary``, required
                A Vocabulary, required in order to compute sizes for input/output projections.
            encoder_dim : ``int``, required.
                The output dimension of the encoder.
            label_dim : ``int``, required.
                The dimension of the hidden layer of the MLP used for predicting the edge labels.
            edge_dim : ``int``, required.
                The dimension of the hidden layer of the MLP used for predicting edge existence.
            activation : ``Activation``, optional, (default = tanh).
                The activation function used in the MLPs.
            dropout : ``float``, optional, (default = 0.0)
                The variational dropout applied to the output of the encoder and MLP layers.
        """
        super(KGEdges, self).__init__(vocab)
        self._encoder_dim = encoder_dim
        if activation is None:
            self.activation = Activation.by_name("tanh")()
        else:
            self.activation = activation

        #edge existence:

        #these two matrices together form the feed forward network which takes the vectors of the two words in question and makes predictions from that
        #this is the trick described by Kiperwasser and Goldberg to make training faster.
        self.head_arc_feedforward = torch.nn.Linear(encoder_dim, edge_dim)
        self.child_arc_feedforward = torch.nn.Linear(
            encoder_dim, edge_dim,
            bias=False)  #bias is already added by head_arc_feedforward

        self.arc_out_layer = torch.nn.Linear(
            edge_dim, 1,
            bias=False)  # K&G don't use a bias for the output layer

        #edge labels:
        num_labels = vocab.get_vocab_size("head_tags")  # = edge labels

        #same trick again
        self.head_label_feedforward = torch.nn.Linear(encoder_dim, label_dim)
        self.child_label_feedforward = torch.nn.Linear(encoder_dim,
                                                       label_dim,
                                                       bias=False)

        self.label_out_layer = torch.nn.Linear(
            label_dim, num_labels)  #output layer for edge labels
Example #21
 def __init__(self,
              tensor_1_dim: int,
              tensor_2_dim: int,
              combination: str = 'x,y',
              activation: Activation = None) -> None:
     super().__init__()
     self._combination = combination
     combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
     self._weight_vector = Parameter(torch.Tensor(combined_dim))
     self._bias = Parameter(torch.Tensor(1))
     self._activation = activation or Activation.by_name('linear')()
     self.reset_parameters()
Example #22
    def __init__(self, hdim: int = 768, nlayers: int = 2, dropout_prob: float = 0.1):
        super(GCNNet, self).__init__()
        # self.gcns = nn.ModuleList([GCN(hdim, hdim, F.relu) for i in range(nlayers)])
        self._gcn_layers = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []
        feedfoward_input_dim, feedforward_hidden_dim, hidden_dim = hdim, hdim, hdim
        for i in range(nlayers):
            feedfoward = FeedForward(feedfoward_input_dim,
                                     activations=[Activation.by_name('relu')(),
                                                  Activation.by_name('linear')()],
                                     hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                     num_layers=2,
                                     dropout=dropout_prob)

            # Note: Please use `ModuleList` in new code. It provides better
            # support for running on multiple GPUs. We've kept `add_module` here
            # solely for backwards compatibility with existing serialized models.
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            gcn = GCN(hdim, hdim, F.relu)
            self.add_module(f"gcn_{i}", gcn)
            self._gcn_layers.append(gcn)

            layer_norm = LayerNorm(hdim)
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(dropout_prob)
        self._input_dim = hdim
        self._output_dim = hdim
Example #23
 def __init__(self,
              tensor_1_dim,
              tensor_2_dim,
              combination=u'x,y',
              activation=None):
     super(LinearMatrixAttention, self).__init__()
     self._combination = combination
     combined_dim = util.get_combined_dim(combination,
                                          [tensor_1_dim, tensor_2_dim])
     self._weight_vector = Parameter(torch.Tensor(combined_dim))
     self._bias = Parameter(torch.Tensor(1))
     self._activation = activation or Activation.by_name(u'linear')()
     self.reset_parameters()
Example #24
 def __init__(self,
              u_input_dim: int,
              v_input_dim: int,
              projection_dim: int,
              activation: Optional[Activation] = None) -> None:
     super(HeterogenousSequenceAttention, self).__init__()
     self._output_dim = projection_dim
     self._u_input_dim = u_input_dim
     self._v_input_dim = v_input_dim
     self._u_projection = torch.nn.Linear(in_features=u_input_dim,
                                          out_features=projection_dim)
     self._v_projection = torch.nn.Linear(in_features=v_input_dim,
                                          out_features=projection_dim)
     self._activation = activation or Activation.by_name('relu')()
Example #25
    def __init__(self, params: Params, vocab: Vocabulary) -> None:
        super().__init__(vocab=vocab)

        enc_hidden_dim = params.pop_int('enc_hidden_dim', 300)
        disc_hidden_dim = params.pop_int('disc_hidden_dim', 1200)
        disc_num_layers = params.pop_int('disc_num_layers', 1)

        emb_dropout = params.pop_float('emb_dropout', 0.0)
        disc_dropout = params.pop_float('disc_dropout', 0.0)
        l2_weight = params.pop_float('l2_weight', 0.0)

        self.emb_dropout = nn.Dropout(emb_dropout)
        self.disc_dropout = nn.Dropout(disc_dropout)
        self._l2_weight = l2_weight

        self._token_embedder = Embedding.from_params(
            vocab=vocab, params=params.pop('token_embedder'))
        self._discriminator_encoder = PytorchSeq2VecWrapper(
            nn.LSTM(input_size=self._token_embedder.get_output_dim(),
                    hidden_size=enc_hidden_dim,
                    batch_first=True))
        self._discriminator = FeedForward(
            input_dim=4 * self._discriminator_encoder.get_output_dim(),
            hidden_dims=[disc_hidden_dim] * disc_num_layers +
            [self._NUM_LABELS],
            num_layers=disc_num_layers + 1,
            activations=[Activation.by_name('relu')()] * disc_num_layers +
            [Activation.by_name('linear')()])

        # Metrics
        self._metrics = {
            'labeled': {
                'discriminator_entropy': ScalarMetric(),
                'discriminator_accuracy': CategoricalAccuracy(),
                'loss': ScalarMetric()
            }
        }
Example #26
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder, dropout_p: float,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        self.embedding2input = FeedForward(
            input_dim=word_embeddings.get_output_dim(),
            num_layers=1,
            hidden_dims=encoder.get_input_dim(),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.encoder = encoder

        self.hidden2intermediate = FeedForward(
            input_dim=encoder.get_output_dim(),
            num_layers=1,
            hidden_dims=int(encoder.get_output_dim() / 2),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.intermediate2tag = nn.Linear(
            in_features=int(encoder.get_output_dim() / 2),
            out_features=vocab.get_vocab_size('labels'))

        # self.accuracy = CategoricalAccuracy()

        label_vocab = vocab.get_token_to_index_vocabulary('labels').copy()
        # print("label_vocab: ", label_vocab)
        [label_vocab.pop(x) for x in ['O', 'OR']]
        labels_for_metric = list(label_vocab.values())
        # print("labels_for_metric: ", labels_for_metric)
        self.accuracy = CustomFBetaMeasure(beta=1.0,
                                           average='micro',
                                           labels=labels_for_metric)
Example #27
 def __init__(
     self,
     tensor_1_dim: int,
     tensor_2_dim: int,
     combination: str = "x,y",
     activation: Activation = None,
     normalize: bool = True,
 ) -> None:
     super().__init__(normalize)
     self._combination = combination
     combined_dim = util.get_combined_dim(combination, [tensor_1_dim, tensor_2_dim])
     self._weight_vector = Parameter(torch.Tensor(combined_dim))
     self._bias = Parameter(torch.Tensor(1))
     self._activation = activation or Activation.by_name("linear")()
     self.reset_parameters()
Example #28
    def __init__(self,
                 vocab: Vocabulary,
                 title_embedder: TextFieldEmbedder,
                 abstract_embedder: TextFieldEmbedder,
                 dense_dim=75) -> None:

        super().__init__(vocab)

        self.title_embedder = title_embedder
        self.abstract_embedder = abstract_embedder
        self.intermediate_dim = 6
        self.n_layers = 3
        self.layer_dims = [dense_dim for i in range(self.n_layers - 1)]
        self.layer_dims.append(1)

        self.activations = [
            Activation.by_name("elu")(),
            Activation.by_name("elu")(),
            Activation.by_name("sigmoid")()
        ]
        self.layers = FeedForward(input_dim=self.intermediate_dim,
                                  num_layers=self.n_layers,
                                  hidden_dims=self.layer_dims,
                                  activations=self.activations)
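
AllenNLP's FeedForward pairs hidden_dims and activations element-wise, one entry per layer, which is how the snippet above gets two elu layers followed by a single sigmoid-scored output. A small shape sketch under the same assumptions (the dimensions are illustrative):

    import torch
    from allennlp.modules import FeedForward
    from allennlp.nn import Activation

    ff = FeedForward(input_dim=6,
                     num_layers=3,
                     hidden_dims=[75, 75, 1],
                     activations=[Activation.by_name("elu")(),
                                  Activation.by_name("elu")(),
                                  Activation.by_name("sigmoid")()])
    scores = ff(torch.randn(4, 6))
    assert scores.shape == (4, 1)  # one score in (0, 1) per input row
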
Example #29
    def __init__(self,
                 embedding_dim: int,
                 pooling: str = "sum",
                 projection_dim: Optional[int] = None,
                 activation: Optional[str] = None) -> None:
        super().__init__()

        self._embedding_dim = embedding_dim
        self._pooling = pooling
        self._projection_dim = projection_dim

        self._activation = Activation.by_name(activation)() if activation else None

        self._projection = (torch.nn.Linear(self._embedding_dim,
                                            self._projection_dim)
                            if projection_dim
                            else lambda x: x)
Example #30
    def __init__(
        self,
        hidden_dim: int,
        tag_dim: int,
        activation: str,
        embedding_dim: int,
    ):
        super(ContinuousEncoder, self).__init__()

        self.hidden_dim = hidden_dim
        self.tag_dim = tag_dim
        self.activation = Activation.by_name(activation)()
        self.embedding_dim = embedding_dim

        # ============= Covariance matrix & Mean vector ================
        interm_layer_size = (self.embedding_dim + self.hidden_dim) // 2
        self.linear_layer = nn.Linear(self.embedding_dim, interm_layer_size)
        self.linear_layer3 = nn.Linear(interm_layer_size, self.hidden_dim)

        self.hidden2mean = nn.Linear(self.hidden_dim, self.tag_dim)
        self.hidden2std = nn.Linear(self.hidden_dim, self.tag_dim)
Example #31
    def __init__(self,
                 vocab_size,
                 embed_size,
                 hidden_size,
                 kernel_sizes,
                 activation: Activation = Activation.by_name('relu')(),
                 pre_embed=None):
        super(EncoderCNN, self).__init__()
        self.vocab_size = vocab_size
        self.embed_size = embed_size

        if pre_embed is not None:
            print("Setting Embedding")
            weight = torch.Tensor(pre_embed)
            weight[0, :].zero_()

            self.embedding = nn.Embedding(vocab_size,
                                          embed_size,
                                          _weight=weight,
                                          padding_idx=0)
        else:
            self.embedding = nn.Embedding(vocab_size,
                                          embed_size,
                                          padding_idx=0)

        self.hidden_size = hidden_size

        convs = {}
        for i in range(len(kernel_sizes)):
            convs[str(i)] = nn.Conv1d(embed_size,
                                      hidden_size,
                                      kernel_sizes[i],
                                      padding=int((kernel_sizes[i] - 1) // 2))

        self.convolutions = nn.ModuleDict(convs)
        self.activation = activation

        self.output_size = hidden_size * len(kernel_sizes)
Example #32
    def __init__(self,
                 output_dim: int,
                 embeddings: Embeddings,
                 filters: List[Tuple[int, int]],
                 n_highway: int,
                 activation: str,
                 use_cuda: bool,
                 input_field_name: str = None):
        super(ConvTokenEmbedder, self).__init__(input_field_name)
        self.embeddings = embeddings
        self.output_dim = output_dim
        self.use_cuda = use_cuda
        self.filters = filters

        convolutions = []
        for i, (width, num) in enumerate(filters):
            conv = torch.nn.Conv1d(in_channels=embeddings.n_d,
                                   out_channels=num,
                                   kernel_size=width,
                                   bias=True)
            convolutions.append(conv)

        self.convolutions = torch.nn.ModuleList(convolutions)

        self.n_filters = sum(f[1] for f in filters)
        self.n_highway = n_highway

        self.highways = Highway(self.n_filters,
                                self.n_highway,
                                activation=torch.nn.functional.relu)

        self.activation = Activation.by_name(activation)()
        self.projection = torch.nn.Linear(self.n_filters,
                                          output_dim,
                                          bias=True)
        self.reset_parameters()