def __init__(self,
             input_dim: int,
             hidden_dim: int,
             projection_dim: int,
             feedforward_hidden_dim: int,
             num_layers: int,
             num_attention_heads: int,
             use_positional_encoding: bool = True,
             dropout_prob: float = 0.1,
             residual_dropout_prob: float = 0.2,
             attention_dropout_prob: float = 0.1) -> None:
    super(StackedSelfAttentionEncoder, self).__init__()

    self._use_positional_encoding = use_positional_encoding
    self._attention_layers: List[MultiHeadSelfAttention] = []
    self._feedfoward_layers: List[FeedForward] = []
    self._layer_norm_layers: List[LayerNorm] = []
    self._feed_forward_layer_norm_layers: List[LayerNorm] = []

    # Each layer is a two-layer FeedForward (relu, then linear) plus a
    # multi-head self-attention block, each followed by its own LayerNorm.
    feedfoward_input_dim = input_dim
    for i in range(num_layers):
        feedfoward = FeedForward(feedfoward_input_dim,
                                 activations=[Activation.by_name('relu')(),
                                              Activation.by_name('linear')()],
                                 hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                 num_layers=2,
                                 dropout=dropout_prob)

        # Note: Please use `ModuleList` in new code. It provides better
        # support for running on multiple GPUs. We've kept `add_module` here
        # solely for backwards compatibility with existing serialized models.
        self.add_module(f"feedforward_{i}", feedfoward)
        self._feedfoward_layers.append(feedfoward)

        feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
        self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
        self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

        self_attention = MultiHeadSelfAttention(num_heads=num_attention_heads,
                                                input_dim=hidden_dim,
                                                attention_dim=projection_dim,
                                                values_dim=projection_dim,
                                                attention_dropout_prob=attention_dropout_prob)
        self.add_module(f"self_attention_{i}", self_attention)
        self._attention_layers.append(self_attention)

        layer_norm = LayerNorm(self_attention.get_output_dim())
        self.add_module(f"layer_norm_{i}", layer_norm)
        self._layer_norm_layers.append(layer_norm)

        # After the first layer the feedforward consumes the previous layer's
        # output, so its input dimension switches to hidden_dim.
        feedfoward_input_dim = hidden_dim

    self.dropout = Dropout(residual_dropout_prob)
    self._input_dim = input_dim
    self._output_dim = self._attention_layers[-1].get_output_dim()
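# Usage sketch for the encoder above (assumed to be AllenNLP's
# StackedSelfAttentionEncoder, a Seq2SeqEncoder). The import path, dimensions,
# and tensor shapes below are illustrative assumptions, not part of the snippet.
import torch
from allennlp.modules.seq2seq_encoders import StackedSelfAttentionEncoder

encoder = StackedSelfAttentionEncoder(input_dim=300,
                                      hidden_dim=128,
                                      projection_dim=64,
                                      feedforward_hidden_dim=256,
                                      num_layers=2,
                                      num_attention_heads=4)
tokens = torch.randn(8, 20, 300)   # (batch_size, sequence_length, input_dim)
mask = torch.ones(8, 20)           # (batch_size, sequence_length)
encoded = encoder(tokens, mask)    # (batch_size, sequence_length, hidden_dim)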
def __init__(self, hdim: int = 768, nlayers: int = 2, dropout_prob: float = 0.1):
    super(GCNNet, self).__init__()
    # self.gcns = nn.ModuleList([GCN(hdim, hdim, F.relu) for i in range(nlayers)])
    self._gcn_layers: List[GCN] = []
    self._feedfoward_layers: List[FeedForward] = []
    self._layer_norm_layers: List[LayerNorm] = []
    self._feed_forward_layer_norm_layers: List[LayerNorm] = []

    # All dimensions are tied to the model width `hdim`.
    feedfoward_input_dim, feedforward_hidden_dim, hidden_dim = hdim, hdim, hdim
    for i in range(nlayers):
        feedfoward = FeedForward(feedfoward_input_dim,
                                 activations=[Activation.by_name('relu')(),
                                              Activation.by_name('linear')()],
                                 hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                 num_layers=2,
                                 dropout=dropout_prob)

        # Note: Please use `ModuleList` in new code. It provides better
        # support for running on multiple GPUs. We've kept `add_module` here
        # solely for backwards compatibility with existing serialized models.
        self.add_module(f"feedforward_{i}", feedfoward)
        self._feedfoward_layers.append(feedfoward)

        feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
        self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
        self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

        # A graph convolution takes the place of the self-attention block used
        # in the stacked self-attention encoder above.
        gcn = GCN(hdim, hdim, F.relu)
        self.add_module(f"gcn_{i}", gcn)
        self._gcn_layers.append(gcn)

        layer_norm = LayerNorm(hdim)
        self.add_module(f"layer_norm_{i}", layer_norm)
        self._layer_norm_layers.append(layer_norm)

        feedfoward_input_dim = hidden_dim

    self.dropout = Dropout(dropout_prob)
    self._input_dim = hdim
    self._output_dim = hdim
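# Construction sketch for GCNNet above. Only __init__ is shown, so the forward
# signature (typically a graph plus node features for GCN-style layers) is an
# assumption rather than something taken from this snippet.
net = GCNNet(hdim=768, nlayers=2, dropout_prob=0.1)
# net(graph, node_features)  # hypothetical call; depends on GCN and GCNNet.forward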
def __init__(self, feed_forward: FeedForward) -> None:
    super(TimeDistributedFeedForwardEncoder, self).__init__()
    self._input_dim = feed_forward.get_input_dim()
    self._output_dim = feed_forward.get_output_dim()
    self._time_distributed_fnn = TimeDistributed(feed_forward)
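# Usage sketch for TimeDistributedFeedForwardEncoder: wrapping a FeedForward in
# TimeDistributed applies it independently at every position of a
# (batch, seq_len, dim) tensor. The FeedForward configuration below is
# illustrative; the encoder's forward signature is assumed, not shown here.
from allennlp.modules import FeedForward
from allennlp.nn import Activation

ff = FeedForward(input_dim=128,
                 num_layers=2,
                 hidden_dims=[256, 128],
                 activations=[Activation.by_name('relu')(),
                              Activation.by_name('linear')()])
encoder = TimeDistributedFeedForwardEncoder(ff)
# encoder(inputs)  # inputs: (batch, seq_len, 128) -> (batch, seq_len, 128),
#                  # assuming forward delegates to the wrapped TimeDistributed module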