Example #1
0
    def __init__(self,
                 channels: int = 4,
                 units: int = 10,
                 activation: typing.Union[str, typing.Type[nn.Module],
                                          nn.Module] = 'tanh',
                 recurrent_activation: typing.Union[str,
                                                    typing.Type[nn.Module],
                                                    nn.Module] = 'sigmoid',
                 direction: str = 'lt'):
        """:class:`SpatialGRU` constructor."""
        super().__init__()
        # Cache the configuration on the instance.
        self._channels = channels
        self._units = units
        # Activations may arrive as names, classes, or module instances;
        # `parse_activation` normalizes them to module instances.
        self._activation = parse_activation(activation)
        self._recurrent_activation = parse_activation(recurrent_activation)
        self._direction = direction

        # Only left-top ('lt') and right-bottom ('rb') scan orders exist.
        if self._direction not in {'lt', 'rb'}:
            raise ValueError(f"Invalid direction. "
                             f"`{self._direction}` received. "
                             f"Must be in `lt`, `rb`.")

        # Each step consumes the current input (channels) together with
        # three neighboring hidden states (3 * units).
        self._input_dim = self._channels + 3 * self._units

        self._wr = nn.Linear(self._input_dim, self._units * 3)
        self._wz = nn.Linear(self._input_dim, self._units * 4)
        self._w_ij = nn.Linear(self._channels, self._units)
        self._U = nn.Linear(self._units * 3, self._units, bias=False)

        self.reset_parameters()
Example #2
0
    def build(self):
        """Build model structure."""
        self.embedding = self._make_default_embedding_layer()

        # One conv block per n-gram size; right padding keeps the output
        # length equal to the input length.
        self.q_convs = nn.ModuleList()
        self.d_convs = nn.ModuleList()
        for ngram in range(self._params['max_ngram']):
            block = nn.Sequential(
                nn.ConstantPad1d((0, ngram), 0),
                nn.Conv1d(in_channels=self._params['embedding_output_dim'],
                          out_channels=self._params['filters'],
                          kernel_size=ngram + 1),
                parse_activation(self._params['conv_activation_func']))
            # NOTE: the very same module instance goes into both lists, so
            # query and document sides share convolution weights.
            self.q_convs.append(block)
            self.d_convs.append(block)

        # Gaussian kernels with mus spread over [-1, 1]; the last kernel is
        # pinned to mu=1.0 with a tighter `exact_sigma`.
        self.kernels = nn.ModuleList()
        kernel_num = self._params['kernel_num']
        for i in range(kernel_num):
            mu = 1. / (kernel_num - 1) + (2. * i) / (kernel_num - 1) - 1.0
            if mu > 1.0:
                mu, sigma = 1.0, self._params['exact_sigma']
            else:
                sigma = self._params['sigma']
            self.kernels.append(GaussianKernel(mu=mu, sigma=sigma))

        # One pooled feature per (q n-gram, d n-gram, kernel) triple.
        dim = self._params['max_ngram']**2 * self._params['kernel_num']
        self.out = self._make_output_layer(dim)
Example #3
0
    def build(self):
        """Build model structure.

        Builds the two `lm_*` and `dm_*` sub-networks (presumably the
        local/distributed halves of a DUET-style model — confirm against
        the enclosing class) plus a shared output layer.
        """
        # Fixed: `self.params['lm_filters']` -> `self._params['lm_filters']`.
        # Every other parameter lookup in this method uses `self._params`;
        # `self.params` would raise AttributeError at build time.
        self.lm_conv1d = nn.Conv1d(in_channels=self._params['right_length'],
                                   out_channels=self._params['lm_filters'],
                                   kernel_size=1,
                                   stride=1)
        lm_mlp_size = self._params['left_length'] * self._params['lm_filters']
        self.lm_mlp = self._make_multi_layer_perceptron_layer(lm_mlp_size)
        self.lm_linear = self._make_perceptron_layer(
            in_features=self._params['mlp_num_fan_out'], out_features=1)

        # `dm_*` side: separate conv paths for left and right texts.
        self.dm_conv_activation_func = parse_activation(
            self._params['dm_conv_activation_func'])
        self.dm_conv_left = nn.Conv1d(self._params['vocab_size'],
                                      self._params['dm_filters'],
                                      self._params['dm_kernel_size'])
        self.dm_mlp_left = self._make_perceptron_layer(
            in_features=self._params['dm_filters'],
            out_features=self._params['dm_filters'])
        self.dm_conv1_right = nn.Conv1d(self._params['vocab_size'],
                                        self._params['dm_filters'],
                                        self._params['dm_kernel_size'])
        self.dm_conv2_right = nn.Conv1d(self._params['dm_filters'],
                                        self._params['dm_filters'], 1)
        # Flattened size of the right path: valid-padding conv output length
        # divided by the pooling size, times the channel count.
        dm_mp_size = ((self._params['right_length'] -
                       self._params['dm_kernel_size'] + 1) //
                      (self._params['dm_right_pool_size']) *
                      self._params['dm_filters'])
        self.dm_mlp = self._make_multi_layer_perceptron_layer(dm_mp_size)
        self.dm_linear = self._make_perceptron_layer(
            in_features=self._params['mlp_num_fan_out'], out_features=1)

        self.dropout = nn.Dropout(self._params['dropout_rate'])

        # Final score is a single combined value.
        self.out = self._make_output_layer(1)
Example #4
0
    def _create_base_network(self) -> nn.Module:
        """
        Apply conv and maxpooling operation towards to each letter-ngram.

        The input shape is `fixed_text_length`*`number of letter-ngram`,
        as described in the paper, `n` is 3, `number of letter-trigram`
        is about 30,000 according to their observation.

        :return: A :class:`nn.Module` of CDSSM network, tensor in tensor out.
        """
        return nn.Sequential(
            # Right-pad so the valid-padding conv keeps the input length.
            nn.ConstantPad1d((0, self._params['kernel_size'] - 1), 0),
            nn.Conv1d(
                in_channels=self._params['vocab_size'],
                out_channels=self._params['filters'],
                kernel_size=self._params['kernel_size']),
            parse_activation(self._params['conv_activation_func']),
            nn.Dropout(p=self._params['dropout_rate']),
            # Global max-pool down to one value per filter, drop the
            # singleton dim, then project through the shared MLP.
            nn.AdaptiveMaxPool1d(1),
            Squeeze(),
            self._make_multi_layer_perceptron_layer(self._params['filters']))
Example #5
0
    def build(self):
        """
        Build model structure.

        MatchPyramid text matching as image recognition.
        """
        self.embedding = self._make_default_embedding_layer()

        # Word-level interaction "image" via dot-product matching.
        self.matching = Matching(matching_type='dot')

        # Stacked conv/pool blocks; the first block sees the single-channel
        # interaction image, later blocks chain on the previous filter count.
        activation = parse_activation(self._params['activation'])
        out_channels = self._params['kernel_count']
        in_channels = [1, *out_channels[:-1]]
        blocks = [
            self._make_conv_pool_block(ic, oc, ks, activation)
            for ic, oc, ks in zip(in_channels, out_channels,
                                  self._params['kernel_size'])
        ]
        self.conv2d = nn.Sequential(*blocks)

        # Dynamic pooling to a fixed spatial size regardless of text length.
        self.dpool_layer = nn.AdaptiveAvgPool2d(self._params['dpool_size'])

        self.dropout = nn.Dropout(p=self._params['dropout_rate'])

        # Flattened feature size after dynamic pooling.
        flat_size = (self._params['dpool_size'][0] *
                     self._params['dpool_size'][1] *
                     out_channels[-1])
        self.out = self._make_output_layer(flat_size)
Example #6
0
    def build(self):
        """
        Build model structure.

        ArcI uses a Siamese architecture: each side gets its own conv/pool
        stack, and the flattened representations are concatenated.
        """
        self.embedding = self._make_default_embedding_layer()

        activation = parse_activation(self._params['conv_activation_func'])
        embed_dim = self._params['embedding_output_dim']

        def _conv_stack(filters, kernel_sizes, pool_sizes):
            # Chain conv/pool blocks; each block's input channels are the
            # previous block's filters (the embedding dim for the first).
            in_channels = [embed_dim, *filters[:-1]]
            return nn.Sequential(*[
                self._make_conv_pool_block(ic, oc, ks, activation, ps)
                for ic, oc, ks, ps in zip(in_channels, filters,
                                          kernel_sizes, pool_sizes)
            ])

        self.conv_left = _conv_stack(self._params['left_filters'],
                                     self._params['left_kernel_sizes'],
                                     self._params['left_pool_sizes'])
        self.conv_right = _conv_stack(self._params['right_filters'],
                                      self._params['right_kernel_sizes'],
                                      self._params['right_pool_sizes'])

        self.dropout = nn.Dropout(p=self._params['dropout_rate'])

        # Track how each pooling stage shrinks the sequence lengths.
        left_length = self._params['left_length']
        for ps in self._params['left_pool_sizes']:
            left_length //= ps
        right_length = self._params['right_length']
        for ps in self._params['right_pool_sizes']:
            right_length //= ps

        # MLP over the concatenated flattened left/right features.
        self.mlp = self._make_multi_layer_perceptron_layer(
            left_length * self._params['left_filters'][-1]
            + right_length * self._params['right_filters'][-1])

        self.out = self._make_output_layer(
            self._params['mlp_num_fan_out'])
Example #7
0
    def build(self):
        """
        Build model structure.

        ArcII has the desirable property of letting two sentences meet before
        their own high-level representations mature.
        """
        self.embedding = self._make_default_embedding_layer()

        # Phrase-level (1D) representations, one conv per side; right
        # padding keeps the sequence length unchanged.
        def _phrase_conv():
            return nn.Sequential(
                nn.ConstantPad1d((0, self._params['kernel_1d_size'] - 1), 0),
                nn.Conv1d(in_channels=self._params['embedding_output_dim'],
                          out_channels=self._params['kernel_1d_count'],
                          kernel_size=self._params['kernel_1d_size']))

        self.conv1d_left = _phrase_conv()
        self.conv1d_right = _phrase_conv()

        # Sum-based interaction of the two phrase representations.
        self.matching = Matching(matching_type='plus')

        # 2D conv/pool pyramid over the interaction tensor.
        activation = parse_activation(self._params['activation'])
        counts = self._params['kernel_2d_count']
        in_channels = [self._params['kernel_1d_count'], *counts[:-1]]
        self.conv2d = nn.Sequential(*[
            self._make_conv_pool_block(ic, oc, ks, activation, ps)
            for ic, oc, ks, ps in zip(in_channels, counts,
                                      self._params['kernel_2d_size'],
                                      self._params['pool_2d_size'])
        ])

        self.dropout = nn.Dropout(p=self._params['dropout_rate'])

        # Each 2D pooling stage divides the two spatial dims independently
        # (index 0 for the left length, index 1 for the right length).
        left_length = self._params['left_length']
        right_length = self._params['right_length']
        for ps in self._params['pool_2d_size']:
            left_length //= ps[0]
            right_length //= ps[1]

        # Flattened feature map feeds the output layer.
        self.out = self._make_output_layer(
            left_length * right_length * counts[-1])
Example #8
0
 def _make_output_layer(self, in_features: int = 0) -> nn.Module:
     """:return: a correctly shaped torch module for model output."""
     task = self._params['task']
     # The task type decides the output width: one unit per class for
     # classification, a single score for ranking.
     if isinstance(task, tasks.Classification):
         out_features = task.num_classes
     elif isinstance(task, tasks.Ranking):
         out_features = 1
     else:
         raise ValueError(f"{task} is not a valid task type. "
                          f"Must be in `Ranking` and `Classification`.")
     linear = nn.Linear(in_features, out_features)
     out_activation = self._params['out_activation_func']
     if not out_activation:
         return linear
     # Optional activation appended after the projection.
     return nn.Sequential(linear, parse_activation(out_activation))
Example #9
0
    def _make_multi_layer_perceptron_layer(self, in_features) -> nn.Module:
        """:return: a multiple layer perceptron.

        :param in_features: fan-in of the first perceptron layer.
        :raises AttributeError: if `with_multi_layer_perceptron` is unset.
        """
        if not self._params['with_multi_layer_perceptron']:
            # Fixed: the message previously named a non-existent
            # `with_multi_layer_perception` parameter (typo); it now matches
            # the actual key checked above.
            raise AttributeError(
                'Parameter `with_multi_layer_perceptron` not set.')

        activation = parse_activation(self._params['mlp_activation_func'])
        # Layer widths: in_features -> mlp_num_units (repeated
        # mlp_num_layers times) -> mlp_num_fan_out.
        mlp_sizes = [
            in_features,
            *self._params['mlp_num_layers'] * [self._params['mlp_num_units']],
            self._params['mlp_num_fan_out']
        ]
        mlp = [
            self._make_perceptron_layer(in_f, out_f, activation)
            for in_f, out_f in zip(mlp_sizes, mlp_sizes[1:])
        ]
        return nn.Sequential(*mlp)
Example #10
0
 def _make_output_layer(
         self,
         in_features: int = 0,
         activation: typing.Union[str, nn.Module] = None) -> nn.Module:
     """:return: a correctly shaped torch module for model output."""
     task = self._params['task']
     if isinstance(task, tasks.Classification):
         # Classification: one logit per class, softmax-normalized.
         return nn.Sequential(nn.Linear(in_features, task.num_classes),
                              nn.Softmax(dim=-1))
     elif isinstance(task, tasks.Ranking):
         # Ranking: a single score, optionally squashed by `activation`.
         score = nn.Linear(in_features, 1)
         if not activation:
             return score
         return nn.Sequential(score, parse_activation(activation))
     else:
         raise ValueError(f"{task} is not a valid task type. "
                          f"Must be in `Ranking` and `Classification`.")
Example #11
0
    def build(self):
        """
        Build model structure.

        aNMM: Ranking Short Answer Texts with Attention-Based Neural Matching Model.
        """
        self.embedding = self._make_default_embedding_layer()

        # Normalized dot-product QA matching.
        self.matching = Matching(matching_type='dot', normalize=True)

        # Value-shared weighting: bin histogram -> hidden layers -> scalar.
        # NOTE: the same activation module instance is reused in every layer.
        activation = parse_activation(self._params['activation'])
        layer_sizes = [
            self._params['num_bins'],
            *self._params['hidden_sizes'],
            1,
        ]
        self.hidden_layers = nn.Sequential(*[
            nn.Sequential(nn.Linear(in_size, out_size), activation)
            for in_size, out_size in zip(layer_sizes, layer_sizes[1:])
        ])

        # Attention over query term embeddings.
        self.q_attention = Attention(self._params['embedding_output_dim'])

        self.dropout = nn.Dropout(p=self._params['dropout_rate'])

        # Final score is a single value.
        self.out = self._make_output_layer(1)