Beispiel #1
0
    def __init__(
            self,
            embed_size=15,
            num_filters=(25, 50, 75, 100, 125, 150),
            ngram_filter_sizes=(1, 2, 3, 4, 5, 6),
            conv_layer_activation='tanh',
            num_highway=1,
            highway_layer_activation='relu',
            highway_bias=HighwayBias(nonlinear_transform_bias=0.0,
                                     transform_gate_bias=-2.0),
            output_size=None,
            **kwargs):
        """Create the convolution branches, optional highway stack and optional projection.

        Parameters
        ----------
        embed_size : int
            Used as ``in_channels`` for every ``Conv1D`` branch.
        num_filters : iterable of int
            ``channels`` of each ``Conv1D`` branch. Paired positionally with
            ``ngram_filter_sizes`` through ``zip``, so surplus entries in the
            longer of the two are ignored.
        ngram_filter_sizes : iterable of int
            ``kernel_size`` of each ``Conv1D`` branch.
        conv_layer_activation : str or None
            Activation appended after the max-reduction of each branch.
            ``None`` means no activation block is added.
        num_highway : int
            Layer count handed to ``Highway``. A falsy value means no highway
            block is created and ``_highways`` is ``None``.
        highway_layer_activation : str
            Forwarded to ``Highway`` as ``activation``.
        highway_bias : HighwayBias
            Forwarded to ``Highway`` as ``highway_bias``.
        output_size : int or None
            When truthy, a ``Dense`` projection with this many units is
            created. Otherwise ``_projection`` is ``None`` and
            ``_output_size`` becomes the sum of the used filter counts.
        **kwargs
            Forwarded to the parent class constructor.
        """
        super(ConvolutionalEncoder, self).__init__(**kwargs)

        self._embed_size = embed_size
        self._num_filters = num_filters
        self._ngram_filter_sizes = ngram_filter_sizes
        self._num_highway = num_highway
        self._output_size = output_size

        with self.name_scope():
            self._convs = gluon.contrib.nn.HybridConcurrent()
            # Running total of the channels produced by all branches.
            total_channels = 0
            with self._convs.name_scope():
                branch_specs = zip(self._num_filters,
                                   self._ngram_filter_sizes)
                for channels, width in branch_specs:
                    branch = nn.HybridSequential()

                    conv = nn.Conv1D(in_channels=self._embed_size,
                                     channels=channels,
                                     kernel_size=width,
                                     use_bias=True)
                    branch.add(conv)

                    # Reduce the convolution output with a max over axis 2.
                    max_reduce = gluon.nn.HybridLambda(
                        lambda F, x: F.max(x, axis=2))
                    branch.add(max_reduce)

                    if conv_layer_activation is not None:
                        branch.add(nn.Activation(conv_layer_activation))

                    self._convs.add(branch)
                    total_channels += channels

            # The highway stack is optional; a falsy layer count disables it.
            self._highways = Highway(
                total_channels,
                self._num_highway,
                activation=highway_layer_activation,
                highway_bias=highway_bias) if self._num_highway else None

            if not self._output_size:
                # No projection requested: report the concatenated width.
                self._projection = None
                self._output_size = total_channels
            else:
                self._projection = nn.Dense(in_units=total_channels,
                                            units=self._output_size,
                                            use_bias=True)
Beispiel #2
0
    def __init__(
            self,
            input_size,
            num_layers,
            activation='relu',
            highway_bias=HighwayBias(nonlinear_transform_bias=0.0, transform_gate_bias=-2.0),
            **kwargs):
        """Create ``num_layers`` dense layers plus the activation block.

        Parameters
        ----------
        input_size : int
            ``in_units`` of every dense layer; each layer outputs
            ``input_size * 2`` units.
        num_layers : int
            Number of dense layers appended to ``hnet``.
        activation : str
            Name passed to ``nn.Activation`` and stored as ``_activation``.
        highway_bias : HighwayBias
            Used as ``bias_initializer`` of every dense layer.
        **kwargs
            Forwarded to the parent class constructor.
        """
        super(Highway, self).__init__(**kwargs)
        self._input_size = input_size
        self._num_layers = num_layers

        with self.name_scope():
            self.hnet = nn.HybridSequential()
            with self.hnet.name_scope():
                for _ in range(self._num_layers):
                    # Output is twice the input width.
                    # NOTE(review): presumably one half per highway branch;
                    # the forward pass is not visible here - confirm.
                    width = self._input_size
                    layer = nn.Dense(units=width * 2,
                                     in_units=width,
                                     bias_initializer=highway_bias,
                                     use_bias=True,
                                     flatten=False)
                    self.hnet.add(layer)
            self._activation = nn.Activation(activation)
Beispiel #3
0
    def __init__(self, **kwargs):
        """Create every sub-block of the network.

        All hyper-parameters are read from the ``opt`` object available in
        the enclosing scope; ``**kwargs`` is forwarded to the parent class
        constructor.
        """
        super(QANet, self).__init__(**kwargs)

        with self.name_scope():
            self.flatten = gluon.nn.Flatten()
            self.dropout = gluon.nn.Dropout(opt.layers_dropout)
            # Character-level convolution: no activation, no highway layers.
            self.char_conv = ConvolutionalEncoder(
                embed_size=opt.char_emb_dim,
                num_filters=opt.char_conv_filters,
                ngram_filter_sizes=opt.char_conv_ngrams,
                conv_layer_activation=None,
                num_highway=0)

        # NOTE(review): the next three containers are deliberately built
        # outside self.name_scope(), exactly as before; moving them inside
        # would presumably change their parameter name prefixes - confirm
        # before touching.

        # Bias-free dense layer followed by a highway block.
        self.highway = gluon.nn.HybridSequential()
        with self.highway.name_scope():
            highway_input = gluon.nn.Dense(
                units=opt.emb_encoder_conv_channels,
                flatten=False,
                use_bias=False,
                weight_initializer=Xavier())
            self.highway.add(highway_input)

            zero_bias = HighwayBias(nonlinear_transform_bias=0.0,
                                    transform_gate_bias=0.0)
            highway_stack = Highway(
                input_size=opt.emb_encoder_conv_channels,
                num_layers=opt.highway_layers,
                activation='relu',
                highway_bias=zero_bias)
            self.highway.add(highway_stack)

        # Word embedding lookup followed by dropout.
        self.word_emb = gluon.nn.HybridSequential()
        with self.word_emb.name_scope():
            word_table = gluon.nn.Embedding(input_dim=opt.word_corpus,
                                            output_dim=opt.word_emb_dim)
            self.word_emb.add(word_table)
            self.word_emb.add(gluon.nn.Dropout(rate=opt.word_emb_dropout))

        # Character embedding lookup followed by dropout.
        self.char_emb = gluon.nn.HybridSequential()
        with self.char_emb.name_scope():
            char_table = gluon.nn.Embedding(
                input_dim=opt.character_corpus,
                output_dim=opt.char_emb_dim,
                weight_initializer=Normal(sigma=0.1))
            self.char_emb.add(char_table)
            self.char_emb.add(gluon.nn.Dropout(rate=opt.char_emb_dropout))

        def _build_span_scorer():
            # Both span predictors share this exact configuration.
            return gluon.nn.Dense(
                units=1,
                use_bias=True,
                flatten=False,
                weight_initializer=Xavier(rnd_type='uniform',
                                          factor_type='in',
                                          magnitude=1),
                bias_initializer=Uniform(
                    1.0 / opt.model_encoder_conv_channels))

        with self.name_scope():
            self.emb_encoder = Encoder(
                kernel_size=opt.emb_encoder_conv_kernerl_size,
                num_filters=opt.emb_encoder_conv_channels,
                conv_layers=opt.emb_encoder_num_conv_layers,
                num_heads=opt.emb_encoder_num_head,
                num_blocks=opt.emb_encoder_num_block)

            self.project = gluon.nn.Dense(
                units=opt.emb_encoder_conv_channels,
                flatten=False,
                use_bias=False,
                weight_initializer=Xavier())

            self.co_attention = CoAttention()

            self.model_encoder = Encoder(
                kernel_size=opt.model_encoder_conv_kernel_size,
                num_filters=opt.model_encoder_conv_channels,
                conv_layers=opt.model_encoder_conv_layers,
                num_heads=opt.model_encoder_num_head,
                num_blocks=opt.model_encoder_num_block)

            # One single-unit dense layer per predicted position.
            self.predict_begin = _build_span_scorer()
            self.predict_end = _build_span_scorer()