def __init__(self, embed_size=15, num_filters=(25, 50, 75, 100, 125, 150),
             ngram_filter_sizes=(1, 2, 3, 4, 5, 6), conv_layer_activation='tanh',
             num_highway=1, highway_layer_activation='relu',
             highway_bias=HighwayBias(nonlinear_transform_bias=0.0,
                                      transform_gate_bias=-2.0),
             output_size=None, **kwargs):
    """Build the parallel convolution branches and the optional highway / projection.

    Parameters
    ----------
    embed_size
        Passed as ``in_channels`` to every ``Conv1D`` branch.
    num_filters
        Output channel count for each branch, paired positionally with
        ``ngram_filter_sizes``.
    ngram_filter_sizes
        Kernel size for each branch. The two sequences are combined with
        ``zip``, so surplus entries in the longer one are silently ignored.
    conv_layer_activation
        Activation name appended to each branch after the max reduction;
        ``None`` skips the activation layer.
    num_highway
        Layer count handed to ``Highway``; a falsy value disables the highway
        (``self._highways`` is then ``None``).
    highway_layer_activation
        Forwarded to ``Highway`` as ``activation``.
    highway_bias
        Forwarded to ``Highway`` as ``highway_bias``.
    output_size
        When truthy, a ``Dense`` projection to this many units is created.
        Otherwise no projection is created and ``self._output_size`` is set to
        the summed channel count of the branches.
    **kwargs
        Forwarded to the parent class constructor.
    """
    super(ConvolutionalEncoder, self).__init__(**kwargs)

    self._embed_size = embed_size
    self._num_filters = num_filters
    self._ngram_filter_sizes = ngram_filter_sizes
    self._num_highway = num_highway
    self._output_size = output_size

    with self.name_scope():
        self._convs = gluon.contrib.nn.HybridConcurrent()
        # Running total of channels across all branches.
        maxpool_output_size = 0
        with self._convs.name_scope():
            for channels, width in zip(self._num_filters, self._ngram_filter_sizes):
                branch = nn.HybridSequential()

                conv = nn.Conv1D(in_channels=self._embed_size,
                                 channels=channels,
                                 kernel_size=width,
                                 use_bias=True)
                branch.add(conv)

                # Max-reduce over axis 2 of the convolution output
                # (presumably the sequence axis -- confirm the input layout).
                pool = gluon.nn.HybridLambda(lambda F, x: F.max(x, axis=2))
                branch.add(pool)

                if conv_layer_activation is not None:
                    branch.add(nn.Activation(conv_layer_activation))

                self._convs.add(branch)
                maxpool_output_size += channels

        if not self._num_highway:
            self._highways = None
        else:
            self._highways = Highway(maxpool_output_size,
                                     self._num_highway,
                                     activation=highway_layer_activation,
                                     highway_bias=highway_bias)

        if not self._output_size:
            # No projection requested: report the concatenated branch width.
            self._projection = None
            self._output_size = maxpool_output_size
        else:
            self._projection = nn.Dense(in_units=maxpool_output_size,
                                        units=self._output_size,
                                        use_bias=True)
def __init__(self, input_size, num_layers, activation='relu',
             highway_bias=HighwayBias(nonlinear_transform_bias=0.0,
                                      transform_gate_bias=-2.0),
             **kwargs):
    """Create the stack of dense layers and the activation used by the highway.

    Parameters
    ----------
    input_size
        Number of input units of every dense layer; each layer outputs twice
        this many units.
    num_layers
        How many dense layers are appended to ``self.hnet``.
    activation
        Name passed to ``nn.Activation`` and stored as ``self._activation``.
    highway_bias
        Used as the ``bias_initializer`` of every dense layer.
    **kwargs
        Forwarded to the parent class constructor.
    """
    super(Highway, self).__init__(**kwargs)

    self._input_size = input_size
    self._num_layers = num_layers

    with self.name_scope():
        self.hnet = nn.HybridSequential()
        with self.hnet.name_scope():
            for _layer_index in range(self._num_layers):
                # Output is 2 * input_size wide; presumably one half feeds the
                # nonlinear transform and the other the gate -- the forward
                # pass that splits it is not part of this block.
                dense = nn.Dense(in_units=self._input_size,
                                 units=self._input_size * 2,
                                 use_bias=True,
                                 bias_initializer=highway_bias,
                                 flatten=False)
                self.hnet.add(dense)
        self._activation = nn.Activation(activation)
def __init__(self, **kwargs):
    """Instantiate every sub-block of the model.

    All hyper-parameters are read from the ``opt`` object, which is defined
    outside this block; the constructor itself only accepts ``**kwargs``,
    which are forwarded to the parent class constructor.

    Sub-blocks created, in order: ``flatten``, ``dropout``, ``char_conv``,
    ``highway``, ``word_emb``, ``char_emb``, ``emb_encoder``, ``project``,
    ``co_attention``, ``model_encoder``, ``predict_begin``, ``predict_end``.
    """
    super(QANet, self).__init__(**kwargs)

    def _span_scorer():
        # Single-unit dense head. Initializer objects are created anew on
        # each call, so the begin/end heads never share an initializer.
        return gluon.nn.Dense(
            units=1,
            use_bias=True,
            flatten=False,
            weight_initializer=Xavier(rnd_type='uniform', factor_type='in', magnitude=1),
            bias_initializer=Uniform(1.0 / opt.model_encoder_conv_channels))

    with self.name_scope():
        self.flatten = gluon.nn.Flatten()
        self.dropout = gluon.nn.Dropout(opt.layers_dropout)
        # Character-level convolution: no activation and no highway inside
        # the encoder itself.
        self.char_conv = ConvolutionalEncoder(
            embed_size=opt.char_emb_dim,
            num_filters=opt.char_conv_filters,
            ngram_filter_sizes=opt.char_conv_ngrams,
            conv_layer_activation=None,
            num_highway=0)

    # Dense layer to emb_encoder_conv_channels units followed by a highway stack.
    self.highway = gluon.nn.HybridSequential()
    with self.highway.name_scope():
        highway_input = gluon.nn.Dense(
            units=opt.emb_encoder_conv_channels,
            flatten=False,
            use_bias=False,
            weight_initializer=Xavier())
        self.highway.add(highway_input)

        highway_stack = Highway(
            input_size=opt.emb_encoder_conv_channels,
            num_layers=opt.highway_layers,
            activation='relu',
            highway_bias=HighwayBias(nonlinear_transform_bias=0.0,
                                     transform_gate_bias=0.0))
        self.highway.add(highway_stack)

    # Word embedding followed by dropout.
    self.word_emb = gluon.nn.HybridSequential()
    with self.word_emb.name_scope():
        word_table = gluon.nn.Embedding(
            input_dim=opt.word_corpus,
            output_dim=opt.word_emb_dim)
        self.word_emb.add(word_table)
        self.word_emb.add(gluon.nn.Dropout(rate=opt.word_emb_dropout))

    # Character embedding (normal init, sigma 0.1) followed by dropout.
    self.char_emb = gluon.nn.HybridSequential()
    with self.char_emb.name_scope():
        char_table = gluon.nn.Embedding(
            input_dim=opt.character_corpus,
            output_dim=opt.char_emb_dim,
            weight_initializer=Normal(sigma=0.1))
        self.char_emb.add(char_table)
        self.char_emb.add(gluon.nn.Dropout(rate=opt.char_emb_dropout))

    # The remaining sub-blocks are all created under this block's own name
    # scope, in the same order as before.
    with self.name_scope():
        # NOTE(review): the option is spelled "kernerl" here; it has to match
        # whatever defines ``opt`` -- confirm before renaming.
        self.emb_encoder = Encoder(
            kernel_size=opt.emb_encoder_conv_kernerl_size,
            num_filters=opt.emb_encoder_conv_channels,
            conv_layers=opt.emb_encoder_num_conv_layers,
            num_heads=opt.emb_encoder_num_head,
            num_blocks=opt.emb_encoder_num_block)
        self.project = gluon.nn.Dense(
            units=opt.emb_encoder_conv_channels,
            flatten=False,
            use_bias=False,
            weight_initializer=Xavier())

        self.co_attention = CoAttention()

        self.model_encoder = Encoder(
            kernel_size=opt.model_encoder_conv_kernel_size,
            num_filters=opt.model_encoder_conv_channels,
            conv_layers=opt.model_encoder_conv_layers,
            num_heads=opt.model_encoder_num_head,
            num_blocks=opt.model_encoder_num_block)

        self.predict_begin = _span_scorer()
        self.predict_end = _span_scorer()