def _get_embed(self, embed, vocab_size, embed_size, initializer, dropout, prefix):
    """ Construct an embedding block. """
    if embed is None:
        assert embed_size is not None, '"embed_size" cannot be None if "word_embed" or ' \
                                       'token_type_embed is not given.'
        with self.name_scope():
            embed = nn.HybridSequential(prefix=prefix)
            with embed.name_scope():
                embed.add(nn.Embedding(input_dim=vocab_size, output_dim=embed_size,
                                       weight_initializer=initializer))
                if dropout:
                    embed.add(nn.Dropout(rate=dropout))
    assert isinstance(embed, Block)
    return embed
def __init__(self, vocab_size, embedding_dim, num_classes, **kwargs):
    super(FastTextClassificationModel, self).__init__(**kwargs)
    with self.name_scope():
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim,
                                      weight_initializer=mx.init.Xavier(),
                                      dtype='float32')
        num_output_units = num_classes
        if num_classes == 2:
            num_output_units = 1
        logging.info('Number of output units in the last layer :%s', num_output_units)
        self.agg_layer = MeanPoolingLayer()
        self.dense = nn.Dense(num_output_units)
def __init__(self, mode, vocab_size, embed_dim, hidden_dim, num_layers, dropout=0.5, **kwargs):
    super(RNNModel, self).__init__(**kwargs)
    with self.name_scope():
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(vocab_size, embed_dim,
                                    weight_initializer=mx.init.Uniform(0.1))
        if mode == 'rnn_relu':
            self.rnn = rnn.RNN(hidden_dim, num_layers, activation='relu',
                               dropout=dropout, input_size=embed_dim)
        elif mode == 'rnn_tanh':
            self.rnn = rnn.RNN(hidden_dim, num_layers, activation='tanh',
                               dropout=dropout, input_size=embed_dim)
        elif mode == 'lstm':
            self.rnn = rnn.LSTM(hidden_dim, num_layers, dropout=dropout,
                                input_size=embed_dim)
        elif mode == 'gru':
            self.rnn = rnn.GRU(hidden_dim, num_layers, dropout=dropout,
                               input_size=embed_dim)
        else:
            raise ValueError("Invalid mode %s. Options are rnn_relu, "
                             "rnn_tanh, lstm, and gru" % mode)
        self.decoder = nn.Dense(vocab_size, in_units=hidden_dim)
        self.hidden_dim = hidden_dim
def __init__(self, nwords, nword_dims, word_hidden_size, word_nlayers,
             sentence_hidden_size, sentence_nlayers, ndoc_dims, **kwargs):
    super(Encoder, self).__init__(**kwargs)
    with self.name_scope():
        self.embedding = nn.Embedding(nwords, nword_dims)
        self.word_rnn = gluon.rnn.LSTM(word_hidden_size, num_layers=word_nlayers,
                                       layout='NTC', bidirectional=True)
        self.sentence_rnn = gluon.rnn.LSTM(sentence_hidden_size, num_layers=sentence_nlayers,
                                           layout='NTC', bidirectional=True)
        self.fully_encoder = nn.Dense(ndoc_dims, activation='tanh', flatten=False)
def __init__(self, mode, vocab_size, num_embed, num_hidden, num_layers,
             dropout=0.5, tie_weights=False, **kwargs):
    super(RNNModel, self).__init__(**kwargs)
    with self.name_scope():
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(vocab_size, num_embed)
        if mode == 'rnn_relu':
            self.rnn = rnn.RNN(num_hidden, num_layers, activation='relu',
                               dropout=dropout, input_size=num_embed)
        elif mode == 'rnn_tanh':
            self.rnn = rnn.RNN(num_hidden, num_layers, activation='tanh',
                               dropout=dropout, input_size=num_embed)
        elif mode == 'lstm':
            self.rnn = rnn.LSTM(num_hidden, num_layers, dropout=dropout,
                                input_size=num_embed)
        elif mode == 'gru':
            self.rnn = rnn.GRU(num_hidden, num_layers, dropout=dropout,
                               input_size=num_embed)
        else:
            raise ValueError("Invalid mode %s. Options are rnn_relu, "
                             "rnn_tanh, lstm, and gru" % mode)
        if tie_weights:
            # Tie the decoder weights to the embedding weights; this requires
            # num_embed == num_hidden so the shared weight shapes match.
            self.decoder = nn.Dense(vocab_size, in_units=num_hidden,
                                    params=self.encoder.params)
        else:
            self.decoder = nn.Dense(vocab_size, in_units=num_hidden)
        self.num_hidden = num_hidden
def __init__(self, vocab_size, hidden_size, prefix=None, params=None, use_tuple=False):
    super(RNNDecoder2, self).__init__(prefix=prefix, params=params)
    self._vocab_size = vocab_size
    self._use_tuple = use_tuple
    with self.name_scope():
        self._embed = nn.Embedding(input_dim=vocab_size, output_dim=hidden_size)
        self._rnn1 = rnn.RNNCell(input_size=hidden_size, hidden_size=hidden_size)
        self._rnn2 = rnn.RNNCell(input_size=hidden_size, hidden_size=hidden_size)
        self._map_to_vocab = nn.Dense(vocab_size, in_units=hidden_size)
def net_define_eu():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Embedding(config.MAX_WORDS, config.EMBEDDING_DIM))
        net.add(rnn.GRU(128, layout='NTC', bidirectional=True, num_layers=1, dropout=0.2))
        net.add(transpose(axes=(0, 2, 1)))
        net.add(nn.GlobalMaxPool1D())
        # net.add(FeatureBlock1())
        net.add(extendDim(axes=3))
        net.add(PrimeConvCap(16, 32, kernel_size=(1, 1), padding=(0, 0), strides=(1, 1)))
        net.add(CapFullyNGBlock(16, num_cap=12, input_units=32, units=16, route_num=3))
        net.add(nn.Dropout(0.2))
        net.add(nn.Dense(6, activation='sigmoid'))
        net.initialize(init=init.Xavier())
    return net
def __init__(self, hidden_dim, output_dim, num_layers, max_seq_len, drop_prob,
             alignment_dim, encoder_hidden_dim, **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.max_seq_len = max_seq_len
    self.encoder_hidden_dim = encoder_hidden_dim
    self.hidden_size = hidden_dim
    self.num_layers = num_layers
    with self.name_scope():
        # hidden_dim is decoder_hidden_dim
        # output_dim is len(output_vocab)
        self.embedding = nn.Embedding(output_dim, hidden_dim)
        self.dropout = nn.Dropout(drop_prob)
        # Attention mechanism.
        self.attention = nn.Sequential()
        with self.attention.name_scope():
            # the layer maps (*V, in_units) -> (*V, alignment_dim)
            # namely (*V, hidden_dim + encoder_hidden_dim) -> (*V, alignment_dim)
            self.attention.add(
                # alignment_dim is 25
                nn.Dense(alignment_dim, in_units=hidden_dim + encoder_hidden_dim,
                         activation="tanh", flatten=False))
            # the layer maps (*V, alignment_dim) -> (*V, 1)
            # activation = None
            self.attention.add(nn.Dense(1, in_units=alignment_dim, flatten=False))
        self.rnn = rnn.GRU(hidden_dim, num_layers, dropout=drop_prob,
                           input_size=hidden_dim)
        # the layer maps (*V, hidden_dim) -> (*V, output_dim)
        # activation = None
        self.out = nn.Dense(output_dim, in_units=hidden_dim, flatten=False)
        # the layer maps (*V, hidden_dim + encoder_hidden_dim) -> (*V, hidden_dim)
        # activation = None
        self.rnn_concat_input = nn.Dense(hidden_dim,
                                         in_units=hidden_dim + encoder_hidden_dim,
                                         flatten=False)
def __init__(self, vocab_size, num_hiddens, ffn_num_hiddens, num_heads,
             num_layers, dropout, use_bias=False, **kwargs):
    super(TransformerEncoder, self).__init__(**kwargs)
    self.num_hiddens = num_hiddens
    self.embedding = nn.Embedding(vocab_size, num_hiddens)
    self.pos_encoding = PositionalEncoding(num_hiddens, dropout)
    self.blks = nn.Sequential()
    for _ in range(num_layers):
        self.blks.add(EncoderBlock(num_hiddens, ffn_num_hiddens, num_heads,
                                   dropout, use_bias))
def SequentialTextCNN(config):
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Embedding(input_dim=config['vocab_size'],
                             output_dim=config['embedding_dim']))
        net.add(nn.Lambda(lambda x: x.transpose((0, 2, 1))))
        net.add(nn.Conv1D(channels=config['feature_map'],
                          kernel_size=config['kernel_size'][0], strides=1))
        net.add(nn.BatchNorm(axis=1))
        net.add(nn.Activation('relu'))
        net.add(nn.GlobalMaxPool1D())
        net.add(nn.Dropout(rate=config['dropout_rate']))
        net.add(nn.Dense(units=2))
    return net
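# A minimal smoke test for SequentialTextCNN above. This is a sketch only: the config
# keys match the function, but the values are hypothetical, and it assumes
# `import mxnet as mx` and `from mxnet.gluon import nn` are in scope.
config = {'vocab_size': 10000, 'embedding_dim': 128, 'feature_map': 100,
          'kernel_size': [3, 4, 5], 'dropout_rate': 0.5}
net = SequentialTextCNN(config)
net.initialize(mx.init.Xavier())
# A batch of 8 sequences of 50 token indices; the output should have shape (8, 2).
out = net(mx.nd.ones((8, 50)))
print(out.shape)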
def __init__(self, vocab_size, word_embed_size, hidden_size, dropout=0.,
             intra_attention=False, **kwargs):
    super(NLIModel, self).__init__(**kwargs)
    self.word_embed_size = word_embed_size
    self.hidden_size = hidden_size
    self.use_intra_attention = intra_attention
    with self.name_scope():
        self.dropout_layer = nn.Dropout(dropout)
        self.word_emb = nn.Embedding(vocab_size, word_embed_size)
        self.lin_proj = nn.Dense(hidden_size, in_units=word_embed_size,
                                 flatten=False, use_bias=False)
        if self.use_intra_attention:
            self.intra_attention = IntraSentenceAttention(hidden_size, hidden_size, dropout)
            input_size = hidden_size * 2
        else:
            self.intra_attention = None
            input_size = hidden_size
        self.model = DecomposableAttention(input_size, hidden_size, 3, dropout)
def __init__(self, vocab_size, embedding_dim, hidden_dim, batch_size, **kwargs):
    super(AttentionDecoder, self).__init__(**kwargs)
    with self.name_scope():
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.gru = rnn.GRU(hidden_dim, input_size=embedding_dim, layout="NTC")
        # The GRU hidden state at each step is concatenated with the context vector
        # computed by the attention layer, which doubles the feature length;
        # that is why in_units is hidden_dim * 2.
        # self.hidden2linear = nn.Dense(vocab_size, in_units=hidden_dim * 2)
        self.hidden2linear = nn.Dense(vocab_size, in_units=hidden_dim * 2, flatten=False)
def _get_embedding(self):
    prefix = 'embedding0_'
    if self._sparse_weight:
        embedding = nn.Sequential(prefix=prefix)
    else:
        embedding = nn.HybridSequential(prefix=prefix)
    with embedding.name_scope():
        if self._sparse_weight:
            # sparse embedding has both sparse weight and sparse grad
            embed = contrib.nn.SparseEmbedding(self._vocab_size, self._embed_size,
                                               prefix=prefix)
        else:
            embed = nn.Embedding(self._vocab_size, self._embed_size, prefix=prefix,
                                 sparse_grad=self._sparse_grad)
        embedding.add(embed)
        if self._embed_dropout:
            embedding.add(nn.Dropout(self._embed_dropout))
    return embedding
def net_define():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Embedding(config.MAX_WORDS, config.EMBEDDING_DIM))
        net.add(rnn.GRU(128, layout='NTC', bidirectional=True, num_layers=2, dropout=0.2))
        net.add(transpose(axes=(0, 2, 1)))
        # net.add(nn.MaxPool2D(pool_size=(config.MAX_LENGTH, 1)))
        # net.add(nn.Conv2D(128, kernel_size=(101, 1), padding=(50, 0),
        #                   groups=128, activation='relu'))
        net.add(PrimeConvCap(8, 32, kernel_size=(1, 1), padding=(0, 0)))
        # net.add(AdvConvCap(8, 32, 8, 32, kernel_size=(1, 1), padding=(0, 0)))
        net.add(CapFullyBlock(8 * config.MAX_LENGTH // 2, num_cap=12,
                              input_units=32, units=16, route_num=5))
        # net.add(CapFullyBlock(8 * (config.MAX_LENGTH - 8), num_cap=12,
        #                       input_units=32, units=16, route_num=5))
        # net.add(CapFullyBlock(8, num_cap=12, input_units=32, units=16, route_num=5))
        net.add(nn.Dropout(0.2))
        # net.add(LengthBlock())
        net.add(nn.Dense(6, activation='sigmoid'))
        net.initialize(init=init.Xavier())
    return net
def __init__(self, n_hidden, vocab_size, embed_dim, max_seq_length, **kwargs):
    super(korean_autospacing_base, self).__init__(**kwargs)
    # input sequence length
    self.in_seq_len = max_seq_length
    # output sequence length
    self.out_seq_len = max_seq_length
    # number of GRU hidden units
    self.n_hidden = n_hidden
    # number of unique characters (vocabulary size)
    self.vocab_size = vocab_size
    # max_seq_length
    self.max_seq_length = max_seq_length
    # embedding dimension
    self.embed_dim = embed_dim
    with self.name_scope():
        self.embedding = nn.Embedding(input_dim=self.vocab_size, output_dim=self.embed_dim)
        self.conv_unigram = nn.Conv2D(channels=128, kernel_size=(1, self.embed_dim))
        self.conv_bigram = nn.Conv2D(channels=256, kernel_size=(2, self.embed_dim),
                                     padding=(1, 0))
        self.conv_trigram = nn.Conv2D(channels=128, kernel_size=(3, self.embed_dim),
                                      padding=(1, 0))
        self.conv_forthgram = nn.Conv2D(channels=64, kernel_size=(4, self.embed_dim),
                                        padding=(2, 0))
        self.conv_fifthgram = nn.Conv2D(channels=32, kernel_size=(5, self.embed_dim),
                                        padding=(2, 0))
        self.bi_gru = rnn.GRU(hidden_size=self.n_hidden, layout='NTC', bidirectional=True)
        self.dense_sh = nn.Dense(100, activation='relu', flatten=False)
        self.dense = nn.Dense(1, activation='sigmoid', flatten=False)
def __init__(self, config, **kwargs):
    super(TextCNN, self).__init__(**kwargs)
    V = config.vocab_size
    E = config.embedding_dim
    Nf = config.num_filters
    Ks = config.kernel_sizes
    C = config.num_classes
    Dr = config.dropout_prob
    with self.name_scope():
        self.embedding = nn.Embedding(V, E)  # embedding layer
        # three different convolutional layers
        self.conv1 = Conv_Max_Pooling(Nf, Ks[0])
        self.conv2 = Conv_Max_Pooling(Nf, Ks[1])
        self.conv3 = Conv_Max_Pooling(Nf, Ks[2])
        self.dropout = nn.Dropout(Dr)  # a dropout layer
        self.fc1 = nn.Dense(C)  # a dense layer for classification
def __init__(self, n_classes=2, kernel_size=8, embed_size=300, dropout=0.5,
             seq_len=500, vocab_size=7500, **kwargs):
    super(CNN, self).__init__(**kwargs)
    self.dropout = False
    with self.name_scope():
        self.encoder = nn.Embedding(vocab_size, embed_size,
                                    weight_initializer=mx.init.Uniform(0.1))
        self.conv = nn.Conv2D(embed_size, (kernel_size, embed_size), 1)
        self.act = nn.Activation('relu')
        self.pool = nn.MaxPool2D((seq_len - kernel_size + 1, 1))
        if dropout > 0.0:
            self.dropout = True
            self.drop = nn.Dropout(dropout)
        self.decoder = nn.Dense(n_classes)
def load_model():
    num_classes = 2
    num_hidden = 25
    num_embed = 300
    learning_rate = .01
    epochs = 200
    batch_size = 20
    voca_size = 10000
    context = mx.gpu()
    model_params_filename = "lstm_net.params_epoch4"
    model = nn.Sequential()
    with model.name_scope():
        model.embed = nn.Embedding(voca_size, num_embed)
        model.add(rnn.LSTM(num_hidden, layout='NTC', dropout=0.7, bidirectional=False))
        model.add(nn.Dense(num_classes))
    model.load_params(model_params_filename, context)
    return model
def __init__(self, mode, vocab_size, embed_size, num_hiddens,
             num_layers, drop_prob=0.5, **kwargs):
    super(RNNModel, self).__init__(**kwargs)
    with self.name_scope():
        self.dropout = nn.Dropout(drop_prob)
        # Map word indices to word vectors; these word vectors are also model parameters.
        self.embedding = nn.Embedding(vocab_size, embed_size,
                                      weight_initializer=init.Uniform(0.1))
        if mode == 'rnn_relu':
            self.rnn = rnn.RNN(num_hiddens, num_layers, activation='relu',
                               dropout=drop_prob, input_size=embed_size)
        elif mode == 'rnn_tanh':
            self.rnn = rnn.RNN(num_hiddens, num_layers, activation='tanh',
                               dropout=drop_prob, input_size=embed_size)
        elif mode == 'lstm':
            self.rnn = rnn.LSTM(num_hiddens, num_layers, dropout=drop_prob,
                                input_size=embed_size)
        elif mode == 'gru':
            self.rnn = rnn.GRU(num_hiddens, num_layers, dropout=drop_prob,
                               input_size=embed_size)
        else:
            raise ValueError('Invalid mode %s. Options are rnn_relu, '
                             'rnn_tanh, lstm, and gru' % mode)
        self.dense = nn.Dense(vocab_size, in_units=num_hiddens)
        self.num_hiddens = num_hiddens
def __init__(self, token_to_idx, subword_function, embedding_size,
             weight_initializer=None, sparse_grad=True, dtype='float32', **kwargs):
    super(FasttextEmbeddingModel, self).__init__(embedding_size=embedding_size, **kwargs)
    self.token_to_idx = token_to_idx
    self.subword_function = subword_function
    self.weight_initializer = weight_initializer
    self.sparse_grad = sparse_grad
    self.dtype = dtype
    with self.name_scope():
        self.embedding = nn.Embedding(
            len(token_to_idx), embedding_size,
            weight_initializer=weight_initializer,
            sparse_grad=sparse_grad, dtype=dtype)
        self.subword_embedding = _MaskedSumEmbedding(
            len(subword_function), embedding_size,
            weight_initializer=weight_initializer,
            sparse_grad=sparse_grad, dtype=dtype)
def __init__(self, vocab_size, tag2idx, embedding_dim, hidden_dim):
    super(BiLSTM_CRF, self).__init__()
    with self.name_scope():
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.tag2idx = tag2idx
        self.tagset_size = len(tag2idx)
        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = rnn.LSTM(hidden_dim // 2, num_layers=1, bidirectional=True)
        # Maps the output of the LSTM into tag space.
        self.hidden2tag = nn.Dense(self.tagset_size)
        # Matrix of transition parameters. Entry i,j is the score of
        # transitioning *to* i *from* j.
        self.transitions = nd.random_normal(shape=(self.tagset_size, self.tagset_size))
        self.hidden = self.init_hidden()
def __init__(self, nwords, nword_dims, nhiddens, nlayers, natt_units, natt_hops,
             nfc, nclass, drop_prob, pool_way, nprune_p=None, nprune_q=None, **kwargs):
    super(SelfAttentiveBiLSTM, self).__init__(**kwargs)
    with self.name_scope():
        self.embedding_layer = nn.Embedding(nwords, nword_dims)
        self.bilstm = rnn.LSTM(nhiddens, num_layers=nlayers, dropout=drop_prob,
                               bidirectional=True)
        self.att_encoder = SelfAttention(natt_units, natt_hops)
        self.dense = nn.Dense(nfc, activation='tanh')
        self.output_layer = nn.Dense(nclass)
        self.dense_p, self.dense_q = None, None
        if all([nprune_p, nprune_q]):
            self.dense_p = nn.Dense(nprune_p, activation='tanh', flatten=False)
            self.dense_q = nn.Dense(nprune_q, activation='tanh', flatten=False)
        self.drop_prob = drop_prob
        self.pool_way = pool_way
def __init__(self, vocab_size, emb_size, hidden_size, num_layers=2, dropout=.3,
             bidir=True, latent_size=64, **kwargs):
    '''
    init this class, create the relevant rnns
    '''
    super(VAEEncoder, self).__init__(**kwargs)
    with self.name_scope():
        self.hidden_size = hidden_size
        self.hidden_factor = (2 if bidir else 1) * num_layers
        self.embedding_layer = nn.Embedding(vocab_size, emb_size)
        self.original_encoder = rnn.LSTM(hidden_size=hidden_size, num_layers=num_layers,
                                         dropout=dropout, bidirectional=bidir,
                                         prefix='VAEEncoder_org_encoder')
        self.paraphrase_encoder = rnn.LSTM(hidden_size=hidden_size, num_layers=num_layers,
                                           dropout=dropout, bidirectional=bidir,
                                           prefix='VAEEncoder_prp_encoder')
        # dense layers calculating mu and lv (log variance) of the latent distribution
        # to sample from; the RNN encoders above are used because the input length is flexible
        self.output_mu = nn.Dense(units=latent_size)
        self.output_sg = nn.Dense(units=latent_size)
def test_summary():
    net = gluon.model_zoo.vision.resnet50_v1()
    net.initialize()
    net.summary(mx.nd.ones((32, 3, 224, 224)))

    net2 = nn.Sequential()
    with net2.name_scope():
        net2.add(nn.Embedding(10, 20))
        net2.add(gluon.rnn.LSTM(30))
        net2.add(nn.Dense(40, flatten=False))
    net2.initialize()
    net2.summary(mx.nd.ones((80, 32)))

    net3 = gluon.rnn.LSTM(30)
    net3.initialize()
    begin_state = net3.begin_state(32)
    net3.summary(mx.nd.ones((80, 32, 5)), begin_state)

    net.hybridize()
    assert_raises(AssertionError, net.summary, mx.nd.ones((32, 3, 224, 224)))
def __init__(self, mode, vocab_size, num_embed, num_hidden,
             num_layers, dropout=0.5, **kwargs):
    super(GluonRNNModel, self).__init__(**kwargs)
    with self.name_scope():
        self.drop = nn.Dropout(dropout)
        self.encoder = nn.Embedding(vocab_size, num_embed,
                                    weight_initializer=mx.init.Uniform(0.1))
        if mode == 'lstm':
            # we create an LSTM layer with a certain number of hidden LSTM cells and layers
            # in our example num_hidden is 1000 and num_layers is 2
            # the input to the LSTM is only passed during the forward pass
            # (see the forward function below)
            self.rnn = rnn.LSTM(num_hidden, num_layers, dropout=dropout,
                                input_size=num_embed)
        elif mode == 'gru':
            # we create a GRU layer with a certain number of hidden GRU cells and layers
            # in our example num_hidden is 1000 and num_layers is 2
            # the input to the GRU is only passed during the forward pass
            # (see the forward function below)
            self.rnn = rnn.GRU(num_hidden, num_layers, dropout=dropout,
                               input_size=num_embed)
        else:
            # we create a vanilla RNN layer with a certain number of hidden cells and layers
            # in our example num_hidden is 1000 and num_layers is 2
            # the input to the vanilla RNN is only passed during the forward pass
            # (see the forward function below)
            self.rnn = rnn.RNN(num_hidden, num_layers, activation='relu', dropout=dropout,
                               input_size=num_embed)
        self.decoder = nn.Dense(vocab_size, in_units=num_hidden)
        self.num_hidden = num_hidden
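# A minimal instantiation sketch matching the comments above (num_hidden=1000, num_layers=2).
# It assumes the constructor belongs to a gluon.Block subclass named GluonRNNModel and that
# `import mxnet as mx` is in scope; the vocabulary and embedding sizes are hypothetical.
model = GluonRNNModel('lstm', vocab_size=33278, num_embed=650,
                      num_hidden=1000, num_layers=2)
model.initialize(mx.init.Xavier(), ctx=mx.cpu())
# recurrent state for a batch of 32 sequences, used alongside the forward pass
hidden = model.rnn.begin_state(func=mx.nd.zeros, batch_size=32, ctx=mx.cpu())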
def __init__(self, vocab_size, num_layers=2, units=128, hidden_size=2048, num_heads=4,
             activation='approx_gelu', two_stream: bool = False, scaled=True,
             dropout=0.0, attention_dropout=0.0, use_residual=True,
             clamp_len: typing.Optional[int] = None, use_decoder=True,
             tie_decoder_weight=True, weight_initializer=None, bias_initializer='zeros',
             prefix=None, params=None):
    super().__init__(prefix=prefix, params=params)
    assert units % num_heads == 0, 'In TransformerDecoder, the units should be divided ' \
                                   'exactly by the number of heads. Received units={}, ' \
                                   'num_heads={}'.format(units, num_heads)
    self._num_layers = num_layers
    self._units = units
    self._hidden_size = hidden_size
    self._num_heads = num_heads
    self._two_stream = two_stream
    assert not two_stream, 'Not yet implemented.'
    self._dropout = dropout
    self._use_residual = use_residual
    self._clamp_len = clamp_len
    with self.name_scope():
        self.word_embed = nn.Embedding(vocab_size, units)
        self.mask_embed = self.params.get('mask_embed', shape=(1, 1, units))
        self.pos_embed = PositionalEmbedding(units)
        if dropout:
            self.dropout_layer = nn.Dropout(rate=dropout)
        self.transformer_cells = nn.HybridSequential()
        for i in range(num_layers):
            attention_cell = RelativeSegmentEmbeddingPositionalEmbeddingMultiHeadAttentionCell(
                d_head=units // num_heads, num_heads=num_heads, scaled=scaled,
                dropout=attention_dropout)
            self.transformer_cells.add(
                XLNetCell(attention_cell=attention_cell, units=units,
                          hidden_size=hidden_size, num_heads=num_heads,
                          activation=activation, weight_initializer=weight_initializer,
                          bias_initializer=bias_initializer, dropout=dropout,
                          scaled=scaled, use_residual=use_residual,
                          prefix='transformer%d_' % i))
        if use_decoder:
            self.decoder = nn.Dense(
                vocab_size, flatten=False,
                params=self.word_embed.params if tie_decoder_weight else None)
def __init__(self, vocab_size, word_vocab_size, num_embed, ker_width, num_filters,
             word_len, hw_layers, lstm_layers, lstm_units, dropout, **kwargs):
    super(LSTMCharWord, self).__init__(**kwargs)
    with self.name_scope():
        self.features = nn.Sequential()
        self.features.add(nn.Embedding(vocab_size, num_embed))
        hw_units = num_embed
        if (ker_width is not None) and (num_filters is not None):
            self.features.add(CharCNN(ker_width, num_embed, num_filters, word_len))
            hw_units = sum(num_filters)
        if hw_layers is not None:
            for _ in range(hw_layers):
                self.features.add(HighwayLayer(hw_units))
        self.rnn = rnn.LSTM(lstm_units, num_layers=lstm_layers, layout='NTC',
                            dropout=dropout, input_size=hw_units)
        self.drop = nn.Dropout(dropout)
        self.decoder = Decoder(word_vocab_size, lstm_units)
def __init__(self, vocab_size, emb_size, hidden_size, num_layers=2, dropout=.3,
             bidir=True, **kwargs):
    '''
    init this class, create the relevant rnns
    note: we will share the original sentence encoder between the VAE encoder and VAE decoder
    '''
    super(VAEDecoder, self).__init__(**kwargs)
    with self.name_scope():
        self.embedding_layer = nn.Embedding(vocab_size, emb_size)
        self.paraphrase_decoder = rnn.LSTM(hidden_size=hidden_size, num_layers=num_layers,
                                           dropout=dropout, bidirectional=bidir,
                                           prefix='VAEDecoder_prp_decoder')
        # the output size should be set equal to the vocab size (a probability
        # distribution over all words in the vocabulary)
        self.dense_output = nn.Dense(vocab_size, activation='tanh')
def __init__(self, mode, vocab_size, num_embed, num_hidden,
             num_layers, dropout_rnn=0.25, dropout=0.25, **kwargs):
    super(rnn_net, self).__init__(**kwargs)
    with self.name_scope():
        self.drop = nn.Dropout(dropout_rnn)
        self.mlp = nn.Dense(units=num_hidden, activation='relu')
        self.encoder = nn.Embedding(vocab_size, num_embed,
                                    weight_initializer=mx.init.Uniform(0.1))
        if mode == 'lstm':
            self._rnn = rnn.LSTM(num_hidden, num_layers, layout='NTC',
                                 dropout=dropout_rnn, input_size=num_embed)
        elif mode == 'gru':
            self._rnn = rnn.GRU(num_hidden, num_layers, layout='NTC',
                                dropout=dropout_rnn, input_size=num_embed)
        else:
            self._rnn = rnn.RNN(num_hidden, num_layers, layout='NTC', activation='relu',
                                dropout=dropout_rnn, input_size=num_embed)
        self.num_hidden = num_hidden
def __init__(self, num_tokens, embedding_size, weight_initializer=None,
             sparse_grad=True, dtype='float32', **kwargs):
    super(_MaskedSumEmbedding, self).__init__(**kwargs)
    self.num_tokens = num_tokens
    self.embedding_size = embedding_size
    self.weight_initializer = weight_initializer
    self.sparse_grad = sparse_grad
    self.dtype = dtype
    with self.name_scope():
        self.embedding = nn.Embedding(
            num_tokens, embedding_size,
            weight_initializer=weight_initializer,
            sparse_grad=sparse_grad, dtype=dtype)