def __init__(self):
    super(Decoder, self).__init__()
    self.attention_network = Attention()
    # Decoder
    self.embedding = nn.Embedding(
        config.vocab_size,
        config.emb_dim,
        weight_attr=paddle.ParamAttr(initializer=I.Normal(
            std=config.trunc_norm_init_std)))
    self.x_context = nn.Linear(config.hidden_dim * 2 + config.emb_dim,
                               config.emb_dim)
    self.lstm = nn.LSTM(
        config.emb_dim,
        config.hidden_dim,
        num_layers=1,
        direction='forward',
        weight_ih_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-config.rand_unif_init_mag,
                                  high=config.rand_unif_init_mag)),
        bias_ih_attr=paddle.ParamAttr(initializer=I.Constant(value=0.0)))
    if config.pointer_gen:
        self.p_gen_linear = nn.Linear(
            config.hidden_dim * 4 + config.emb_dim, 1)
    self.out1 = nn.Linear(config.hidden_dim * 3, config.hidden_dim)
    self.out2 = nn.Linear(
        config.hidden_dim,
        config.vocab_size,
        weight_attr=paddle.ParamAttr(initializer=I.Normal(
            std=config.trunc_norm_init_std)))
def __init__(self, with_lstm=True, n_group=1, in_channels=3):
    super(ResNet_ASTER, self).__init__()
    self.with_lstm = with_lstm
    self.n_group = n_group
    self.layer0 = nn.Sequential(
        nn.Conv2D(in_channels,
                  32,
                  kernel_size=(3, 3),
                  stride=1,
                  padding=1,
                  bias_attr=False),
        nn.BatchNorm2D(32),
        nn.ReLU())
    self.inplanes = 32
    self.layer1 = self._make_layer(32, 3, [2, 2])   # [16, 50]
    self.layer2 = self._make_layer(64, 4, [2, 2])   # [8, 25]
    self.layer3 = self._make_layer(128, 6, [2, 1])  # [4, 25]
    self.layer4 = self._make_layer(256, 6, [2, 1])  # [2, 25]
    self.layer5 = self._make_layer(512, 3, [2, 1])  # [1, 25]
    if with_lstm:
        self.rnn = nn.LSTM(512, 256, direction="bidirect", num_layers=2)
        self.out_channels = 2 * 256
    else:
        self.out_channels = 512
def __init__(self):
    super(Encoder, self).__init__()
    # Initialized embeddings
    self.embedding = nn.Embedding(
        config.vocab_size,
        config.emb_dim,
        weight_attr=paddle.ParamAttr(initializer=I.Normal(
            std=config.trunc_norm_init_std)))
    # Initialized lstm weights
    self.lstm = nn.LSTM(
        config.emb_dim,
        config.hidden_dim,
        num_layers=1,
        direction='bidirect',
        weight_ih_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-config.rand_unif_init_mag,
                                  high=config.rand_unif_init_mag)),
        bias_ih_attr=paddle.ParamAttr(initializer=I.Constant(value=0.0)))
    # Initialized linear weights
    self.W_h = nn.Linear(config.hidden_dim * 2,
                         config.hidden_dim * 2,
                         bias_attr=False)
def __init__(self, in_channels, hidden_size):
    super(EncoderWithRNN, self).__init__()
    self.out_channels = hidden_size * 2
    self.lstm = nn.LSTM(in_channels,
                        hidden_size,
                        direction='bidirectional',
                        num_layers=2)
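A minimal, self-contained sketch (assuming PaddlePaddle 2.x and illustrative tensor sizes chosen here, not taken from the project above) of why `out_channels` equals `hidden_size * 2`: a bidirectional `nn.LSTM` concatenates the forward and backward hidden states along the feature axis.

import paddle
import paddle.nn as nn

# Hypothetical sizes for illustration only.
batch, time_steps, in_channels, hidden_size = 4, 25, 512, 256

lstm = nn.LSTM(in_channels, hidden_size, direction='bidirectional', num_layers=2)
x = paddle.randn([batch, time_steps, in_channels])   # batch-major input [N, T, C]
out, (h, c) = lstm(x)

print(out.shape)  # [4, 25, 512] -> feature dim = hidden_size * 2
print(h.shape)    # [4, 4, 256]  -> [num_layers * num_directions, batch, hidden_size]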
def __init__(self,
             attention_layer,
             vocab_size,
             num_classes,
             emb_dim=128,
             lstm_hidden_size=196,
             fc_hidden_size=96,
             lstm_layers=1,
             dropout_rate=0.0,
             padding_idx=0):
    super().__init__()
    self.padding_idx = padding_idx
    self.embedder = nn.Embedding(num_embeddings=vocab_size,
                                 embedding_dim=emb_dim,
                                 padding_idx=padding_idx)
    self.bilstm = nn.LSTM(input_size=emb_dim,
                          hidden_size=lstm_hidden_size,
                          num_layers=lstm_layers,
                          dropout=dropout_rate,
                          direction='bidirect')
    self.attention = attention_layer
    if isinstance(attention_layer, SelfAttention):
        self.fc = nn.Linear(lstm_hidden_size, fc_hidden_size)
    elif isinstance(attention_layer, SelfInteractiveAttention):
        self.fc = nn.Linear(lstm_hidden_size * 2, fc_hidden_size)
    else:
        raise RuntimeError("Unknown attention type %s." %
                           attention_layer.__class__.__name__)
    self.output_layer = nn.Linear(fc_hidden_size, num_classes)
def __init__(self,
             in_channels,
             num_chars=92,
             visual_dim=16,
             fusion_dim=1024,
             node_input=32,
             node_embed=256,
             edge_input=5,
             edge_embed=256,
             num_gnn=2,
             num_classes=26,
             bidirectional=False):
    super().__init__()
    self.fusion = Block([visual_dim, node_embed], node_embed, fusion_dim)
    self.node_embed = nn.Embedding(num_chars, node_input, 0)
    # Halve the hidden size in the bidirectional case so that the concatenated
    # forward/backward features still have width node_embed, and set the LSTM
    # direction to match (the original code left the default 'forward' here).
    hidden = node_embed // 2 if bidirectional else node_embed
    self.rnn = nn.LSTM(
        input_size=node_input,
        hidden_size=hidden,
        num_layers=1,
        direction='bidirectional' if bidirectional else 'forward')
    self.edge_embed = nn.Linear(edge_input, edge_embed)
    self.gnn_layers = nn.LayerList(
        [GNNLayer(node_embed, edge_embed) for _ in range(num_gnn)])
    self.node_cls = nn.Linear(node_embed, num_classes)
    self.edge_cls = nn.Linear(edge_embed, 2)
def __init__(self, n_mels, num_layers, hidden_size, output_size):
    super().__init__()
    self.lstm = nn.LSTM(n_mels, hidden_size, num_layers)
    self.linear = nn.Linear(hidden_size, output_size)
    self.similarity_weight = self.create_parameter(
        [1], default_initializer=I.Constant(10.))
    self.similarity_bias = self.create_parameter(
        [1], default_initializer=I.Constant(-5.))
def __init__(self, vocab_size, embed_dim, hidden_size, num_layers):
    super(Seq2SeqEncoder, self).__init__()
    self.embedder = nn.Embedding(vocab_size, embed_dim)
    self.lstm = nn.LSTM(input_size=embed_dim,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        dropout=0.2 if num_layers > 1 else 0.)
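For context, a minimal sketch (not taken from the original project; sizes are made up) of how an embedding-plus-LSTM encoder like this is typically driven in Paddle, using the `sequence_length` argument so padded positions do not leak into the final states.

import paddle
import paddle.nn as nn

vocab_size, embed_dim, hidden_size, num_layers = 1000, 64, 128, 2
embedder = nn.Embedding(vocab_size, embed_dim)
lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_size,
               num_layers=num_layers, dropout=0.2)

token_ids = paddle.randint(0, vocab_size, shape=[8, 20])   # [batch, time]
lengths = paddle.full([8], 20, dtype='int64')              # true length of each sample

emb = embedder(token_ids)                                  # [8, 20, 64]
outputs, (h, c) = lstm(emb, sequence_length=lengths)       # outputs: [8, 20, 128]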
def __init__(self,
             hidden_size,
             vocab_size,
             class_num=2,
             num_steps=128,
             num_layers=1,
             init_scale=0.1,
             dropout=None):
    # Parameters:
    # 1. hidden_size: the embedding size and the dimensionality of the hidden and cell vectors
    # 2. vocab_size: the vocabulary size the model can handle
    # 3. class_num: number of sentiment classes; binary or multi-class
    # 4. num_steps: the maximum sentence length this sentiment model considers
    # 5. num_layers: number of layers in the network
    # 6. init_scale: initialization range for the network's parameters
    # LSTMs use many Tanh and Sigmoid activations internally, which are sensitive to
    # numerical precision, so a fairly small initialization range is generally used
    # to keep training stable.
    super(SentimentClassifier, self).__init__()
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    self.class_num = class_num
    self.init_scale = init_scale
    self.num_layers = num_layers
    self.num_steps = num_steps
    self.dropout = dropout

    # Declare an embedding layer that maps each word of a sentence to a vector
    self.embedding = nn.Embedding(
        num_embeddings=vocab_size,
        embedding_dim=hidden_size,
        sparse=False,
        weight_attr=paddle.ParamAttr(
            initializer=paddle.nn.initializer.Uniform(low=-init_scale,
                                                      high=init_scale)))

    # Declare an LSTM that encodes each sentence into a vector
    self.simple_lstm_rnn = nn.LSTM(input_size=hidden_size,
                                   hidden_size=hidden_size,
                                   num_layers=num_layers)

    # Once the sentence vector is obtained, the sentence is classified from it.
    # Typically the sentence vector is multiplied by a weight W of shape
    # [self.hidden_size, self.class_num], and a bias b of shape [self.class_num]
    # is added, mapping the sentence vector to the class logits.
    # Declare the parameters used to map the sentence vector to a sentiment class;
    # the weight is generally of shape [self.hidden_size, self.class_num].
    self.cls_fc = nn.Linear(in_features=self.hidden_size,
                            out_features=self.class_num,
                            weight_attr=None,
                            bias_attr=None)
    self.dropout_layer = nn.Dropout(p=self.dropout, mode='upscale_in_train')
def __init__(self, n_chars, n_embed, n_out, pad_index=0):
    super(CharLSTMEncoder, self).__init__()
    self.n_chars = n_chars
    self.n_embed = n_embed
    self.n_out = n_out
    self.pad_index = pad_index
    # the embedding layer
    self.embed = nn.Embedding(num_embeddings=n_chars, embedding_dim=n_embed)
    # the lstm layer
    self.lstm = nn.LSTM(input_size=n_embed,
                        hidden_size=n_out // 2,
                        direction="bidirectional")
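A small shape sketch (Paddle 2.x assumed, sizes invented, and the concatenation below is a typical pattern rather than this class's actual forward) of why `hidden_size=n_out // 2` with a bidirectional LSTM yields a character feature of width `n_out`: the last forward and backward hidden states are concatenated.

import paddle
import paddle.nn as nn

n_chars, n_embed, n_out = 100, 50, 100
embed = nn.Embedding(n_chars, n_embed)
lstm = nn.LSTM(input_size=n_embed, hidden_size=n_out // 2, direction="bidirectional")

chars = paddle.randint(0, n_chars, shape=[32, 12])     # [num_words, max_char_len]
_, (h, _) = lstm(embed(chars))                         # h: [2, 32, n_out // 2]
word_feat = paddle.concat([h[0], h[1]], axis=-1)       # [32, n_out]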
def __init__(self, vocabulary, image_height, channel=1):
    super(Model, self).__init__()
    assert image_height % 32 == 0, 'image height has to be a multiple of 32'
    self.conv1 = nn.Conv2D(in_channels=channel, out_channels=64,
                           kernel_size=3, stride=1, padding=1)
    self.relu1 = nn.ReLU()
    self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2D(in_channels=64, out_channels=128,
                           kernel_size=3, stride=1, padding=1)
    self.relu2 = nn.ReLU()
    self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2)
    self.conv3 = nn.Conv2D(in_channels=128, out_channels=256,
                           kernel_size=3, stride=1, padding=1)
    self.relu3 = nn.ReLU()
    self.bn3 = nn.BatchNorm2D(256)
    self.conv4 = nn.Conv2D(in_channels=256, out_channels=256,
                           kernel_size=3, stride=1, padding=1)
    self.relu4 = nn.ReLU()
    self.pool4 = nn.MaxPool2D(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))
    self.conv5 = nn.Conv2D(in_channels=256, out_channels=512,
                           kernel_size=3, stride=1, padding=1)
    self.relu5 = nn.ReLU()
    self.bn5 = nn.BatchNorm2D(512)
    self.conv6 = nn.Conv2D(in_channels=512, out_channels=512,
                           kernel_size=3, stride=1, padding=1)
    self.relu6 = nn.ReLU()
    self.pool6 = nn.MaxPool2D(kernel_size=(2, 2), stride=(2, 1), padding=(0, 1))
    self.conv7 = nn.Conv2D(in_channels=512, out_channels=512,
                           kernel_size=2, stride=1, padding=0)
    self.relu7 = nn.ReLU()
    self.bn7 = nn.BatchNorm2D(512)
    self.lstm1 = nn.LSTM(input_size=512, hidden_size=256, direction='bidirectional')
    self.fc = nn.Linear(in_features=512, out_features=256)
    self.lstm2 = nn.LSTM(input_size=256, hidden_size=256, direction='bidirectional')
    self.output = nn.Linear(in_features=512, out_features=len(vocabulary))
def __init__(self,
             vocab_size,
             num_class,
             emb_dim=512,
             hidden_size=512,
             n_lstm_layer=3,
             is_bidirectory=True,
             padding_idx=0,
             epsilon=1e-5,
             dropout_rate=0.1):
    """Init model

    Args:
        vocab_size (int): vocab size.
        num_class (int): number of classes.
        emb_dim (int, optional): embedding dimension. Defaults to 512.
        hidden_size (int, optional): hidden size. Defaults to 512.
        n_lstm_layer (int, optional): number of LSTM layers. Defaults to 3.
        is_bidirectory (bool, optional): whether to use a bidirectional LSTM. Defaults to True.
        padding_idx (int, optional): padding index. Defaults to 0.
        epsilon (float, optional): epsilon. Defaults to 1e-5.
        dropout_rate (float, optional): dropout rate. Defaults to 0.1.
    """
    super().__init__()
    self.padding_idx = padding_idx
    self.embedder = nn.Embedding(vocab_size, emb_dim, padding_idx=padding_idx)
    self.layer_norm = nn.LayerNorm(normalized_shape=emb_dim, epsilon=epsilon)
    self.dropout = nn.Dropout(p=dropout_rate)
    direction = 'bidirectional' if is_bidirectory else 'forward'
    self.lstm_encoder = nn.LSTM(emb_dim,
                                hidden_size,
                                num_layers=n_lstm_layer,
                                direction=direction)
    # kernel_size = (5, hidden_size * 2) if is_bidirectory else (5, hidden_size)
    in_channels = hidden_size * 2 if is_bidirectory else hidden_size
    self.conv_encoder = nn.Conv1D(in_channels=in_channels,
                                  out_channels=hidden_size,
                                  kernel_size=5,
                                  padding=2)
    self.output_layer = nn.Conv1D(in_channels=hidden_size,
                                  out_channels=num_class,
                                  kernel_size=3,
                                  padding=1)
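One detail worth noting in this LSTM-plus-Conv1D layout: `nn.LSTM` returns batch-major features of shape [batch, time, channels], while `nn.Conv1D` defaults to `data_format='NCL'`, so the forward pass presumably transposes before the convolution. A hedged sketch with made-up sizes (this is an illustration of the shape handling, not the model's actual forward):

import paddle
import paddle.nn as nn

lstm = nn.LSTM(512, 512, num_layers=3, direction='bidirectional')
conv = nn.Conv1D(in_channels=1024, out_channels=512, kernel_size=5, padding=2)

x = paddle.randn([2, 40, 512])              # [batch, time, emb_dim]
seq_out, _ = lstm(x)                        # [2, 40, 1024]
feat = conv(seq_out.transpose([0, 2, 1]))   # NLC -> NCL, then conv: [2, 512, 40]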
def __init__(self,
             n_words,
             pad_index,
             lstm_by_wp_embed_size=200,
             n_embed=300,
             n_lstm_hidden=300,
             n_lstm_layers=3):
    super(LSTMByWPEncoder, self).__init__()
    self.pad_index = pad_index
    self.word_embed = nn.Embedding(n_words, lstm_by_wp_embed_size)
    self.lstm = nn.LSTM(input_size=lstm_by_wp_embed_size,
                        hidden_size=n_lstm_hidden,
                        num_layers=n_lstm_layers,
                        direction="bidirectional")
    self.mlp_input_size = n_lstm_hidden * 2
def __init__(self,
             batch_size,
             input_size,
             hidden_size,
             num_layers,
             dropout,
             task='pre-train'):
    super(ELMoBiLM, self).__init__()
    self._num_layers = num_layers
    self._dropout = dropout
    self._task = task
    self._lstm_layers = []
    for direction in ['forward', 'backward']:
        layers = []
        for i in range(num_layers):
            lstm = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=1,
                           direction='forward',
                           weight_hh_attr=paddle.ParamAttr(
                               initializer=I.XavierUniform()),
                           weight_ih_attr=paddle.ParamAttr(
                               initializer=I.XavierUniform()),
                           bias_hh_attr=False,
                           bias_ih_attr=paddle.ParamAttr(
                               initializer=I.Constant(value=0.0)))
            self.add_sublayer('{}_lstm_layer_{}'.format(direction, i), lstm)
            hidden_state = paddle.zeros(shape=[1, batch_size, hidden_size],
                                        dtype='float32')
            cell_state = paddle.zeros(shape=[1, batch_size, hidden_size],
                                      dtype='float32')
            layers.append({
                'lstm': lstm,
                'hidden_state': hidden_state,
                'cell_state': cell_state
            })
        self._lstm_layers.append(layers)
    if dropout:
        self._dropout_layer = nn.Dropout(p=dropout)
def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             dropout_prob=0.,
             init_scale=0.1):
    super(Seq2SeqEncoder, self).__init__()
    self.embedder = nn.Embedding(
        vocab_size,
        embed_dim,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    self.lstm = nn.LSTM(input_size=embed_dim,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        direction="forward",
                        dropout=dropout_prob if num_layers > 1 else 0.)
def __init__(self,
             embed_dim,
             hidden_size,
             vocab_size,
             output_dim,
             vocab_path,
             padding_idx=0,
             num_layers=1,
             dropout_prob=0.0,
             init_scale=0.1,
             embedding_name=None):
    super(BiLSTM, self).__init__()
    if embedding_name is not None:
        self.embedder = TokenEmbedding(embedding_name,
                                       extended_vocab_path=vocab_path,
                                       keep_extended_vocab_only=True)
        embed_dim = self.embedder.embedding_dim
    else:
        self.embedder = nn.Embedding(vocab_size, embed_dim, padding_idx)
    self.lstm = nn.LSTM(embed_dim,
                        hidden_size,
                        num_layers,
                        direction='bidirectional',
                        dropout=dropout_prob)
    self.fc = nn.Linear(
        hidden_size * 2,
        hidden_size,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    self.fc_1 = nn.Linear(
        hidden_size * 8,
        hidden_size,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    self.output_layer = nn.Linear(
        hidden_size,
        output_dim,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
def __init__(self,
             input_size,
             hidden_size,
             num_layers=1,
             direction="forward",
             dropout=0.0,
             pooling_type=None,
             **kwargs):
    super().__init__()
    self._input_size = input_size
    self._hidden_size = hidden_size
    self._direction = direction
    self._pooling_type = pooling_type
    self.lstm_layer = nn.LSTM(input_size=input_size,
                              hidden_size=hidden_size,
                              num_layers=num_layers,
                              direction=direction,
                              dropout=dropout,
                              **kwargs)
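The `pooling_type` argument suggests a sequence-to-vector reduction over the LSTM outputs. A minimal sketch of such reductions (an illustration of common choices, not necessarily this class's actual forward logic; sizes are invented):

import paddle
import paddle.nn as nn

lstm = nn.LSTM(input_size=128, hidden_size=64, direction='bidirectional')
x = paddle.randn([4, 30, 128])
encoded, (last_h, _) = lstm(x)                  # encoded: [4, 30, 128]

vec_max = paddle.max(encoded, axis=1)           # 'max' pooling  -> [4, 128]
vec_mean = paddle.mean(encoded, axis=1)         # 'mean' pooling -> [4, 128]
vec_last = paddle.concat([last_h[0], last_h[1]], axis=-1)  # final fwd/bwd states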
def __init__(self,
             feat,
             n_feats,
             n_words,
             pad_index=0,
             feat_pad_index=0,
             n_char_embed=50,
             n_feat_embed=60,
             n_lstm_char_embed=100,
             n_embed=300,
             embed_dropout=0.33,
             n_lstm_hidden=300,
             n_lstm_layers=3,
             lstm_dropout=0.33):
    super(LSTMEncoder, self).__init__()
    self.pad_index = pad_index

    if feat == "char":
        self.feat_embed = CharLSTMEncoder(
            n_chars=n_feats,
            n_embed=n_char_embed,
            n_out=n_lstm_char_embed,
            pad_index=feat_pad_index,
        )
        feat_embed_size = n_lstm_char_embed
    else:
        self.feat_embed = nn.Embedding(num_embeddings=n_feats,
                                       embedding_dim=n_feat_embed)
        feat_embed_size = n_feat_embed

    self.word_embed = nn.Embedding(num_embeddings=n_words,
                                   embedding_dim=n_embed)
    self.embed_dropout = IndependentDropout(p=embed_dropout)
    self.lstm = nn.LSTM(input_size=n_embed + feat_embed_size,
                        hidden_size=n_lstm_hidden,
                        num_layers=n_lstm_layers,
                        dropout=lstm_dropout,
                        direction="bidirectional")
    self.lstm_dropout = SharedDropout(p=lstm_dropout)
    self.mlp_input_size = n_lstm_hidden * 2
def __init__(self,
             vocab_size,
             emb_dim=128,
             hidden_size=1024,
             n_layers=3,
             padding_idx=0,
             epsilon=1e-5,
             dropout_rate=0.1):
    """__init__"""
    super(LstmEncoderModel, self).__init__()
    self.padding_idx = padding_idx
    self.embedding = nn.Embedding(vocab_size,
                                  emb_dim,
                                  padding_idx=padding_idx)
    self.dropout = nn.Dropout(p=dropout_rate)
    self.lstm_encoder = nn.LSTM(emb_dim,
                                hidden_size,
                                num_layers=n_layers,
                                direction="bidirectional")
def __init__(self,
             vocab_size,
             embed_dim,
             hidden_size,
             num_layers,
             init_scale=0.1,
             enc_dropout=0.):
    super(LSTMEncoder, self).__init__()
    self.src_embedder = nn.Embedding(
        vocab_size,
        embed_dim,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    self.lstm = nn.LSTM(input_size=embed_dim,
                        hidden_size=hidden_size,
                        num_layers=num_layers,
                        dropout=enc_dropout)
    if enc_dropout > 0.0:
        self.dropout = nn.Dropout(enc_dropout)
    else:
        self.dropout = None
def __init__(self,
             enc_bi_rnn=False,
             enc_drop_rnn=0.1,
             enc_gru=False,
             d_model=512,
             d_enc=512,
             mask=True,
             **kwargs):
    super().__init__()
    assert isinstance(enc_bi_rnn, bool)
    assert isinstance(enc_drop_rnn, (int, float))
    assert 0 <= enc_drop_rnn < 1.0
    assert isinstance(enc_gru, bool)
    assert isinstance(d_model, int)
    assert isinstance(d_enc, int)
    assert isinstance(mask, bool)

    self.enc_bi_rnn = enc_bi_rnn
    self.enc_drop_rnn = enc_drop_rnn
    self.mask = mask

    # LSTM Encoder
    if enc_bi_rnn:
        direction = 'bidirectional'
    else:
        direction = 'forward'
    kwargs = dict(input_size=d_model,
                  hidden_size=d_enc,
                  num_layers=2,
                  time_major=False,
                  dropout=enc_drop_rnn,
                  direction=direction)
    if enc_gru:
        self.rnn_encoder = nn.GRU(**kwargs)
    else:
        self.rnn_encoder = nn.LSTM(**kwargs)

    # global feature transformation
    encoder_rnn_out_size = d_enc * (int(enc_bi_rnn) + 1)
    self.linear = nn.Linear(encoder_rnn_out_size, encoder_rnn_out_size)
def __init__(self,
             vocab_size,
             hidden_size,
             batch_size,
             num_layers=1,
             init_scale=0.1,
             dropout=0.0):
    super(RnnLm, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.init_scale = init_scale
    self.batch_size = batch_size
    self.reset_states()

    self.embedder = nn.Embedding(
        vocab_size,
        hidden_size,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))

    self.lstm = nn.LSTM(
        input_size=hidden_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=dropout,
        weight_ih_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)),
        weight_hh_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))

    self.fc = nn.Linear(
        hidden_size,
        vocab_size,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)),
        bias_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))

    self.dropout = nn.Dropout(p=dropout)
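The stored `batch_size` and the `reset_states()` call hint at a stateful language model where the final LSTM states of one batch seed the next chunk (truncated BPTT). A hedged illustration of passing explicit initial states to `nn.LSTM` (the state handling below is an assumption for illustration, not a copy of the original model's forward):

import paddle
import paddle.nn as nn

hidden_size, num_layers, batch_size = 200, 2, 8
lstm = nn.LSTM(hidden_size, hidden_size, num_layers=num_layers)

h = paddle.zeros([num_layers, batch_size, hidden_size])
c = paddle.zeros([num_layers, batch_size, hidden_size])

x = paddle.randn([batch_size, 35, hidden_size])   # one truncated-BPTT chunk
out, (h, c) = lstm(x, initial_states=(h, c))      # carry (h, c) into the next chunk
h, c = h.detach(), c.detach()                     # stop gradients across chunks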
def __init__(self, d_hidden: int, conv_layers: int, kernel_size: int,
             p_dropout: float):
    super().__init__()
    k = math.sqrt(1.0 / (d_hidden * kernel_size))
    self.conv_batchnorms = paddle.nn.LayerList([
        Conv1dBatchNorm(d_hidden,
                        d_hidden,
                        kernel_size,
                        stride=1,
                        padding=int((kernel_size - 1) / 2),
                        bias_attr=paddle.ParamAttr(
                            initializer=nn.initializer.Uniform(low=-k, high=k)),
                        data_format='NLC') for i in range(conv_layers)
    ])
    self.p_dropout = p_dropout
    self.hidden_size = int(d_hidden / 2)
    self.lstm = nn.LSTM(d_hidden, self.hidden_size, direction="bidirectional")
def __init__(self,
             embed_dim,
             hidden_size,
             vocab_size,
             output_dim,
             padding_idx=0,
             num_layers=1,
             dropout_prob=0.0,
             init_scale=0.1,
             embed_weight=None):
    super(BiLSTM, self).__init__()
    self.embedder = nn.Embedding(vocab_size, embed_dim, padding_idx)
    if embed_weight is not None:
        self.embedder.weight.set_value(embed_weight)
    self.lstm = nn.LSTM(embed_dim,
                        hidden_size,
                        num_layers,
                        direction='bidirectional',
                        dropout=dropout_prob)
    self.fc = nn.Linear(
        hidden_size * 2,
        hidden_size,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    self.fc_1 = nn.Linear(
        hidden_size * 8,
        hidden_size,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
    self.output_layer = nn.Linear(
        hidden_size,
        output_dim,
        weight_attr=paddle.ParamAttr(
            initializer=I.Uniform(low=-init_scale, high=init_scale)))
def __init__(self,
             vocab_size,
             num_classes,
             emb_dim=128,
             padding_idx=0,
             lstm_hidden_size=198,
             direction='forward',
             lstm_layers=1,
             dropout_rate=0.0,
             pooling_type=None,
             fc_hidden_size=96):
    super().__init__()
    self.direction = direction
    self.embedder = nn.Embedding(num_embeddings=vocab_size,
                                 embedding_dim=emb_dim,
                                 padding_idx=padding_idx)
    # self.lstm_encoder = nlp.seq2vec.LSTMEncoder(emb_dim,
    #                                             lstm_hidden_size,
    #                                             num_layers=lstm_layers,
    #                                             direction=direction,
    #                                             dropout=dropout_rate,
    #                                             pooling_type=pooling_type)
    self.lstm_layer = nn.LSTM(input_size=emb_dim,
                              hidden_size=lstm_hidden_size,
                              num_layers=lstm_layers,
                              direction=direction,
                              dropout=dropout_rate)
    self.fc = nn.Linear(
        lstm_hidden_size * (2 if direction == 'bidirect' else 1),
        fc_hidden_size)
    self.output_layer = nn.Linear(fc_hidden_size, num_classes)
    self.softmax = nn.Softmax(axis=1)
from collections import OrderedDict

import numpy as np
import paddle.fluid as fluid
import paddle.nn as nn

# SEED is assumed to be defined at module level in the original script.


def paddle_lstm():
    np.random.seed(SEED)
    x = np.random.rand(1, 80, 512).astype(np.float32)
    # np.save('org.npy', x)
    with fluid.dygraph.guard():
        lstm = nn.LSTM(512, 256, num_layers=2, direction='bidirectional')
        # sd = np.load('lstm.npy', allow_pickle=True).tolist()
        # lstm.set_state_dict(sd)
        state_dict = lstm.state_dict()
        sd = OrderedDict()
        for key, value in state_dict.items():
            v = value.numpy()
            print(key, value.shape, np.sum(v), np.mean(v), np.max(v), np.min(v))
            sd[key] = v
        np.save('lstm.npy', sd)
        inp = fluid.dygraph.to_variable(x)
        ret, _ = lstm(inp)
        print(len(ret))
        return ret.numpy()
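Note that `fluid.dygraph.guard()` and `fluid.dygraph.to_variable()` are legacy 1.x APIs; in Paddle 2.x dynamic graph mode is the default. The same shape check can be written without `fluid` (a sketch assuming Paddle >= 2.0; the seed value here is arbitrary):

import numpy as np
import paddle
import paddle.nn as nn

np.random.seed(42)
x = np.random.rand(1, 80, 512).astype(np.float32)

lstm = nn.LSTM(512, 256, num_layers=2, direction='bidirectional')
out, _ = lstm(paddle.to_tensor(x))
print(out.shape)   # [1, 80, 512]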
def __init__(self,
             user_size,
             adgroup_size,
             pid_size,
             cms_segid_size,
             cms_group_size,
             final_gender_size,
             age_level_size,
             pvalue_level_size,
             shopping_level_size,
             occupation_size,
             new_user_class_level_size,
             campaign_size,
             customer_size,
             cate_size,
             brand_size,  # above is all sparse feat size
             sparse_embed_size=4,
             att_embedding_size=8,
             sess_count=5,
             sess_max_length=10,
             l2_reg_embedding=1e-6):
    super().__init__()

    # feature size
    self.user_size = user_size
    self.adgroup_size = adgroup_size
    self.pid_size = pid_size
    self.cms_segid_size = cms_segid_size
    self.cms_group_size = cms_group_size
    self.final_gender_size = final_gender_size
    self.age_level_size = age_level_size
    self.pvalue_level_size = pvalue_level_size
    self.shopping_level_size = shopping_level_size
    self.occupation_size = occupation_size
    self.new_user_class_level_size = new_user_class_level_size
    self.campaign_size = campaign_size
    self.customer_size = customer_size
    self.cate_size = cate_size
    self.brand_size = brand_size

    # sparse embed size
    self.sparse_embed_size = sparse_embed_size

    # transform attention embed size
    self.att_embedding_size = att_embedding_size

    # hyper-parameters (the original hard-coded 5 and 10 here; use the
    # constructor arguments, which default to the same values)
    self.sess_count = sess_count
    self.sess_max_length = sess_max_length

    # sparse embedding layer
    self.userid_embeddings_var = paddle.nn.Embedding(
        self.user_size,
        self.sparse_embed_size,
        sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.adgroup_embeddings_var = paddle.nn.Embedding(
        self.adgroup_size,
        self.sparse_embed_size,
        sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.pid_embeddings_var = paddle.nn.Embedding(
        self.pid_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.cmsid_embeddings_var = paddle.nn.Embedding(
        self.cms_segid_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.cmsgroup_embeddings_var = paddle.nn.Embedding(
        self.cms_group_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.gender_embeddings_var = paddle.nn.Embedding(
        self.final_gender_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.age_embeddings_var = paddle.nn.Embedding(
        self.age_level_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.pvalue_embeddings_var = paddle.nn.Embedding(
        self.pvalue_level_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.shopping_embeddings_var = paddle.nn.Embedding(
        self.shopping_level_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.occupation_embeddings_var = paddle.nn.Embedding(
        self.occupation_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.new_user_class_level_embeddings_var = paddle.nn.Embedding(
        self.new_user_class_level_size,
        self.sparse_embed_size,
        # sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.campaign_embeddings_var = paddle.nn.Embedding(
        self.campaign_size,
        self.sparse_embed_size,
        sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.customer_embeddings_var = paddle.nn.Embedding(
        self.customer_size,
        self.sparse_embed_size,
        sparse=True,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.cate_embeddings_var = paddle.nn.Embedding(
        self.cate_size,
        self.sparse_embed_size,
        sparse=True,
        padding_idx=0,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))
    self.brand_embeddings_var = paddle.nn.Embedding(
        self.brand_size,
        self.sparse_embed_size,
        sparse=True,
        padding_idx=0,
        weight_attr=paddle.ParamAttr(
            regularizer=paddle.regularizer.L2Decay(l2_reg_embedding),
            initializer=nn.initializer.Normal(mean=0.0, std=0.0001)))

    # sess interest extractor layer
    self.position_encoding = PositionalEncoder(2 * self.sparse_embed_size)
    self.transform = nn.TransformerEncoderLayer(
        d_model=self.att_embedding_size,
        nhead=8,
        dim_feedforward=64,
        weight_attr=self._get_weight_attr(),
        bias_attr=False,
        dropout=0.0)

    # sess interest interacting layer
    self.bilstm = nn.LSTM(2 * self.sparse_embed_size,
                          2 * self.sparse_embed_size,
                          num_layers=2,
                          direction='bidirectional')

    # sess interest activating layer
    self.transform_actpool = AttentionSequencePoolingLayer(
        weight_normalization=True, name='transform')
    self.lstm_actpool = AttentionSequencePoolingLayer(
        weight_normalization=True, name='lstm')

    # MLP module
    self.mlp = MLP(mlp_hidden_units=[77, 200, 80])
def __init__(self,
             out_channels,  # 90 + unknown + start + padding
             enc_bi_rnn=False,
             dec_bi_rnn=False,
             dec_drop_rnn=0.0,
             dec_gru=False,
             d_model=512,
             d_enc=512,
             d_k=64,
             pred_dropout=0.1,
             max_text_length=30,
             mask=True,
             pred_concat=True,
             **kwargs):
    super().__init__()

    self.num_classes = out_channels
    self.enc_bi_rnn = enc_bi_rnn
    self.d_k = d_k
    self.start_idx = out_channels - 2
    self.padding_idx = out_channels - 1
    self.max_seq_len = max_text_length
    self.mask = mask
    self.pred_concat = pred_concat

    encoder_rnn_out_size = d_enc * (int(enc_bi_rnn) + 1)
    decoder_rnn_out_size = encoder_rnn_out_size * (int(dec_bi_rnn) + 1)

    # 2D attention layer
    self.conv1x1_1 = nn.Linear(decoder_rnn_out_size, d_k)
    self.conv3x3_1 = nn.Conv2D(d_model, d_k, kernel_size=3, stride=1, padding=1)
    self.conv1x1_2 = nn.Linear(d_k, 1)

    # Decoder RNN layer
    if dec_bi_rnn:
        direction = 'bidirectional'
    else:
        direction = 'forward'
    kwargs = dict(input_size=encoder_rnn_out_size,
                  hidden_size=encoder_rnn_out_size,
                  num_layers=2,
                  time_major=False,
                  dropout=dec_drop_rnn,
                  direction=direction)
    if dec_gru:
        self.rnn_decoder = nn.GRU(**kwargs)
    else:
        self.rnn_decoder = nn.LSTM(**kwargs)

    # Decoder input embedding
    self.embedding = nn.Embedding(self.num_classes,
                                  encoder_rnn_out_size,
                                  padding_idx=self.padding_idx)

    # Prediction layer
    self.pred_dropout = nn.Dropout(pred_dropout)
    pred_num_classes = self.num_classes - 1
    if pred_concat:
        fc_in_channel = decoder_rnn_out_size + d_model + d_enc
    else:
        fc_in_channel = d_model
    self.prediction = nn.Linear(fc_in_channel, pred_num_classes)
def func_test_layer_str(self):
    module = nn.ELU(0.2)
    self.assertEqual(str(module), 'ELU(alpha=0.2)')

    module = nn.CELU(0.2)
    self.assertEqual(str(module), 'CELU(alpha=0.2)')

    module = nn.GELU(True)
    self.assertEqual(str(module), 'GELU(approximate=True)')

    module = nn.Hardshrink()
    self.assertEqual(str(module), 'Hardshrink(threshold=0.5)')

    module = nn.Hardswish(name="Hardswish")
    self.assertEqual(str(module), 'Hardswish(name=Hardswish)')

    module = nn.Tanh(name="Tanh")
    self.assertEqual(str(module), 'Tanh(name=Tanh)')

    module = nn.Hardtanh(name="Hardtanh")
    self.assertEqual(str(module),
                     'Hardtanh(min=-1.0, max=1.0, name=Hardtanh)')

    module = nn.PReLU(1, 0.25, name="PReLU", data_format="NCHW")
    self.assertEqual(
        str(module),
        'PReLU(num_parameters=1, data_format=NCHW, init=0.25, dtype=float32, name=PReLU)'
    )

    module = nn.ReLU()
    self.assertEqual(str(module), 'ReLU()')

    module = nn.ReLU6()
    self.assertEqual(str(module), 'ReLU6()')

    module = nn.SELU()
    self.assertEqual(
        str(module),
        'SELU(scale=1.0507009873554805, alpha=1.6732632423543772)')

    module = nn.LeakyReLU()
    self.assertEqual(str(module), 'LeakyReLU(negative_slope=0.01)')

    module = nn.Sigmoid()
    self.assertEqual(str(module), 'Sigmoid()')

    module = nn.Hardsigmoid()
    self.assertEqual(str(module), 'Hardsigmoid()')

    module = nn.Softplus()
    self.assertEqual(str(module), 'Softplus(beta=1, threshold=20)')

    module = nn.Softshrink()
    self.assertEqual(str(module), 'Softshrink(threshold=0.5)')

    module = nn.Softsign()
    self.assertEqual(str(module), 'Softsign()')

    module = nn.Swish()
    self.assertEqual(str(module), 'Swish()')

    module = nn.Tanhshrink()
    self.assertEqual(str(module), 'Tanhshrink()')

    module = nn.ThresholdedReLU()
    self.assertEqual(str(module), 'ThresholdedReLU(threshold=1.0)')

    module = nn.LogSigmoid()
    self.assertEqual(str(module), 'LogSigmoid()')

    module = nn.Softmax()
    self.assertEqual(str(module), 'Softmax(axis=-1)')

    module = nn.LogSoftmax()
    self.assertEqual(str(module), 'LogSoftmax(axis=-1)')

    module = nn.Maxout(groups=2)
    self.assertEqual(str(module), 'Maxout(groups=2, axis=1)')

    module = nn.Linear(2, 4, name='linear')
    self.assertEqual(
        str(module),
        'Linear(in_features=2, out_features=4, dtype=float32, name=linear)')

    module = nn.Upsample(size=[12, 12])
    self.assertEqual(
        str(module),
        'Upsample(size=[12, 12], mode=nearest, align_corners=False, align_mode=0, data_format=NCHW)'
    )

    module = nn.UpsamplingNearest2D(size=[12, 12])
    self.assertEqual(
        str(module),
        'UpsamplingNearest2D(size=[12, 12], data_format=NCHW)')

    module = nn.UpsamplingBilinear2D(size=[12, 12])
    self.assertEqual(
        str(module),
        'UpsamplingBilinear2D(size=[12, 12], data_format=NCHW)')

    module = nn.Bilinear(in1_features=5, in2_features=4, out_features=1000)
    self.assertEqual(
        str(module),
        'Bilinear(in1_features=5, in2_features=4, out_features=1000, dtype=float32)'
    )

    module = nn.Dropout(p=0.5)
    self.assertEqual(str(module),
                     'Dropout(p=0.5, axis=None, mode=upscale_in_train)')

    module = nn.Dropout2D(p=0.5)
    self.assertEqual(str(module), 'Dropout2D(p=0.5, data_format=NCHW)')

    module = nn.Dropout3D(p=0.5)
    self.assertEqual(str(module), 'Dropout3D(p=0.5, data_format=NCDHW)')

    module = nn.AlphaDropout(p=0.5)
    self.assertEqual(str(module), 'AlphaDropout(p=0.5)')

    module = nn.Pad1D(padding=[1, 2], mode='constant')
    self.assertEqual(
        str(module),
        'Pad1D(padding=[1, 2], mode=constant, value=0.0, data_format=NCL)')

    module = nn.Pad2D(padding=[1, 0, 1, 2], mode='constant')
    self.assertEqual(
        str(module),
        'Pad2D(padding=[1, 0, 1, 2], mode=constant, value=0.0, data_format=NCHW)'
    )

    module = nn.ZeroPad2D(padding=[1, 0, 1, 2])
    self.assertEqual(str(module),
                     'ZeroPad2D(padding=[1, 0, 1, 2], data_format=NCHW)')

    module = nn.Pad3D(padding=[1, 0, 1, 2, 0, 0], mode='constant')
    self.assertEqual(
        str(module),
        'Pad3D(padding=[1, 0, 1, 2, 0, 0], mode=constant, value=0.0, data_format=NCDHW)'
    )

    module = nn.CosineSimilarity(axis=0)
    self.assertEqual(str(module), 'CosineSimilarity(axis=0, eps=1e-08)')

    module = nn.Embedding(10, 3, sparse=True)
    self.assertEqual(str(module), 'Embedding(10, 3, sparse=True)')

    module = nn.Conv1D(3, 2, 3)
    self.assertEqual(str(module),
                     'Conv1D(3, 2, kernel_size=[3], data_format=NCL)')

    module = nn.Conv1DTranspose(2, 1, 2)
    self.assertEqual(
        str(module),
        'Conv1DTranspose(2, 1, kernel_size=[2], data_format=NCL)')

    module = nn.Conv2D(4, 6, (3, 3))
    self.assertEqual(str(module),
                     'Conv2D(4, 6, kernel_size=[3, 3], data_format=NCHW)')

    module = nn.Conv2DTranspose(4, 6, (3, 3))
    self.assertEqual(
        str(module),
        'Conv2DTranspose(4, 6, kernel_size=[3, 3], data_format=NCHW)')

    module = nn.Conv3D(4, 6, (3, 3, 3))
    self.assertEqual(
        str(module),
        'Conv3D(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)')

    module = nn.Conv3DTranspose(4, 6, (3, 3, 3))
    self.assertEqual(
        str(module),
        'Conv3DTranspose(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)')

    module = nn.PairwiseDistance()
    self.assertEqual(str(module), 'PairwiseDistance(p=2.0)')

    module = nn.InstanceNorm1D(2)
    self.assertEqual(str(module),
                     'InstanceNorm1D(num_features=2, epsilon=1e-05)')

    module = nn.InstanceNorm2D(2)
    self.assertEqual(str(module),
                     'InstanceNorm2D(num_features=2, epsilon=1e-05)')

    module = nn.InstanceNorm3D(2)
    self.assertEqual(str(module),
                     'InstanceNorm3D(num_features=2, epsilon=1e-05)')

    module = nn.GroupNorm(num_channels=6, num_groups=6)
    self.assertEqual(
        str(module),
        'GroupNorm(num_groups=6, num_channels=6, epsilon=1e-05)')

    module = nn.LayerNorm([2, 2, 3])
    self.assertEqual(
        str(module),
        'LayerNorm(normalized_shape=[2, 2, 3], epsilon=1e-05)')

    module = nn.BatchNorm1D(1)
    self.assertEqual(
        str(module),
        'BatchNorm1D(num_features=1, momentum=0.9, epsilon=1e-05, data_format=NCL)'
    )

    module = nn.BatchNorm2D(1)
    self.assertEqual(
        str(module),
        'BatchNorm2D(num_features=1, momentum=0.9, epsilon=1e-05)')

    module = nn.BatchNorm3D(1)
    self.assertEqual(
        str(module),
        'BatchNorm3D(num_features=1, momentum=0.9, epsilon=1e-05, data_format=NCDHW)'
    )

    module = nn.SyncBatchNorm(2)
    self.assertEqual(
        str(module),
        'SyncBatchNorm(num_features=2, momentum=0.9, epsilon=1e-05)')

    module = nn.LocalResponseNorm(size=5)
    self.assertEqual(
        str(module),
        'LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=1.0)')

    module = nn.AvgPool1D(kernel_size=2, stride=2, padding=0)
    self.assertEqual(str(module),
                     'AvgPool1D(kernel_size=2, stride=2, padding=0)')

    module = nn.AvgPool2D(kernel_size=2, stride=2, padding=0)
    self.assertEqual(str(module),
                     'AvgPool2D(kernel_size=2, stride=2, padding=0)')

    module = nn.AvgPool3D(kernel_size=2, stride=2, padding=0)
    self.assertEqual(str(module),
                     'AvgPool3D(kernel_size=2, stride=2, padding=0)')

    module = nn.MaxPool1D(kernel_size=2, stride=2, padding=0)
    self.assertEqual(str(module),
                     'MaxPool1D(kernel_size=2, stride=2, padding=0)')

    module = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
    self.assertEqual(str(module),
                     'MaxPool2D(kernel_size=2, stride=2, padding=0)')

    module = nn.MaxPool3D(kernel_size=2, stride=2, padding=0)
    self.assertEqual(str(module),
                     'MaxPool3D(kernel_size=2, stride=2, padding=0)')

    module = nn.AdaptiveAvgPool1D(output_size=16)
    self.assertEqual(str(module), 'AdaptiveAvgPool1D(output_size=16)')

    module = nn.AdaptiveAvgPool2D(output_size=3)
    self.assertEqual(str(module), 'AdaptiveAvgPool2D(output_size=3)')

    module = nn.AdaptiveAvgPool3D(output_size=3)
    self.assertEqual(str(module), 'AdaptiveAvgPool3D(output_size=3)')

    module = nn.AdaptiveMaxPool1D(output_size=16, return_mask=True)
    self.assertEqual(
        str(module),
        'AdaptiveMaxPool1D(output_size=16, return_mask=True)')

    module = nn.AdaptiveMaxPool2D(output_size=3, return_mask=True)
    self.assertEqual(str(module),
                     'AdaptiveMaxPool2D(output_size=3, return_mask=True)')

    module = nn.AdaptiveMaxPool3D(output_size=3, return_mask=True)
    self.assertEqual(str(module),
                     'AdaptiveMaxPool3D(output_size=3, return_mask=True)')

    module = nn.SimpleRNNCell(16, 32)
    self.assertEqual(str(module), 'SimpleRNNCell(16, 32)')

    module = nn.LSTMCell(16, 32)
    self.assertEqual(str(module), 'LSTMCell(16, 32)')

    module = nn.GRUCell(16, 32)
    self.assertEqual(str(module), 'GRUCell(16, 32)')

    module = nn.PixelShuffle(3)
    self.assertEqual(str(module), 'PixelShuffle(upscale_factor=3)')

    module = nn.SimpleRNN(16, 32, 2)
    self.assertEqual(
        str(module),
        'SimpleRNN(16, 32, num_layers=2\n (0): RNN(\n (cell): SimpleRNNCell(16, 32)\n )\n (1): RNN(\n (cell): SimpleRNNCell(32, 32)\n )\n)'
    )

    module = nn.LSTM(16, 32, 2)
    self.assertEqual(
        str(module),
        'LSTM(16, 32, num_layers=2\n (0): RNN(\n (cell): LSTMCell(16, 32)\n )\n (1): RNN(\n (cell): LSTMCell(32, 32)\n )\n)'
    )

    module = nn.GRU(16, 32, 2)
    self.assertEqual(
        str(module),
        'GRU(16, 32, num_layers=2\n (0): RNN(\n (cell): GRUCell(16, 32)\n )\n (1): RNN(\n (cell): GRUCell(32, 32)\n )\n)'
    )

    module1 = nn.Sequential(('conv1', nn.Conv2D(1, 20, 5)),
                            ('relu1', nn.ReLU()),
                            ('conv2', nn.Conv2D(20, 64, 5)),
                            ('relu2', nn.ReLU()))
    self.assertEqual(
        str(module1),
        'Sequential(\n '
        '(conv1): Conv2D(1, 20, kernel_size=[5, 5], data_format=NCHW)\n '
        '(relu1): ReLU()\n '
        '(conv2): Conv2D(20, 64, kernel_size=[5, 5], data_format=NCHW)\n '
        '(relu2): ReLU()\n)')

    module2 = nn.Sequential(
        nn.Conv3DTranspose(4, 6, (3, 3, 3)),
        nn.AvgPool3D(kernel_size=2, stride=2, padding=0),
        nn.Tanh(name="Tanh"),
        module1,
        nn.Conv3D(4, 6, (3, 3, 3)),
        nn.MaxPool3D(kernel_size=2, stride=2, padding=0),
        nn.GELU(True))
    self.assertEqual(
        str(module2),
        'Sequential(\n '
        '(0): Conv3DTranspose(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)\n '
        '(1): AvgPool3D(kernel_size=2, stride=2, padding=0)\n '
        '(2): Tanh(name=Tanh)\n '
        '(3): Sequential(\n (conv1): Conv2D(1, 20, kernel_size=[5, 5], data_format=NCHW)\n (relu1): ReLU()\n'
        ' (conv2): Conv2D(20, 64, kernel_size=[5, 5], data_format=NCHW)\n (relu2): ReLU()\n )\n '
        '(4): Conv3D(4, 6, kernel_size=[3, 3, 3], data_format=NCDHW)\n '
        '(5): MaxPool3D(kernel_size=2, stride=2, padding=0)\n '
        '(6): GELU(approximate=True)\n)')
def __init__(self, in_channels, hidden_size):
    super(LSTMLayer, self).__init__()
    self.cell = nn.LSTM(in_channels,
                        hidden_size,
                        direction='bidirectional',
                        num_layers=2)