def spatial_pool(x, pooling_type, name):
    _, channel, height, width = x.shape
    if pooling_type == 'att':
        input_x = x
        # [N, 1, C, H * W]
        input_x = fluid.layers.reshape(input_x, shape=(0, 1, channel, -1))
        context_mask = fluid.layers.conv2d(
            input=x,
            num_filters=1,
            filter_size=1,
            stride=1,
            padding=0,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=ParamAttr(name=name + "_bias"))
        # [N, 1, H * W]
        context_mask = fluid.layers.reshape(context_mask, shape=(0, 0, -1))
        # [N, 1, H * W]
        context_mask = fluid.layers.softmax(context_mask, axis=2)
        # [N, 1, H * W, 1]
        context_mask = fluid.layers.reshape(context_mask, shape=(0, 0, -1, 1))
        # [N, 1, C, 1]
        context = fluid.layers.matmul(input_x, context_mask)
        # [N, C, 1, 1]
        context = fluid.layers.reshape(context, shape=(0, channel, 1, 1))
    else:
        # [N, C, 1, 1]
        context = fluid.layers.pool2d(
            input=x, pool_type='avg', global_pooling=True)
    return context
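# A hedged usage sketch for spatial_pool above (the feature shape and the
# "gc" name prefix are placeholders, not from the original code): the 'att'
# branch pools with a learned attention map, the fallback with global
# average pooling; both return an [N, C, 1, 1] context tensor.
import paddle.fluid as fluid

x = fluid.data(name='feat', shape=[None, 8, 32, 32], dtype='float32')
ctx_att = spatial_pool(x, pooling_type='att', name='gc')  # [N, 8, 1, 1]
ctx_avg = spatial_pool(x, pooling_type='avg', name='gc')  # [N, 8, 1, 1]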
def __init__(self):
    self.fc1 = layers.fc(size=256,
                         act=None,
                         param_attr=ParamAttr(name='fc1.w'),
                         bias_attr=ParamAttr(name='fc1.b'))
    self.fc_tuple = (layers.fc(size=128,
                               act=None,
                               param_attr=ParamAttr(name='fc2.w'),
                               bias_attr=ParamAttr(name='fc2.b')),
                     (layers.fc(size=1,
                                act=None,
                                param_attr=ParamAttr(name='fc3.w'),
                                bias_attr=ParamAttr(name='fc3.b')), 10), 10)
    self.fc_dict = {
        'k1': layers.fc(size=128,
                        act=None,
                        param_attr=ParamAttr(name='fc4.w'),
                        bias_attr=ParamAttr(name='fc4.b')),
        'k2': {
            'k22': layers.fc(size=1,
                             act=None,
                             param_attr=ParamAttr(name='fc5.w'),
                             bias_attr=ParamAttr(name='fc5.b'))
        },
        'k3': 1,
    }
def __init__(self,
             num_channels,
             num_filters,
             filter_size=(1, 3),
             stride=1,
             groups=1,
             padding=(0, 1),
             act="leaky",
             is_test=True):
    super(ConvBNLayer, self).__init__()
    self.conv = Conv2D(
        num_channels=num_channels,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        groups=groups,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Normal(0, 0.2)),
        bias_attr=False,
        act=None)
    self.batch_norm = BatchNorm(
        num_channels=num_filters,
        is_test=is_test,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Normal(0, 0.2),
            regularizer=L2Decay(0.)),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(100),
            regularizer=L2Decay(0.)))
    self.act = act
def __init__(self,
             in_c=768,
             out_c=768,
             filter_size=[3, 1],
             dilation=1,
             stride=1,
             affine=False,
             use_cudnn=True,
             name=None):
    super(ReluConvBN, self).__init__()
    # conv_std = (2.0 /
    #             (filter_size[0] * filter_size[1] * out_c * in_c))**0.5
    conv_param = fluid.ParamAttr(
        name=name if name is None else (name + "_conv.weights"),
        initializer=fluid.initializer.MSRA())
    self.conv = Conv2D(
        in_c,
        out_c,
        filter_size,
        dilation=[dilation, 1],
        stride=stride,
        padding=[(filter_size[0] - 1) * dilation // 2, 0],
        param_attr=conv_param,
        act=None,
        bias_attr=False,
        use_cudnn=use_cudnn)
    gamma = ParamAttr(
        initializer=fluid.initializer.Constant(value=1), trainable=affine)
    beta = ParamAttr(
        initializer=fluid.initializer.Constant(value=0), trainable=affine)
    self.bn = BatchNorm(out_c, param_attr=gamma, bias_attr=beta)
def __init__(self,
             prefix,
             num_channels=3,
             num_filters=1,
             size_k=1,
             padding=0,
             groups=1,
             act=None):
    super(Conv1D, self).__init__()
    fan_in = num_channels * size_k * 1
    k = 1. / math.sqrt(fan_in)
    param_attr = ParamAttr(
        name=prefix + "_w",
        initializer=fluid.initializer.Uniform(low=-k, high=k))
    bias_attr = ParamAttr(
        name=prefix + "_b",
        initializer=fluid.initializer.Uniform(low=-k, high=k))
    self._conv2d = fluid.dygraph.Conv2D(
        num_channels=num_channels,
        num_filters=num_filters,
        filter_size=(1, size_k),
        stride=1,
        padding=(0, padding),
        groups=groups,
        act=act,
        param_attr=param_attr,
        bias_attr=bias_attr)
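# A minimal dygraph sketch for the Conv1D wrapper above (sizes are invented;
# the class's forward() is not shown here, so the wrapped _conv2d is called
# directly): a 1-D signal is laid out as NCHW with height 1.
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    conv = Conv1D('c1', num_channels=3, num_filters=8, size_k=5, padding=2)
    signal = fluid.dygraph.to_variable(np.zeros([2, 3, 1, 100], 'float32'))
    out = conv._conv2d(signal)  # [2, 8, 1, 100]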
def build_model(self):
    att_outs = []
    for i, (input_dim, feature) in enumerate(
            zip(self.feature_dims, self.feature_input)):
        att = LSTMAttentionModel(input_dim, self.embedding_size,
                                 self.lstm_size, self.drop_rate)
        att_out = att.forward(feature, is_training=(self.mode == 'train'))
        att_outs.append(att_out)
    out = fluid.layers.concat(att_outs, axis=1)
    fc1 = fluid.layers.fc(
        input=out,
        size=8192,
        act='relu',
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0)))
    fc2 = fluid.layers.fc(
        input=fc1,
        size=4096,
        act='tanh',
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0)))
    self.logit = fluid.layers.fc(
        input=fc2,
        size=self.num_classes,
        act=None,
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0)))
    self.output = fluid.layers.sigmoid(self.logit)
def __init__(self, name, cfg, mode='train'):
    super(AttentionCluster, self).__init__()
    self.name = name
    self.cfg = cfg
    self.mode = mode
    self.is_training = (mode == 'train')
    self.get_config()
    self.fc1 = Linear(
        input_dim=36864,
        output_dim=1024,
        act='tanh',
        param_attr=ParamAttr(
            name="fc1.weights",
            initializer=fluid.initializer.MSRA(uniform=False)),
        bias_attr=ParamAttr(
            name="fc1.bias", initializer=fluid.initializer.MSRA()))
    self.fc2 = Linear(
        input_dim=1024,
        output_dim=4096,
        act='tanh',
        param_attr=ParamAttr(
            name="fc2.weights",
            initializer=fluid.initializer.MSRA(uniform=False)),
        bias_attr=ParamAttr(
            name="fc2.bias", initializer=fluid.initializer.MSRA()))
def forward(self, input, is_training):
    input_fc = fluid.layers.fc(
        input=input,
        size=self.embedding_size,
        act='tanh',
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0)))
    # dynamic_lstm expects its input projection to be 4 * lstm_size wide.
    lstm_forward_fc = fluid.layers.fc(
        input=input_fc,
        size=self.lstm_size * 4,
        act=None,
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0)))
    lstm_forward, _ = fluid.layers.dynamic_lstm(
        input=lstm_forward_fc, size=self.lstm_size * 4, is_reverse=False)
    lstm_backward_fc = fluid.layers.fc(
        input=input_fc,
        size=self.lstm_size * 4,
        act=None,
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0)))
    lstm_backward, _ = fluid.layers.dynamic_lstm(
        input=lstm_backward_fc, size=self.lstm_size * 4, is_reverse=True)
    lstm_concat = fluid.layers.concat(
        input=[lstm_forward, lstm_backward], axis=1)
    lstm_dropout = fluid.layers.dropout(
        x=lstm_concat, dropout_prob=0.5, is_test=(not is_training))
    lstm_weight = fluid.layers.fc(
        input=lstm_dropout,
        size=1,
        act='sequence_softmax',
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0)))
    scaled = fluid.layers.elementwise_mul(
        x=lstm_dropout, y=lstm_weight, axis=0)
    lstm_pool = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
    return lstm_pool
def _build_encoder(self):
    self.enc_output, enc_last_hidden, enc_last_cell = basic_lstm(
        self.src_emb,
        None,
        None,
        self.hidden_size,
        num_layers=self.num_layers,
        batch_first=self.batch_first,
        dropout_prob=self.dropout,
        param_attr=ParamAttr(
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale)),
        bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
        sequence_length=self.src_sequence_length)
    return self.enc_output, enc_last_hidden, enc_last_cell
def __init__(self, name_scope, in_channel, out_channel, opt):
    super(ConvBNLayer, self).__init__(name_scope)
    self._conv = SpectralConv(name_scope, in_channel, out_channel, opt)
    self._bn = BatchNorm(
        "%s_bn" % name_scope,
        num_channels=out_channel,
        param_attr=ParamAttr(
            name="%s_conv_w" % name_scope,
            initializer=fluid.initializer.Normal(
                loc=1.0, scale=0.02, seed=0)),
        bias_attr=ParamAttr(
            name="%s_bn_b" % name_scope,
            initializer=fluid.initializer.ConstantInitializer(value=0.0)))
def __init__(self, num_layers, hidden_size):
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.lstm_cells = []
    param_attr = ParamAttr(
        initializer=uniform_initializer(1.0 / math.sqrt(hidden_size)))
    bias_attr = ParamAttr(
        initializer=uniform_initializer(1.0 / math.sqrt(hidden_size)))
    for i in range(num_layers):
        self.lstm_cells.append(LSTMCell(hidden_size, param_attr, bias_attr))
def __init__(self, hidden_size, bias=False, init_scale=0.1):
    super(AttentionLayer, self).__init__()
    self.input_proj = Linear(
        hidden_size,
        hidden_size,
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)),
        bias_attr=bias)
    self.output_proj = Linear(
        hidden_size + hidden_size,
        hidden_size,
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)),
        bias_attr=bias)
def __init__(self, input_dim, seg_num, n_att, name):
    super(ShiftingAttentionModel, self).__init__()
    self.n_att = n_att
    self.input_dim = input_dim
    self.seg_num = seg_num
    self.name = name
    self.gnorm = np.sqrt(n_att)
    self.conv = Conv2D(
        num_channels=self.input_dim,
        num_filters=n_att,
        filter_size=1,
        param_attr=ParamAttr(
            initializer=fluid.initializer.MSRA(uniform=False)),
        bias_attr=ParamAttr(initializer=fluid.initializer.MSRA()))
def __init__(self, num_layers, hidden_size, dropout_prob=0., init_scale=0.1):
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.dropout_prob = dropout_prob
    self.lstm_cells = []
    self.init_scale = init_scale
    param_attr = ParamAttr(initializer=uniform_initializer(init_scale))
    bias_attr = ParamAttr(initializer=zero_constant)
    for i in range(num_layers):
        self.lstm_cells.append(LSTMCell(hidden_size, param_attr, bias_attr))
def __init__(self, dim_in, dim_out, batch_size, prefix, dim_inner, cfg,
             test_mode=False, max_pool_stride=2):
    super(spacetime_nonlocal, self).__init__()
    self.cfg = cfg
    self.prefix = prefix
    self.dim_inner = dim_inner
    self.max_pool_stride = max_pool_stride
    self.conv3d_1 = Conv3D(
        num_channels=dim_in,
        num_filters=dim_inner,
        filter_size=1,
        param_attr=ParamAttr(initializer=fluid.initializer.Normal(
            loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(value=0.)))
    self.conv3d_2 = Conv3D(
        num_channels=dim_in,
        num_filters=dim_inner,
        filter_size=1,
        param_attr=ParamAttr(initializer=fluid.initializer.Normal(
            loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(value=0.)))
    self.conv3d_3 = Conv3D(
        num_channels=dim_in,
        num_filters=dim_inner,
        filter_size=1,
        param_attr=ParamAttr(initializer=fluid.initializer.Normal(
            loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(value=0.)))
    self.conv3d_4 = Conv3D(
        num_channels=dim_inner,
        num_filters=dim_out,
        filter_size=1,
        param_attr=ParamAttr(initializer=fluid.initializer.Normal(
            loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(value=0.)))
    self.bn = BatchNorm(
        num_channels=dim_out,
        is_test=test_mode,
        momentum=cfg.NONLOCAL.bn_momentum,
        epsilon=cfg.NONLOCAL.bn_epsilon,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Constant(
                value=cfg.NONLOCAL.bn_init_gamma),
            regularizer=fluid.regularizer.L2Decay(
                cfg.TRAIN.weight_decay_bn)),
        bias_attr=ParamAttr(
            regularizer=fluid.regularizer.L2Decay(
                cfg.TRAIN.weight_decay_bn)))
def __init__(self, num_layers, hidden_size, dropout_prob=0., init_scale=0.1):
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.dropout_prob = dropout_prob
    self.lstm_cells = []
    self.init_scale = init_scale
    param_attr = ParamAttr(
        initializer=fluid.initializer.UniformInitializer(
            low=-init_scale, high=init_scale))
    bias_attr = ParamAttr(initializer=fluid.initializer.Constant(0.0))
    for i in range(num_layers):
        self.lstm_cells.append(LSTMCell(hidden_size, param_attr, bias_attr))
def _build_rnn_graph(self, inputs, init_hidden, init_cell,
                     sequence_length_ph):
    rnn_out, last_hidden, last_cell = basic_lstm(
        input=inputs,
        init_hidden=init_hidden,
        init_cell=init_cell,
        hidden_size=self.n_hidden_,
        num_layers=self.num_layers_,
        batch_first=True,
        dropout_prob=self.dropout_prob_,
        sequence_length=sequence_length_ph,
        param_attr=ParamAttr(
            initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale_, high=self.init_scale_)),
        bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0)),
        forget_bias=0.0)
    return rnn_out, last_hidden, last_cell
def __init__(self, n_channel, name=None):
    super(MixedOp, self).__init__()
    PRIMITIVES = ConvBN_PRIMITIVES
    ops = []
    for primitive in PRIMITIVES:
        op = OPS[primitive](
            n_channel, name if name is None else name + "/" + primitive)
        if 'pool' in primitive:
            gamma = ParamAttr(
                initializer=fluid.initializer.Constant(value=1),
                trainable=False)
            beta = ParamAttr(
                initializer=fluid.initializer.Constant(value=0),
                trainable=False)
            BN = BatchNorm(n_channel, param_attr=gamma, bias_attr=beta)
            op = fluid.dygraph.Sequential(op, BN)
        ops.append(op)
    self._ops = fluid.dygraph.LayerList(ops)
def Conv3dAffine(blob_in, prefix, dim_in, dim_out, filter_size, stride,
                 padding, cfg, group=1, test_mode=False, bn_init=None):
    blob_out = fluid.layers.conv3d(
        input=blob_in,
        num_filters=dim_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        groups=group,
        param_attr=ParamAttr(
            name=prefix + "_weights",
            initializer=fluid.initializer.MSRA()),
        bias_attr=False,
        name=prefix + "_conv")
    blob_out_shape = blob_out.shape
    affine_name = "bn" + prefix[3:]
    affine_scale = fluid.layers.create_parameter(
        shape=[blob_out_shape[1]],
        dtype=blob_out.dtype,
        attr=ParamAttr(name=affine_name + '_scale'),
        default_initializer=fluid.initializer.Constant(value=1.))
    affine_bias = fluid.layers.create_parameter(
        shape=[blob_out_shape[1]],
        dtype=blob_out.dtype,
        attr=ParamAttr(name=affine_name + '_offset'),
        default_initializer=fluid.initializer.Constant(value=0.))
    blob_out = fluid.layers.affine_channel(
        blob_out, scale=affine_scale, bias=affine_bias, name=affine_name)
    return blob_out
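# A hedged usage sketch for Conv3dAffine (the shape and "res_stem" prefix are
# made up; note that cfg, dim_in, test_mode and bn_init are accepted but
# unused in the body above): the frozen scale/offset pair stands in for
# batch-norm statistics at inference time.
video = fluid.data(name='video', shape=[None, 3, 8, 56, 56], dtype='float32')
feat = Conv3dAffine(video, "res_stem", dim_in=3, dim_out=64,
                    filter_size=3, stride=1, padding=1, cfg=None)
# Parameters created: res_stem_weights, bn_stem_scale, bn_stem_offset.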
def Conv3dBN(blob_in, prefix, dim_in, dim_out, filter_size, stride, padding,
             cfg, group=1, test_mode=False, bn_init=None):
    blob_out = fluid.layers.conv3d(
        input=blob_in,
        num_filters=dim_out,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        groups=group,
        param_attr=ParamAttr(
            name=prefix + "_weights",
            initializer=fluid.initializer.MSRA()),
        bias_attr=False,
        name=prefix + "_conv")
    bn_name = "bn" + prefix[3:]
    blob_out = fluid.layers.batch_norm(
        blob_out,
        is_test=test_mode,
        momentum=cfg.MODEL.bn_momentum,
        epsilon=cfg.MODEL.bn_epsilon,
        name=bn_name,
        param_attr=ParamAttr(
            name=bn_name + "_scale",
            initializer=fluid.initializer.Constant(
                value=bn_init if bn_init is not None else 1.),
            regularizer=fluid.regularizer.L2Decay(
                cfg.TRAIN.weight_decay_bn)),
        bias_attr=ParamAttr(
            name=bn_name + "_offset",
            regularizer=fluid.regularizer.L2Decay(
                cfg.TRAIN.weight_decay_bn)),
        moving_mean_name=bn_name + "_mean",
        moving_variance_name=bn_name + "_variance")
    return blob_out
def _build_decoder(self, enc_last_hidden, enc_last_cell, mode='train'):
    softmax_weight = layers.create_parameter(
        [self.hidden_size, self.tar_vocab_size],
        dtype="float32",
        name="softmax_weight",
        default_initializer=fluid.initializer.UniformInitializer(
            low=-self.init_scale, high=self.init_scale))
    if mode == 'train':
        #fluid.layers.Print(self.tar_emb)
        #fluid.layers.Print(enc_last_hidden)
        #fluid.layers.Print(enc_last_cell)
        dec_output, dec_last_hidden, dec_last_cell = basic_lstm(
            self.tar_emb,
            enc_last_hidden,
            enc_last_cell,
            self.hidden_size,
            num_layers=self.num_layers,
            batch_first=self.batch_first,
            dropout_prob=self.dropout,
            param_attr=ParamAttr(
                initializer=fluid.initializer.UniformInitializer(
                    low=-self.init_scale, high=self.init_scale)),
            bias_attr=ParamAttr(
                initializer=fluid.initializer.Constant(0.0)))
        dec_output = layers.matmul(dec_output, softmax_weight)
        return dec_output
    else:
        print("mode not supported:", mode)
def weight_tying_fc(x):
    embedding_w = layers.create_parameter(
        shape=[decoder_config['vocab_size'],
               decoder_config['embedding_size']],
        dtype='float32',
        attr=ParamAttr(
            name='word_embedding',
            initializer=fluid.initializer.Uniform()))
    bias = layers.create_parameter(
        [decoder_config['vocab_size']],
        dtype='float32',
        is_bias=True,
        name='out_fc_bias')
    # Project x into the embedding space, then reuse the (transposed)
    # word-embedding matrix as the output projection (weight tying).
    proj_x = layers.fc(x, decoder_config['embedding_size'])
    return layers.elementwise_add(
        layers.matmul(proj_x, embedding_w, transpose_y=True), bias)
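# A short, hedged example of the weight-tying head above (the config values
# are placeholders): because the output matrix reuses 'word_embedding', the
# only new parameters are the small projection fc and the bias vector.
decoder_config = {'vocab_size': 10000, 'embedding_size': 512}
hidden = fluid.data(name='dec_hidden', shape=[None, 512], dtype='float32')
logits = weight_tying_fc(hidden)  # [batch, vocab_size]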
def build_model(self):
    att_outs = []
    for i, (input_dim, cluster_num, feature) in enumerate(
            zip(self.feature_dims, self.cluster_nums, self.feature_input)):
        att = ShiftingAttentionModel(input_dim, self.seg_num, cluster_num,
                                     "satt{}".format(i))
        att_out = att.forward(feature)
        att_outs.append(att_out)
    out = fluid.layers.concat(att_outs, axis=1)
    if self.drop_rate > 0.:
        out = fluid.layers.dropout(
            out, self.drop_rate, is_test=(not self.is_training))
    fc1 = fluid.layers.fc(
        out,
        size=1024,
        act='tanh',
        param_attr=ParamAttr(
            name="fc1.weights",
            initializer=fluid.initializer.MSRA(uniform=False)),
        bias_attr=ParamAttr(
            name="fc1.bias", initializer=fluid.initializer.MSRA()))
    fc2 = fluid.layers.fc(
        fc1,
        size=4096,
        act='tanh',
        param_attr=ParamAttr(
            name="fc2.weights",
            initializer=fluid.initializer.MSRA(uniform=False)),
        bias_attr=ParamAttr(
            name="fc2.bias", initializer=fluid.initializer.MSRA()))
    aggregate_model = LogisticModel()
    self.output, self.logit = aggregate_model.build_model(
        model_input=fc2,
        vocab_size=self.class_num,
        is_training=self.is_training)
def __init__(self, name, cfg, mode='train'):
    super(NonLocal, self).__init__()
    self.name = name
    self.cfg = cfg
    self.mode = mode
    self.is_training = (mode == 'train')
    self.linear = Linear(10, 10)
    self.get_config()
    self.use_temp_convs_set, self.temp_strides_set, self.pool_stride = \
        resnet_video.obtain_arc(cfg.MODEL.video_arc_choice,
                                cfg[mode.upper()]['video_length'])
    self.conv3d = Conv3D(
        num_channels=3,
        num_filters=64,
        filter_size=[1 + self.use_temp_convs_set[0][0] * 2, 7, 7],
        stride=[self.temp_strides_set[0][0], 2, 2],
        padding=[self.use_temp_convs_set[0][0], 3, 3],
        param_attr=ParamAttr(initializer=fluid.initializer.MSRA()),
        bias_attr=False)
    self.test_mode = (mode != 'train')
    self.bn_conv1 = BatchNorm(
        num_channels=64,
        is_test=self.test_mode,
        momentum=cfg.MODEL.bn_momentum,
        epsilon=cfg.MODEL.bn_epsilon,
        param_attr=ParamAttr(regularizer=fluid.regularizer.L2Decay(
            cfg.TRAIN.weight_decay_bn)),
        bias_attr=ParamAttr(regularizer=fluid.regularizer.L2Decay(
            cfg.TRAIN.weight_decay_bn)),
        moving_mean_name="bn_conv1_mean",
        moving_variance_name="bn_conv1_variance")
    self.fc = Linear(
        2048,
        cfg.MODEL.num_classes,
        param_attr=ParamAttr(initializer=fluid.initializer.Normal(
            loc=0.0, scale=cfg.MODEL.fc_init_std)),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(value=0.)))
def channel_conv(input, inner_ch, out_ch, name):
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=inner_ch,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=name + "_conv1_weights"),
        bias_attr=ParamAttr(name=name + "_conv1_bias"),
        name=name + "_conv1")
    conv = fluid.layers.layer_norm(
        conv,
        begin_norm_axis=1,
        param_attr=ParamAttr(name=name + "_ln_weights"),
        bias_attr=ParamAttr(name=name + "_ln_bias"),
        act="relu",
        name=name + "_ln")
    conv = fluid.layers.conv2d(
        input=conv,
        num_filters=out_ch,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(
            name=name + "_conv2_weights",
            initializer=ConstantInitializer(value=0.0)),
        bias_attr=ParamAttr(
            name=name + "_conv2_bias",
            initializer=ConstantInitializer(value=0.0)),
        name=name + "_conv2")
    return conv
def __init__(self, vocab_size, embed_dim, hidden_size, num_layers,
             dropout_prob=0., init_scale=0.1):
    super(Decoder, self).__init__()
    self.embedder = Embedding(
        size=[vocab_size, embed_dim],
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)))
    self.lstm_attention = RNN(
        DecoderCell(num_layers, embed_dim, hidden_size, dropout_prob,
                    init_scale),
        is_reverse=False,
        time_major=False)
    self.output_layer = Linear(
        hidden_size,
        vocab_size,
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)),
        bias_attr=False)
def create_rnn_op(self):
    x = layers.data(
        shape=[self.sent_len, self.batch_size, self.input_dim],
        dtype='float32',
        name='x',
        append_batch_size=False)
    x.stop_gradient = False
    h_boot = layers.data(
        shape=[self.input_dim], dtype='float32', name='h_boot')
    h_boot.stop_gradient = False
    rnn = layers.StaticRNN()
    with rnn.step():
        h_pre = rnn.memory(init=h_boot)
        x_t = rnn.step_input(x)
        temp_l = layers.fc(
            input=x_t,
            size=self.input_dim,
            param_attr=ParamAttr(
                name='W',
                initializer=fluid.initializer.ConstantInitializer(1.0)),
            bias_attr=False)
        temp_r = layers.fc(
            input=h_pre,
            size=self.input_dim,
            param_attr=ParamAttr(
                name='U',
                initializer=fluid.initializer.ConstantInitializer(0.0)),
            bias_attr=False)
        h = layers.sigmoid(x=layers.elementwise_add(x=temp_l, y=temp_r))
        rnn.update_memory(h_pre, h)
        rnn.output(h)
    return rnn()
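# A hedged sketch of running the StaticRNN graph above from inside the
# owning class (assuming sent_len=5, batch_size=4, input_dim=16): the
# returned variable stacks the hidden state of every time step.
import numpy as np

out = self.create_rnn_op()  # [sent_len, batch_size, input_dim]
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
hidden, = exe.run(feed={'x': np.random.rand(5, 4, 16).astype('float32'),
                        'h_boot': np.random.rand(4, 16).astype('float32')},
                  fetch_list=[out])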
def __init__(self, vocab_size, embed_dim, hidden_size, num_layers,
             dropout_prob=0., init_scale=0.1):
    super(Encoder, self).__init__()
    self.embedder = Embedding(
        size=[vocab_size, embed_dim],
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)))
    self.stack_lstm = RNN(
        EncoderCell(num_layers, embed_dim, hidden_size, dropout_prob,
                    init_scale),
        is_reverse=False,
        time_major=False)
def __init__(self):
    self.fc1 = layers.fc(size=256,
                         act=None,
                         param_attr=ParamAttr(name='fc1.w'),
                         bias_attr=ParamAttr(name='fc1.b'))
    self.fc2 = layers.fc(size=128,
                         act=None,
                         param_attr=ParamAttr(name='fc2.w'),
                         bias_attr=ParamAttr(name='fc2.b'))
    self.fc3 = layers.fc(size=1,
                         act=None,
                         param_attr=ParamAttr(name='fc3.w'),
                         bias_attr=ParamAttr(name='fc3.b'))
def __init__(self, num_layers, input_size, hidden_size, dropout_prob=0.,
             init_scale=0.1):
    super(EncoderCell, self).__init__()
    self.dropout_prob = dropout_prob
    # use add_sublayer to add multi-layers
    self.lstm_cells = []
    for i in range(num_layers):
        self.lstm_cells.append(
            self.add_sublayer(
                "lstm_%d" % i,
                BasicLSTMCell(
                    input_size=input_size if i == 0 else hidden_size,
                    hidden_size=hidden_size,
                    param_attr=ParamAttr(initializer=UniformInitializer(
                        low=-init_scale, high=init_scale)))))