def conv_7x1_1x7(x, c_out, stride, affine=True, name=''):
    """ReLU -> 1x7 conv -> 7x1 conv -> batch norm (factorized 7x7 conv).

    Fix: `stride` was accepted but never passed to either conv, so a
    strided (reduction) call silently kept the spatial size. It is now
    applied as (1, stride) on the 1x7 conv and (stride, 1) on the 7x1
    conv; callers passing stride=1 see identical behavior.

    Args:
        x: 4-D input tensor (NCHW).
        c_out: number of output channels for both convs.
        stride: spatial stride, split across the two 1-D convs.
        affine: whether BN scale/offset are learnable (via bn_param_config).
        name: parameter-name prefix.
    Returns:
        The batch-normalized output tensor.
    """
    x = fluid.layers.relu(x)
    # Uniform init bound from fan_in = c_in * 1 * 7.
    k = (1. / x.shape[1] / 1 / 7)**0.5
    x = fluid.layers.conv2d(
        x,
        c_out, (1, 7),
        stride=(1, stride),
        padding=(0, 3),
        param_attr=fluid.ParamAttr(
            name=name + "_conv_7x1_1x7_1",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    # Second bound from fan_in = c_out * 7 * 1.
    k = (1. / x.shape[1] / 7 / 1)**0.5
    x = fluid.layers.conv2d(
        x,
        c_out, (7, 1),
        stride=(stride, 1),
        padding=(3, 0),
        param_attr=fluid.ParamAttr(
            name=name + "_conv_7x1_1x7_2",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    gama, beta = bn_param_config(name, affine, "conv_7x1_1x7_bn")
    x = fluid.layers.batch_norm(
        x,
        param_attr=gama,
        bias_attr=beta,
        moving_mean_name=name + "_conv_7x1_1x7_bn_mean",
        moving_variance_name=name + "_conv_7x1_1x7_bn_variance")
    return x
def _factorized_reduce(self, x, c_out, affine=True, name=''):
    """Halve the spatial size of `x` while producing c_out channels.

    Two stride-2 1x1 convs — one on `x`, one on a view shifted by one
    pixel in H and W — each emit c_out/2 channels; their concatenation
    is batch-normalized.
    """
    assert c_out % 2 == 0
    x = fluid.layers.relu(x)
    shifted = x[:, :, 1:, 1:]

    def _half_branch(inp, suffix):
        # Stride-2 1x1 conv for one half of the output channels.
        bound = (1. / inp.shape[1] / 1 / 1)**0.5
        return fluid.layers.conv2d(
            inp,
            c_out // 2,
            1,
            stride=2,
            param_attr=fluid.ParamAttr(
                name=name + suffix,
                initializer=UniformInitializer(
                    low=-bound, high=bound)),
            bias_attr=False)

    merged = fluid.layers.concat(
        input=[_half_branch(x, "/fr_conv1"),
               _half_branch(shifted, "/fr_conv2")],
        axis=1)
    gama, beta = self._bn_param_config(name, affine, "fr_bn")
    return fluid.layers.batch_norm(
        merged,
        param_attr=gama,
        bias_attr=beta,
        moving_mean_name=name + "/fr_mean",
        moving_variance_name=name + "/fr_variance")
def model(x, y, c_in, num_classes, layers, steps=4, multiplier=4, stem_multiplier=3, name="model"):
    """Build the full network: stem conv+BN, a stack of cells, and an FC head.

    Cells at depth layers//3 and 2*layers//3 are reduction cells with
    doubled channel count. Returns (logits, mean softmax cross-entropy
    loss of logits against label tensor y).
    """
    stem_channels = stem_multiplier * c_in
    bound = (1. / x.shape[1] / 3 / 3)**0.5
    stem = fluid.layers.conv2d(
        x,
        stem_channels,
        3,
        padding=1,
        param_attr=fluid.ParamAttr(
            name=name + "_conv_0",
            initializer=UniformInitializer(low=-bound, high=bound)),
        bias_attr=False)
    stem = fluid.layers.batch_norm(
        stem,
        param_attr=fluid.ParamAttr(
            name=name + "_bn0_scale",
            initializer=ConstantInitializer(value=1)),
        bias_attr=fluid.ParamAttr(
            name=name + "_bn0_offset",
            initializer=ConstantInitializer(value=0)),
        moving_mean_name=name + "_bn0_mean",
        moving_variance_name=name + "_bn0_variance")

    s0 = s1 = stem
    prev_was_reduction = False
    channels = c_in
    for idx in range(layers):
        is_reduction = idx in [layers // 3, 2 * layers // 3]
        if is_reduction:
            channels *= 2
        s0, s1 = s1, cell(s0, s1, steps, multiplier, channels, is_reduction,
                          prev_was_reduction, name + "_l" + str(idx))
        prev_was_reduction = is_reduction

    pooled = fluid.layers.pool2d(s1, pool_type='avg', global_pooling=True)
    pooled = fluid.layers.squeeze(pooled, axes=[2, 3])
    fc_bound = (1. / pooled.shape[1])**0.5
    logits = fluid.layers.fc(
        pooled,
        num_classes,
        param_attr=fluid.ParamAttr(
            name=name + "_fc_weights",
            initializer=UniformInitializer(low=-fc_bound, high=fc_bound)),
        bias_attr=fluid.ParamAttr(
            name=name + "_fc_bias",
            initializer=UniformInitializer(low=-fc_bound, high=fc_bound)))
    train_loss = fluid.layers.reduce_mean(
        fluid.layers.softmax_with_cross_entropy(logits, y))
    return logits, train_loss
def __init__(self, hidden_size, bias=False, init_scale=0.1):
    """Create the attention projections.

    `input_proj` maps hidden_size -> hidden_size; `output_proj` maps the
    2*hidden_size concatenation back to hidden_size. Weights are
    uniform(-init_scale, init_scale); bias is controlled by `bias`.
    """
    super(AttentionLayer, self).__init__()

    def _uniform_attr():
        # Fresh ParamAttr per layer so each gets its own initializer.
        return ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale))

    self.input_proj = Linear(
        hidden_size,
        hidden_size,
        param_attr=_uniform_attr(),
        bias_attr=bias)
    self.output_proj = Linear(
        hidden_size + hidden_size,
        hidden_size,
        param_attr=_uniform_attr(),
        bias_attr=bias)
def _classifier(self, x, num_classes, name):
    """Global-average-pool `x`, then project to num_classes logits via FC."""
    pooled = fluid.layers.pool2d(x, pool_type='avg', global_pooling=True)
    pooled = fluid.layers.squeeze(pooled, axes=[2, 3])
    # Uniform bound from fan_in = flattened feature count.
    bound = (1. / pooled.shape[1])**0.5
    return fluid.layers.fc(
        pooled,
        num_classes,
        param_attr=fluid.ParamAttr(
            name=name + "/fc_weights",
            initializer=UniformInitializer(low=-bound, high=bound)),
        bias_attr=fluid.ParamAttr(
            name=name + "/fc_bias",
            initializer=UniformInitializer(low=-bound, high=bound)))
def _relu_conv_bn(self, x, c_out, kernel_size, stride, padding, affine=True, name=''):
    """Pre-activation block: ReLU -> conv2d (no bias) -> batch norm."""
    activated = fluid.layers.relu(x)
    # Uniform init bound from fan_in = c_in * k * k.
    bound = (1. / activated.shape[1] / kernel_size / kernel_size)**0.5
    conv = fluid.layers.conv2d(
        activated,
        c_out,
        kernel_size,
        stride=stride,
        padding=padding,
        param_attr=fluid.ParamAttr(
            name=name + "/rcb_conv",
            initializer=UniformInitializer(
                low=-bound, high=bound)),
        bias_attr=False)
    scale, offset = self._bn_param_config(name, affine, "rcb_bn")
    return fluid.layers.batch_norm(
        conv,
        param_attr=scale,
        bias_attr=offset,
        moving_mean_name=name + "/rcb_mean",
        moving_variance_name=name + "/rcb_variance")
def _dil_conv(self, x, c_out, kernel_size, stride, padding, dilation, affine=True, name=''):
    """Dilated separable conv: ReLU -> dilated depthwise -> 1x1 pointwise -> BN."""
    channels = x.shape[1]
    activated = fluid.layers.relu(x)
    # NOTE(review): the bound divides by the full channel count even though
    # the conv is depthwise (groups=channels); this matches the convention
    # used by the other conv helpers in this file.
    bound = (1. / activated.shape[1] / kernel_size / kernel_size)**0.5
    depthwise = fluid.layers.conv2d(
        activated,
        channels,
        kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=channels,
        use_cudnn=False,
        param_attr=fluid.ParamAttr(
            name=name + "/dil_conv1",
            initializer=UniformInitializer(
                low=-bound, high=bound)),
        bias_attr=False)
    bound = (1. / depthwise.shape[1] / 1 / 1)**0.5
    pointwise = fluid.layers.conv2d(
        depthwise,
        c_out,
        1,
        padding=0,
        param_attr=fluid.ParamAttr(
            name=name + "/dil_conv2",
            initializer=UniformInitializer(
                low=-bound, high=bound)),
        bias_attr=False)
    scale, offset = self._bn_param_config(name, affine, "dil_conv_bn")
    return fluid.layers.batch_norm(
        pointwise,
        param_attr=scale,
        bias_attr=offset,
        moving_mean_name=name + "/dil_bn_mean",
        moving_variance_name=name + "/dil_bn_variance")
def __init__(self, vocab_size, embed_dim, hidden_size, num_layers, dropout_prob=0., init_scale=0.1):
    """Assemble the decoder: embedding, attention LSTM stack, vocab projection.

    All weights use uniform(-init_scale, init_scale); the output layer has
    no bias. Per-step computation lives in DecoderCell (defined elsewhere).
    """
    super(Decoder, self).__init__()

    def _uniform_attr():
        return ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale))

    self.embedder = Embedding(
        size=[vocab_size, embed_dim], param_attr=_uniform_attr())
    self.lstm_attention = RNN(
        DecoderCell(num_layers, embed_dim, hidden_size, dropout_prob,
                    init_scale),
        is_reverse=False,
        time_major=False)
    self.output_layer = Linear(
        hidden_size,
        vocab_size,
        param_attr=_uniform_attr(),
        bias_attr=False)
def __init__(self, vocab_size, embed_dim, hidden_size, num_layers, dropout_prob=0., init_scale=0.1):
    """Assemble the encoder: token embedding plus a stacked-LSTM RNN wrapper.

    Embedding weights use uniform(-init_scale, init_scale); per-step
    computation lives in EncoderCell (defined elsewhere).
    """
    super(Encoder, self).__init__()
    self.embedder = Embedding(
        size=[vocab_size, embed_dim],
        param_attr=ParamAttr(initializer=UniformInitializer(
            low=-init_scale, high=init_scale)))
    self.stack_lstm = RNN(
        EncoderCell(num_layers, embed_dim, hidden_size, dropout_prob,
                    init_scale),
        is_reverse=False,
        time_major=False)
def __init__(self, num_layers, input_size, hidden_size, dropout_prob=0., init_scale=0.1):
    """Create num_layers stacked BasicLSTMCells.

    The first cell consumes input_size features; later cells consume
    hidden_size. dropout_prob is stored for use at call time.
    """
    super(EncoderCell, self).__init__()
    self.dropout_prob = dropout_prob
    # add_sublayer registers each cell so its parameters are tracked.
    self.lstm_cells = [
        self.add_sublayer(
            "lstm_%d" % idx,
            BasicLSTMCell(
                input_size=input_size if idx == 0 else hidden_size,
                hidden_size=hidden_size,
                param_attr=ParamAttr(initializer=UniformInitializer(
                    low=-init_scale, high=init_scale))))
        for idx in range(num_layers)
    ]
def conv_bn(x, c_out, kernel_size, padding, stride, name):
    """Bias-free conv2d followed by batch norm (scale init 1, offset init 0)."""
    # Uniform init bound from fan_in = c_in * k * k.
    bound = (1. / x.shape[1] / kernel_size / kernel_size)**0.5
    conv = fluid.layers.conv2d(
        x,
        c_out,
        kernel_size,
        stride=stride,
        padding=padding,
        param_attr=fluid.ParamAttr(
            name=name + "_conv",
            initializer=UniformInitializer(
                low=-bound, high=bound)),
        bias_attr=False)
    return fluid.layers.batch_norm(
        conv,
        param_attr=fluid.ParamAttr(
            name=name + "_bn_scale",
            initializer=ConstantInitializer(value=1)),
        bias_attr=fluid.ParamAttr(
            name=name + "_bn_offset",
            initializer=ConstantInitializer(value=0)),
        moving_mean_name=name + "_bn_mean",
        moving_variance_name=name + "_bn_variance")
def sep_conv(x, c_out, kernel_size, stride, padding, affine=True, name=''):
    """Separable conv: two (ReLU -> depthwise -> pointwise -> BN) stages.

    The first depthwise conv carries `stride`; the second always uses
    stride 1. Fix: the final pointwise conv previously emitted c_in
    channels, so the requested c_out was silently ignored whenever
    c_out != c_in. It now emits c_out, matching the standard DARTS
    SepConv definition; calls with c_out == c_in are unchanged.

    Args:
        x: 4-D input tensor (NCHW).
        c_out: number of output channels of the block.
        kernel_size, stride, padding: depthwise conv geometry.
        affine: whether BN scale/offset are learnable (via bn_param_config).
        name: parameter-name prefix.
    Returns:
        The batch-normalized output tensor with c_out channels.
    """
    c_in = x.shape[1]
    x = fluid.layers.relu(x)
    k = (1. / x.shape[1] / kernel_size / kernel_size)**0.5
    x = fluid.layers.conv2d(
        x,
        c_in,
        kernel_size,
        stride=stride,
        padding=padding,
        groups=c_in,
        use_cudnn=False,
        param_attr=fluid.ParamAttr(
            name=name + "_sep_conv_1_1",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    k = (1. / x.shape[1] / 1 / 1)**0.5
    x = fluid.layers.conv2d(
        x,
        c_in,
        1,
        padding=0,
        param_attr=fluid.ParamAttr(
            name=name + "_sep_conv_1_2",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    gama, beta = bn_param_config(name, affine, "sep_conv_bn1")
    x = fluid.layers.batch_norm(
        x,
        param_attr=gama,
        bias_attr=beta,
        moving_mean_name=name + "_sep_bn1_mean",
        moving_variance_name=name + "_sep_bn1_variance")
    x = fluid.layers.relu(x)
    k = (1. / x.shape[1] / kernel_size / kernel_size)**0.5
    x = fluid.layers.conv2d(
        x,
        c_in,
        kernel_size,
        stride=1,
        padding=padding,
        groups=c_in,
        use_cudnn=False,
        param_attr=fluid.ParamAttr(
            name=name + "_sep_conv2_1",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    k = (1. / x.shape[1] / 1 / 1)**0.5
    # Fix: output c_out (was c_in) so the requested channel count is honored.
    x = fluid.layers.conv2d(
        x,
        c_out,
        1,
        padding=0,
        param_attr=fluid.ParamAttr(
            name=name + "_sep_conv2_2",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    gama, beta = bn_param_config(name, affine, "sep_conv_bn2")
    x = fluid.layers.batch_norm(
        x,
        param_attr=gama,
        bias_attr=beta,
        moving_mean_name=name + "_sep_bn2_mean",
        moving_variance_name=name + "_sep_bn2_variance")
    return x