def channel_conv(input, inner_ch, out_ch, name):
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=inner_ch,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=name + "_conv1_weights"),
        bias_attr=ParamAttr(name=name + "_conv1_bias"),
        name=name + "_conv1")
    conv = fluid.layers.layer_norm(
        conv,
        begin_norm_axis=1,
        param_attr=ParamAttr(name=name + "_ln_weights"),
        bias_attr=ParamAttr(name=name + "_ln_bias"),
        act="relu",
        name=name + "_ln")
    # The second 1x1 conv is zero-initialized (weights and bias), so the
    # branch contributes nothing at the start of training.
    conv = fluid.layers.conv2d(
        input=conv,
        num_filters=out_ch,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(
            name=name + "_conv2_weights",
            initializer=ConstantInitializer(value=0.0)),
        bias_attr=ParamAttr(
            name=name + "_conv2_bias",
            initializer=ConstantInitializer(value=0.0)),
        name=name + "_conv2")
    return conv
def bn_param_config(name='', affine=False, op=None):
    gama_name = name + "_" + str(op) + "_gama"
    beta_name = name + "_" + str(op) + "_beta"
    gama = ParamAttr(name=gama_name,
                     initializer=ConstantInitializer(value=1),
                     trainable=affine)
    beta = ParamAttr(name=beta_name,
                     initializer=ConstantInitializer(value=0),
                     trainable=affine)
    return gama, beta
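# A minimal usage sketch (the call site and tensor names are assumptions, not
# from the original): pass affine=False to freeze the BN scale/shift while
# keeping deterministic parameter names.
import paddle.fluid as fluid

x = fluid.layers.data(name='bn_in', shape=[64, 32, 32], dtype='float32')
gama, beta = bn_param_config(name="cell_0", affine=False, op="sep_conv")
bn_out = fluid.layers.batch_norm(x, param_attr=gama, bias_attr=beta)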
def _init_alphas(self):
    # One row of architecture weights per edge in the cell: node i (of
    # n_nodes) has i + 2 incoming edges, each scored over PRIMITIVES.
    n_ops = sum(range(2, 2 + self.n_nodes))
    self.alphas_normal = fluid.layers.create_parameter(
        shape=[n_ops, len(PRIMITIVES)],
        dtype="float32",
        default_initializer=ConstantInitializer(value=0))
    self.alphas_reduce = fluid.layers.create_parameter(
        shape=[n_ops, len(PRIMITIVES)],
        dtype="float32",
        default_initializer=ConstantInitializer(value=0))
    # setup alphas list
    self._alphas = [self.alphas_normal, self.alphas_reduce]
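# A standalone sketch (numpy, names assumed) of why the alphas are
# zero-initialized: softmax over a row of zeros is uniform, so every
# candidate op in PRIMITIVES starts with equal mixing weight.
import numpy as np

n_edges, n_primitives = 14, 8  # e.g. n_nodes=4 -> sum(range(2, 6)) = 14 edges
alphas = np.zeros([n_edges, n_primitives], dtype="float32")
weights = np.exp(alphas) / np.exp(alphas).sum(axis=-1, keepdims=True)
assert np.allclose(weights, 1.0 / n_primitives)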
def model(x,
          y,
          c_in,
          num_classes,
          layers,
          steps=4,
          multiplier=4,
          stem_multiplier=3,
          name="model"):
    c_curr = stem_multiplier * c_in
    k = (1. / x.shape[1] / 3 / 3)**0.5
    x = fluid.layers.conv2d(
        x,
        c_curr,
        3,
        padding=1,
        param_attr=fluid.ParamAttr(
            name=name + "_conv_0",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    x = fluid.layers.batch_norm(
        x,
        param_attr=fluid.ParamAttr(
            name=name + "_bn0_scale",
            initializer=ConstantInitializer(value=1)),
        bias_attr=fluid.ParamAttr(
            name=name + "_bn0_offset",
            initializer=ConstantInitializer(value=0)),
        moving_mean_name=name + "_bn0_mean",
        moving_variance_name=name + "_bn0_variance")
    s0 = s1 = x
    reduction_prev = False
    c_curr = c_in
    for i in range(layers):
        if i in [layers // 3, 2 * layers // 3]:
            c_curr *= 2
            reduction = True
        else:
            reduction = False
        s0, s1 = s1, cell(s0, s1, steps, multiplier, c_curr, reduction,
                          reduction_prev, name + "_l" + str(i))
        reduction_prev = reduction
    out = fluid.layers.pool2d(s1, pool_type='avg', global_pooling=True)
    out = fluid.layers.squeeze(out, axes=[2, 3])
    k = (1. / out.shape[1])**0.5
    logits = fluid.layers.fc(
        out,
        num_classes,
        param_attr=fluid.ParamAttr(
            name=name + "_fc_weights",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=fluid.ParamAttr(
            name=name + "_fc_bias",
            initializer=UniformInitializer(low=-k, high=k)))
    train_loss = fluid.layers.reduce_mean(
        fluid.layers.softmax_with_cross_entropy(logits, y))
    return logits, train_loss
def __init__(self,
             c_in,
             num_classes,
             layers,
             method,
             steps=4,
             multiplier=4,
             stem_multiplier=3):
    super(Network, self).__init__()
    self._c_in = c_in
    self._num_classes = num_classes
    self._layers = layers
    self._steps = steps
    self._multiplier = multiplier
    self._primitives = PRIMITIVES
    self._method = method
    c_cur = stem_multiplier * c_in
    self.stem = fluid.dygraph.Sequential(
        Conv2D(num_channels=3,
               num_filters=c_cur,
               filter_size=3,
               padding=1,
               param_attr=fluid.ParamAttr(initializer=MSRAInitializer()),
               bias_attr=False),
        BatchNorm(num_channels=c_cur,
                  param_attr=fluid.ParamAttr(
                      initializer=ConstantInitializer(value=1)),
                  bias_attr=fluid.ParamAttr(
                      initializer=ConstantInitializer(value=0))))
    c_prev_prev, c_prev, c_cur = c_cur, c_cur, c_in
    cells = []
    reduction_prev = False
    for i in range(layers):
        if i in [layers // 3, 2 * layers // 3]:
            c_cur *= 2
            reduction = True
        else:
            reduction = False
        cell = Cell(steps, multiplier, c_prev_prev, c_prev, c_cur, reduction,
                    reduction_prev, method)
        reduction_prev = reduction
        cells.append(cell)
        c_prev_prev, c_prev = c_prev, multiplier * c_cur
    self.cells = fluid.dygraph.LayerList(cells)
    self.global_pooling = Pool2D(pool_type='avg', global_pooling=True)
    self.classifier = Linear(
        input_dim=c_prev,
        output_dim=num_classes,
        param_attr=ParamAttr(initializer=MSRAInitializer()),
        bias_attr=ParamAttr(initializer=MSRAInitializer()))
    self._initialize_alphas()
def _create_mask_variables(cls, main_program, startup_program,
                           params_and_grads):
    r"""
    Create sparse mask Tensors according to supported layers in
    :attr:`main_program`. This function is called in the second step of
    `ASPHelper._minimize`.

    Args:
        main_program (Program): Program with model definition and its
                                parameters.
        startup_program (Program): Program for initializing parameters.
        params_and_grads (list): Variable pairs of parameters and their
                                 gradients.
    """
    asp_info = cls._get_program_asp_info(main_program)
    with program_guard(main_program, startup_program):
        for param_and_grad in params_and_grads:
            if ASPHelper._is_supported_layer(main_program,
                                             param_and_grad[0].name):
                mask_param = layers.create_parameter(
                    name=param_and_grad[0].name +
                    ASPHelper.MASK_APPENDDED_NAME,
                    shape=param_and_grad[0].shape,
                    dtype=param_and_grad[0].dtype,
                    default_initializer=ConstantInitializer(value=1.0))
                mask_param.stop_gradient = True
                mask_param.trainable = False
                asp_info.update_mask_vars(param_and_grad[0].name, mask_param)
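# A toy, framework-free illustration (assumed, not the ASP API) of what the
# all-ones mask is for: ASP later overwrites it with an n:m sparsity pattern
# and prunes by elementwise multiplication with the weight.
import numpy as np

weight = np.random.randn(4, 8).astype("float32")
mask = np.ones_like(weight)   # matches ConstantInitializer(value=1.0): dense
mask[:, ::2] = 0.0            # stand-in for a real 2:4-style mask
pruned_weight = weight * mask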
def __init__(self, memory, base_name, input_dim, output_dim, bias=True):
    super(Linear, self).__init__()
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.base_name = base_name
    self.memory = memory
    self.w_name = "%s_weight" % base_name
    self.b_name = "%s_bias" % base_name
    start_block = memory.startup_program.global_block()
    main_block = memory.main_program.current_block()
    self.weight = start_block.create_parameter(
        name=self.w_name,
        dtype='float32',
        shape=[input_dim, output_dim],
        with_initializer=True,
        initializer=XavierInitializer(uniform=True,
                                      fan_in=input_dim,
                                      fan_out=output_dim))
    self.memory.add_weight(self.weight)
    self.main_weight = main_block.create_parameter(
        name=self.w_name, dtype='float32', shape=[input_dim, output_dim])
    if bias:
        self.bias = start_block.create_parameter(
            name=self.b_name,
            dtype='float32',
            shape=[output_dim],
            with_initializer=True,
            initializer=ConstantInitializer(value=0.0))
        self.memory.add_weight(self.bias)
        self.main_bias = main_block.create_parameter(name=self.b_name,
                                                     dtype='float32',
                                                     shape=[output_dim])
    self.call_count = 0
def __init__(self,
             memory,
             base_name,
             in_channels,
             out_channels,
             kernel_size,
             stride=1,
             padding=0,
             dilation=1,
             groups=1,
             bias=False):
    super(Conv2d, self).__init__()
    self.memory = memory
    self.base_name = base_name
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.groups = groups
    self.bias = bias
    start_block = memory.startup_program.global_block()
    main_block = memory.main_program.current_block()
    conv_name = "%s_w" % self.base_name
    self.kernel_size = _pair(self.kernel_size)
    self.stride = _pair(self.stride)
    self.padding = _pair(self.padding)
    self.dilation = _pair(self.dilation)

    def _get_default_param_initializer():
        # He (MSRA) initialization for the conv kernel.
        std = (2.0 / (self.kernel_size[0]**2 * self.in_channels))**0.5
        return Normal(0.0, std, 0)

    weight_shape = [self.out_channels, self.in_channels] + self.kernel_size
    self.conv_weight = start_block.create_parameter(
        name=conv_name,
        dtype='float32',
        shape=weight_shape,
        with_initializer=True,
        initializer=_get_default_param_initializer())
    self.memory.add_weight(self.conv_weight)
    self.main_conv_weight = main_block.create_parameter(name=conv_name,
                                                        dtype='float32',
                                                        shape=weight_shape)
    if bias:
        self.b_name = "%s_bias" % self.base_name
        self.bias = start_block.create_parameter(
            name=self.b_name,
            dtype='float32',
            shape=[self.out_channels],
            with_initializer=True,
            initializer=ConstantInitializer(value=0.0))
        self.memory.add_weight(self.bias)
        self.main_bias = main_block.create_parameter(
            name=self.b_name, dtype='float32', shape=[self.out_channels])
    self.call_count = 0
def conv_bn(x, c_out, kernel_size, padding, stride, name):
    k = (1. / x.shape[1] / kernel_size / kernel_size)**0.5
    conv1 = fluid.layers.conv2d(
        x,
        c_out,
        kernel_size,
        stride=stride,
        padding=padding,
        param_attr=fluid.ParamAttr(
            name=name + "_conv",
            initializer=UniformInitializer(low=-k, high=k)),
        bias_attr=False)
    bn1 = fluid.layers.batch_norm(
        conv1,
        param_attr=fluid.ParamAttr(
            name=name + "_bn_scale",
            initializer=ConstantInitializer(value=1)),
        bias_attr=fluid.ParamAttr(
            name=name + "_bn_offset",
            initializer=ConstantInitializer(value=0)),
        moving_mean_name=name + "_bn_mean",
        moving_variance_name=name + "_bn_variance")
    return bn1
def __init__(self, c_curr, c_out, kernel_size, padding, stride, name=None):
    super(ConvBN, self).__init__()
    self.conv = Conv2D(
        num_channels=c_curr,
        num_filters=c_out,
        filter_size=kernel_size,
        stride=stride,
        padding=padding,
        param_attr=fluid.ParamAttr(
            name=name + "_conv" if name is not None else None,
            initializer=MSRAInitializer()),
        bias_attr=False)
    self.bn = BatchNorm(
        num_channels=c_out,
        param_attr=fluid.ParamAttr(
            name=name + "_bn_scale" if name is not None else None,
            initializer=ConstantInitializer(value=1)),
        bias_attr=fluid.ParamAttr(
            name=name + "_bn_offset" if name is not None else None,
            initializer=ConstantInitializer(value=0)),
        moving_mean_name=name + "_bn_mean" if name is not None else None,
        moving_variance_name=name + "_bn_variance"
        if name is not None else None)
def set_program(cls):
    data = fluid.layers.data(name=cls.data_desc[0][0],
                             shape=cls.data_desc[0][1])
    out = fluid.layers.fc(input=data,
                          size=cls.hidden_size,
                          param_attr=WeightNormParamAttr(
                              dim=None,
                              name='weight_norm_param',
                              initializer=ConstantInitializer(1.0)),
                          bias_attr=False,
                          act=None)
    loss = fluid.layers.reduce_sum(out)
    fluid.backward.append_backward(loss=loss)
    cls.fetch_list = [
        'weight_norm_param_g', 'weight_norm_param_v',
        'weight_norm_param_g@GRAD'
    ]
def test_param(self):
    shape = [784, 100]
    val = 1.0625
    b = main_program.global_block()
    param = b.create_parameter(name='fc.w',
                               shape=shape,
                               dtype='float32',
                               initializer=ConstantInitializer(val))
    self.assertIsNotNone(param)
    self.assertEqual('fc.w', param.name)
    self.assertEqual((784, 100), param.shape)
    self.assertEqual(core.VarDesc.VarType.FP32, param.dtype)
    self.assertEqual(0, param.block.idx)
    exe = Executor(core.CPUPlace())
    p = exe.run(main_program, fetch_list=[param])[0]
    self.assertTrue(np.allclose(p, np.ones(shape) * val))
    p = io.get_parameter_value_by_name('fc.w', exe, main_program)
    self.assertTrue(np.allclose(np.array(p), np.ones(shape) * val))
def space_nonlocal(input,
                   dim_in,
                   dim_out,
                   prefix,
                   dim_inner,
                   with_bias=False,
                   with_scale=True):
    theta = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_theta_w'),
        bias_attr=ParamAttr(
            name=prefix + '_theta_b',
            initializer=ConstantInitializer(value=0.)) if with_bias else False)
    theta_shape = theta.shape
    theta_shape_op = fluid.layers.shape(theta)
    theta_shape_op.stop_gradient = True

    # we have to use explicit batch size (to support arbitrary spacetime size)
    # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
    theta = fluid.layers.reshape(theta, shape=(0, 0, -1))
    theta = fluid.layers.transpose(theta, [0, 2, 1])

    phi = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_phi_w'),
        bias_attr=ParamAttr(
            name=prefix + '_phi_b',
            initializer=ConstantInitializer(value=0.)) if with_bias else False,
        name=prefix + '_phi')
    phi = fluid.layers.reshape(phi, [0, 0, -1])
    theta_phi = fluid.layers.matmul(theta, phi)

    g = fluid.layers.conv2d(
        input=input,
        num_filters=dim_inner,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_g_w'),
        bias_attr=ParamAttr(
            name=prefix + '_g_b',
            initializer=ConstantInitializer(value=0.)) if with_bias else False,
        name=prefix + '_g')
    g = fluid.layers.reshape(g, [0, 0, -1])

    # scale
    if with_scale:
        theta_phi = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
    p = fluid.layers.softmax(theta_phi)

    # note g's axis[2] corresponds to p's axis[2]
    # e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
    p = fluid.layers.transpose(p, [0, 2, 1])
    t = fluid.layers.matmul(g, p)

    # reshape back
    # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
    n = fluid.layers.slice(theta_shape_op, axes=[0], starts=[0], ends=[1])
    h = fluid.layers.slice(theta_shape_op, axes=[0], starts=[2], ends=[3])
    w = fluid.layers.slice(theta_shape_op, axes=[0], starts=[3], ends=[4])
    ch = int(theta_shape[1])
    t_re = fluid.layers.reshape(t, shape=[n, ch, h, w])
    blob_out = t_re

    blob_out = fluid.layers.conv2d(
        input=blob_out,
        num_filters=dim_out,
        filter_size=1,
        stride=1,
        padding=0,
        param_attr=ParamAttr(name=prefix + '_out_w',
                             initializer=ConstantInitializer(value=0.0)),
        bias_attr=ParamAttr(
            name=prefix + '_out_b',
            initializer=ConstantInitializer(value=0.0)) if with_bias else False,
        name=prefix + '_out')
    blob_out_shape = blob_out.shape

    return blob_out
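# Hypothetical call (tensor name and dimensions are assumptions): a non-local
# block over an NCHW feature map with the inner dimension halved; since the
# output conv is zero-initialized, the block outputs zeros at initialization.
import paddle.fluid as fluid

feat = fluid.layers.data(name='feat', shape=[1024, 14, 14], dtype='float32')
nl_out = space_nonlocal(feat, dim_in=1024, dim_out=1024, prefix='nl_res4',
                        dim_inner=512)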
def create_mpc_parameter(self,
                         attr,
                         shape,
                         dtype,
                         is_bias=False,
                         default_initializer=None,
                         stop_gradient=False,
                         type=core.VarDesc.VarType.LOD_TENSOR):
    """
    Create mpc parameters for this layer. Refer to
    LayerHelper.create_parameter in Paddle 1.7.

    :param attr: the ParamAttr (or material for one) describing the parameter.
    :param shape: shape of the parameter.
    :param dtype: data type of the parameter; mpc parameters must be int64.
    :param is_bias: whether the parameter is a bias.
    :param default_initializer: initializer used when attr carries none.
    :param stop_gradient: whether to stop gradient on the parameter.
    :param type: variable type of the parameter.
    :return: the created mpc parameter in the main program.
    """
    # Deepcopy the attr so that parameters can be shared in program
    attr = copy.deepcopy(attr)
    attr = ParamAttr._to_attr(attr)
    if not attr:
        return None
    assert isinstance(attr, ParamAttr)
    suffix = 'b' if is_bias else 'w'
    if attr.name is None:
        attr.name = unique_name.generate(".".join([self.name, suffix]))
    if default_initializer is None and attr.initializer is None:
        if isinstance(dtype, core.VarDesc.VarType):
            if dtype != core.VarDesc.VarType.INT64:
                raise TypeError(
                    "Can not create mpc parameter with default initializer "
                    "when dtype is not int64 type. Set default_initializer "
                    "to fit the parameter dtype!")
        else:
            if not dtype == "int64":
                raise TypeError(
                    "Can not create mpc parameter with default initializer when "
                    "dtype is not int64 type. Set default_initializer to "
                    "fit the parameter dtype!")
        if is_bias:
            attr._set_default_bias_initializer()
        else:
            attr._set_default_initializer(ConstantInitializer(0))
    else:
        attr._set_default_initializer(default_initializer)

    # TODO(xukun07): not support WeightNormParamAttr in this first version
    # Paddle1.7: If weight normalization is set, insert extra parameters and ops.
    # Refer to https://arxiv.org/pdf/1602.07868.pdf
    if isinstance(attr, WeightNormParamAttr):
        # param = self._create_weight_normalize(attr, shape, dtype)
        # WeightNormParamAttr.params_with_weight_norm.append(param)
        # return param
        raise NotImplementedError("The WeightNormParamAttr for attr is not "
                                  "supported in this version")

    startup_program_global_block = self.startup_program.global_block()
    create_mpc_parameter(block=startup_program_global_block,
                         dtype=dtype,
                         shape=shape,
                         type=type,
                         **attr._to_kwargs(with_initializer=True))
    main_program_global_block = self.main_program.global_block()
    return create_mpc_parameter(block=main_program_global_block,
                                dtype=dtype,
                                shape=shape,
                                type=type,
                                **attr._to_kwargs())
from paddle.fluid import Program
from paddle.fluid import Operator
from paddle.fluid.initializer import XavierInitializer
from paddle.fluid.initializer import ConstantInitializer

use_mkldnn = False
my_program = Program()
cur_block = my_program.current_block()

# implement a y = Wx + b layer; input variables: W, x, b; output variable: y
# initializers: W -> Xavier initialization, b -> Constant initialization
x_var = cur_block.create_var(name='fc_x', shape=[-1, 128], dtype='float32')
y_var = cur_block.create_var(name='fc_y', shape=[-1, 64], dtype='float32')
Wx_var = cur_block.create_var(name='fc_Wx', shape=[-1, 64], dtype='float32')

xavier_init = XavierInitializer(uniform=True, fan_in=128, fan_out=64)
const_init = ConstantInitializer(value=0.0)
W_var = cur_block.create_parameter(name='fc_W',
                                   dtype='float32',
                                   shape=[128, 64],
                                   initializer=xavier_init)
b_var = cur_block.create_parameter(name='fc_b',
                                   dtype='float32',
                                   shape=[64],
                                   initializer=const_init)

mul_op_desc = cur_block.desc.append_op()
mul_op = Operator(block=cur_block,
                  desc=mul_op_desc,
                  type='mul',
                  inputs={'X': x_var, 'Y': W_var},
                  outputs={'Out': Wx_var},
                  # the original snippet breaks off after the outputs; the
                  # attrs and the bias-add op below are a plausible
                  # reconstruction of the y = Wx + b layer
                  attrs={'use_mkldnn': use_mkldnn})

add_op_desc = cur_block.desc.append_op()
add_op = Operator(block=cur_block,
                  desc=add_op_desc,
                  type='elementwise_add',
                  inputs={'X': Wx_var, 'Y': b_var},
                  outputs={'Out': y_var},
                  attrs={'axis': 1, 'use_mkldnn': use_mkldnn})
def bn_param_config(affine=False):
    gama = ParamAttr(initializer=ConstantInitializer(value=1),
                     trainable=affine)
    beta = ParamAttr(initializer=ConstantInitializer(value=0),
                     trainable=affine)
    return gama, beta
def __init__(self, channels=1, params=[0, 1, 1, 1, 1], n_iter=10):
    super(FlowLayer, self).__init__()
    self.n_iter = n_iter
    # Sobel matrix for image gradients; wx/wy are forward-difference kernels
    sobel = np.kron(
        np.resize(np.eye(channels), [channels, channels, 1, 1]),
        np.array([[[[-0.5, 0, 0.5], [-0.5, 0, 0.5], [-0.5, 0, 0.5]]]]))
    wx = np.array([[[[-1, 1]]]]).repeat(channels, axis=0)
    wy = np.array([[[[-1], [1]]]]).repeat(channels, axis=0)
    if params[0]:
        self.conv_img_grad = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=1,
            stride=1,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=sobel)))
        self.conv_img_grad2 = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=1,
            stride=1,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(
                    value=sobel.transpose([0, 1, 3, 2]))))
    else:
        self.conv_img_grad = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=1,
            stride=1,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=sobel),
                trainable=False))
        self.conv_img_grad2 = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=1,
            stride=1,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(
                    value=sobel.transpose([0, 1, 3, 2])),
                trainable=False))
    if params[1]:
        self.conv_f_grad = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wx)))
        self.conv_f_grad2 = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wy)))
        self.conv_div = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=(1, 2),
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wx)))
        self.conv_div2 = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=(2, 1),
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wy)))
    else:
        self.conv_f_grad = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wx),
                trainable=False))
        self.conv_f_grad2 = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=3,
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wy),
                trainable=False))
        self.conv_div = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=(1, 2),
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wx),
                trainable=False))
        self.conv_div2 = Conv2D(
            num_channels=channels,
            num_filters=channels,
            filter_size=(2, 1),
            padding=0,
            stride=1,
            bias_attr=False,
            groups=channels,
            param_attr=fluid.ParamAttr(
                initializer=NumpyArrayInitializer(value=wy),
                trainable=False))
    self.channels = channels
    self.t = 0.3   # theta
    self.l = 0.15  # lambda
    self.a = 0.25  # tau
    if params[2]:
        self.t = fluid.layers.create_parameter(
            shape=[1],
            dtype='float32',
            attr=fluid.ParamAttr(
                initializer=ConstantInitializer(value=self.t)))
    if params[3]:
        self.l = fluid.layers.create_parameter(
            shape=[1],
            dtype='float32',
            attr=fluid.ParamAttr(
                initializer=ConstantInitializer(value=self.l)))
    if params[4]:
        self.a = fluid.layers.create_parameter(
            shape=[1],
            dtype='float32',
            attr=fluid.ParamAttr(
                initializer=ConstantInitializer(value=self.a)))
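# Hypothetical instantiation (the flag layout follows the params list above):
# fixed Sobel/difference kernels (params[0] = params[1] = 0) with learnable
# theta/lambda/tau scalars (params[2:] all 1), built under dygraph.
with fluid.dygraph.guard():
    flow = FlowLayer(channels=1, params=[0, 0, 1, 1, 1], n_iter=10)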