def resreid_train(images, num_class=751, trainable=True):
    """Use ResNet-50 as the backbone; the stride of its last stage is set to 1
    to preserve richer person features."""
    with flow.scope.namespace("base"):
        stem = layer0(images, trainable=trainable)
        body = resnet_conv_x_body(stem, lambda x: x, trainable=trainable)
    with flow.scope.namespace("gap"):
        pool5 = flow.nn.avg_pool2d(body,
                                   ksize=[16, 8],
                                   strides=1,
                                   padding="VALID",
                                   data_format="NCHW",
                                   name="pool5")
        feature = flow.reshape(pool5, [pool5.shape[0], -1])
        if not trainable:
            return feature
        bn1 = flow.layers.batch_normalization(
            feature,
            axis=1,
            center=False,
            beta_initializer=flow.constant_initializer(0),
            gamma_initializer=flow.random_normal_initializer(mean=1, stddev=0.02),
            trainable=trainable,
            name='bnout')
        fc6 = flow.layers.dense(
            inputs=bn1,
            units=num_class,
            activation=None,
            use_bias=False,
            kernel_initializer=flow.random_normal_initializer(mean=0, stddev=0.01),
            trainable=trainable,
            name="fc6",
        )
    return feature, fc6
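# A minimal sketch of wiring resreid_train into a training job (assumes the
# layer0 / resnet_conv_x_body helpers it calls are importable; the 256x128
# person-crop input size is a hypothetical choice, not from the original
# file). With the last stage at stride 1 the backbone downsamples by 16x, so
# 256x128 inputs give the 16x8 feature map that pool5's ksize=[16, 8] expects.
import oneflow as flow
import oneflow.typing as tp


@flow.global_function(type="train")
def reid_train_job(images: tp.Numpy.Placeholder((32, 3, 256, 128)),
                   labels: tp.Numpy.Placeholder((32,), dtype=flow.int32)
                   ) -> tp.Numpy:
    feature, logits = resreid_train(images, num_class=751, trainable=True)
    loss = flow.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                            logits=logits)
    loss = flow.math.reduce_mean(loss)
    flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([], [1e-3]),
                       momentum=0.9).minimize(loss)
    return loss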
def dense(
    cls,
    input,
    units,
    name,
    use_bias=False,
    trainable=True,
    reuse=False,
    const_init=False,
):
    name_ = name if reuse == False else name + "_reuse"

    in_shape = input.shape
    in_num_axes = len(in_shape)
    assert in_num_axes >= 2

    inputs = flow.reshape(input, (-1, in_shape[-1])) if in_num_axes > 2 else input

    weight = flow.get_variable(
        name="{}-weight".format(name),
        shape=(units, inputs.shape[1]),
        dtype=inputs.dtype,
        initializer=flow.random_normal_initializer(stddev=0.02)
        if not const_init else flow.constant_initializer(0.002),
        trainable=trainable,
        model_name="weight",
        reuse=reuse,
    )

    out = flow.matmul(
        a=inputs,
        b=weight,
        transpose_b=True,
        name=name_ + "matmul",
    )

    if use_bias:
        bias = flow.get_variable(
            name="{}-bias".format(name),
            shape=(units,),
            dtype=inputs.dtype,
            initializer=flow.random_normal_initializer()
            if not const_init else flow.constant_initializer(0.002),
            trainable=trainable,
            model_name="bias",
            reuse=reuse,
        )
        out = flow.nn.bias_add(out, bias, name=name_ + "_bias_add")

    out = flow.reshape(out, in_shape[:-1] + (units,)) if in_num_axes > 2 else out
    return out
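# Usage note (a sketch, not from the original file): this helper is written
# method-style (note the cls parameter), as in OneFlow's GAN examples, so a
# generator body would call it roughly as
#
#   h = cls.dense(z, units=1024, name="g_fc1")           # (batch, 1024)
#   h = cls.dense(h, units=7 * 7 * 128, name="g_fc2")    # (batch, 6272)
#
# With const_init=True every initializer becomes constant_initializer(0.002),
# which makes the output deterministic for comparison tests.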
def _get_initializer(model_name):
    if model_name == "weight":
        return flow.random_normal_initializer(stddev=0.01)
        # return flow.variance_scaling_initializer(2.0, mode="fan_out", distribution="random_normal", data_format="NCHW")
    elif model_name == "bias":
        return flow.zeros_initializer()
    elif model_name == "gamma":
        return flow.ones_initializer()
    elif model_name == "beta":
        return flow.zeros_initializer()
    elif model_name == "dense_weight":
        return flow.random_normal_initializer(0, 0.01)
def conv2d_layer(
        name,
        input,
        out_channel,
        kernel_size=3,
        strides=1,
        padding="SAME",  # or [[], [], [], []]
        data_format="NCHW",
        dilation_rate=1,
        use_bias=True,
        weight_initializer=flow.random_normal_initializer(mean=0.0, stddev=0.02),
        bias_initializer=flow.zeros_initializer(),
        trainable=True,
        reuse=True):
    weight_shape = (out_channel, input.shape[1], kernel_size, kernel_size)
    weight = flow.get_variable(name + "_weight",
                               shape=weight_shape,
                               dtype=input.dtype,
                               initializer=weight_initializer,
                               trainable=trainable,
                               reuse=reuse)
    output = flow.nn.conv2d(input, weight, strides, padding, data_format,
                            dilation_rate)
    if use_bias:
        bias = flow.get_variable(name + "_bias",
                                 shape=(out_channel,),
                                 dtype=input.dtype,
                                 initializer=bias_initializer,
                                 trainable=trainable)
        output = flow.nn.bias_add(output, bias, data_format)
    return output
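# A minimal runnable sketch of conv2d_layer (hypothetical shapes; follows the
# job-function pattern shown in the kaiming_initializer docstring further
# down):
import numpy as np
import oneflow as flow
import oneflow.typing as tp


@flow.global_function()
def conv_demo_job(x: tp.Numpy.Placeholder((1, 3, 32, 32))) -> tp.Numpy:
    # "SAME" padding with stride 1 keeps the spatial size: (1, 16, 32, 32)
    return conv2d_layer("conv_demo", x, out_channel=16)


# flow.train.CheckPoint().init()
# out = conv_demo_job(np.random.randn(1, 3, 32, 32).astype(np.float32))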
def broadcast_to_compatible_with_fn(x_def: oft.Numpy.Placeholder(
        x.shape, dtype=flow.float)):
    x_var = flow.get_variable(
        "x_var",
        shape=x.shape,
        dtype=flow.float,
        initializer=flow.constant_initializer(0),
        trainable=True,
    )
    compatible_var = [
        flow.get_variable(
            "compatible_var_{}".format(i),
            shape=cp_shape,
            dtype=flow.float,
            initializer=flow.random_normal_initializer(),
            trainable=False,
        ) for i, cp_shape in enumerate(compatible_shape)
    ]
    x_var = x_var + x_def
    y = flow.broadcast_to_compatible_with(x_var, compatible_var)
    flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler([], [1e-3]),
                       momentum=0).minimize(y)
    flow.watch_diff(x_var, dx_watcher)
    return y
def _AddClassfication(input_blob,
                      label_blob,
                      hidden_size,
                      label_num,
                      initializer_range,
                      scope_name='classification',
                      is_train=True):
    with flow.scope.namespace(scope_name):
        output_weight_blob = flow.get_variable(
            name="output_weights",
            shape=[label_num, hidden_size],
            dtype=input_blob.dtype,
            # initializer=bert_util.CreateInitializer(initializer_range),
            initializer=flow.random_normal_initializer(
                mean=0.0, stddev=initializer_range, seed=None, dtype=None),
            trainable=is_train)
        output_bias_blob = flow.get_variable(
            name="output_bias",
            shape=[label_num],
            dtype=input_blob.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=is_train)
        logit_blob = flow.matmul(input_blob,
                                 output_weight_blob,
                                 transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias_blob)
        # pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
        #     logits=logit_blob, labels=label_blob
        # )
        # loss = pre_example_loss
        # return loss, pre_example_loss, logit_blob
        return logit_blob
def model() -> tp.Numpy:
    with get_placement():
        x = flow.get_variable(
            name="x",
            shape=(10, 801, 820, 4),
            dtype=dtype,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        y = flow.get_variable(
            name="y",
            shape=(10, 801, 820, 4),
            dtype=dtype,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        return flow.math.reduce_mean(x + y)
def get_variable(name):
    return flow.get_variable(
        name=name,
        shape=(10, 80, 40, 20),
        dtype=dtype,
        initializer=flow.random_normal_initializer(mean=10, stddev=1),
        distribute=flow.distribute.split(0),
    )
def dnn_2(input_tensor, cfg, optimizer, model_weight=True, trainable=True):
    input_tensor = flow.reshape(input_tensor, [input_tensor.shape[0], -1])
    dense0 = flow.layers.dense(
        inputs=input_tensor,
        units=cfg[0],
        activation=flow.nn.relu,
        use_bias=True,
        kernel_initializer=flow.random_normal_initializer(mean=0, stddev=0.1),
        trainable=trainable,
        name="dense0")
    dense1 = flow.layers.dense(
        inputs=dense0,
        units=cfg[1],
        activation=None,
        use_bias=True,
        kernel_initializer=flow.random_normal_initializer(mean=0, stddev=0.1),
        trainable=trainable,
        name="dense1")

    def getTypeAndShape(inputs, units):
        in_shape = inputs.shape
        in_num_axes = len(in_shape)
        inputs = (flow.reshape(inputs, (-1, in_shape[-1]))
                  if in_num_axes > 2 else inputs)
        shape = (units, inputs.shape[1])
        dtype = inputs.dtype
        return shape, dtype

    if model_weight == True:
        shape_list = []
        dtype_list = []
        shape_weight, dtype = getTypeAndShape(input_tensor, cfg[0])
        shape_list.append(shape_weight)
        dtype_list.append(dtype)
        shape_weight, dtype = getTypeAndShape(dense0, cfg[1])
        shape_list.append(shape_weight)
        dtype_list.append(dtype)
        modelWeight.addDense(dtype_old=dtype_list,
                             shape=shape_list,
                             optimizer=optimizer,
                             dense_num=2)

    return dense1
def deconv2d(
    cls,
    input,
    filters,
    size,
    name,
    strides=2,
    trainable=True,
    reuse=False,
    const_init=False,
    use_bias=False,
):
    name_ = name if reuse == False else name + "_reuse"
    # weight : [in_channels, out_channels, height, width]
    weight_shape = (input.shape[1], filters, size, size)
    # the transposed convolution produces `filters` output channels
    output_shape = (
        input.shape[0],
        filters,
        input.shape[2] * strides,
        input.shape[3] * strides,
    )
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=flow.random_normal_initializer(stddev=0.02)
        if not const_init else flow.constant_initializer(0.002),
        trainable=trainable,
        reuse=reuse,
    )
    output = flow.nn.conv2d_transpose(
        input,
        weight,
        strides=[strides, strides],
        output_shape=output_shape,
        padding="SAME",
        data_format="NCHW",
        name=name_,
    )
    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters,),
            dtype=input.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=trainable,
            reuse=reuse,
        )
        output = flow.nn.bias_add(output, bias, "NCHW")
    return output
def model() -> tp.Numpy:
    with get_placement():
        x = flow.get_variable(
            name="x",
            shape=(4, 5),
            dtype=flow.float32,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
        )
        w = flow.get_variable(
            name="w",
            shape=(5, 6),
            dtype=flow.float32,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        y = flow.matmul(x, w)
        flow.optimizer.SGD(flow.optimizer.PiecewiseConstantScheduler(
            [], [0.01]), momentum=0.9).minimize(y)
        return y
def broadcast_to_compatible_with_fn(x_def: oft.ListNumpy.Placeholder(
        shape=x_shape, dtype=flow.float)):
    compatible_var = [
        flow.get_variable(
            "compatible_var_{}".format(i),
            shape=cp_shape,
            dtype=flow.float,
            initializer=flow.random_normal_initializer(),
            trainable=False,
        ) for i, cp_shape in enumerate(compatible_shape)
    ]
    return flow.broadcast_to_compatible_with(x_def, compatible_var)
def build_network(self, inputs):
    # inflating the pretrained 2D conv weight into the center slice of a 3D
    # kernel was explored but is disabled; a random normal init is used instead
    # weight_2d = self.conv2d.weight.data
    # weight_3d = np.zeros(weight_2d.shape)
    # weight_3d = flow.expand_dims(weight_3d, axis=2)
    # weight_3d[:, :, 0, :, :] = weight_2d
    # init = flow.constant_initializer(weight_3d)
    # init = flow.kaiming_initializer(shape=inputs.shape, mode="fan_out", nonlinearity="relu")
    init = flow.random_normal_initializer(mean=0, stddev=1)
    output = conv3d_layer(self.name,
                          inputs=inputs,
                          filters=self.conv2d.out_channels,
                          kernel_size=self.kernel_dim,
                          strides=self.stride,
                          padding=self.padding,
                          use_bias=True,
                          weight_initializer=init,
                          trainable=self.trainable)
    return output
def add() -> tp.Numpy:
    with get_placement():
        x = flow.get_variable(
            name="x",
            shape=(9, 3),
            dtype=dtype,
            initializer=flow.random_normal_initializer(mean=10, stddev=1),
            distribute=flow.distribute.split(0),
        )
        y = flow.get_variable(
            name="y",
            shape=(9, 3),
            dtype=dtype,
            initializer=flow.constant_initializer(5, dtype=dtype),
        )
        z = flow.get_variable(
            name="z",
            shape=(9, 3),
            dtype=dtype,
            initializer=flow.random_normal_initializer(),
        )
        return flow.math.add_n([x, y, z])
def conv2d(
    cls,
    input,
    filters,
    size,
    name,
    strides=2,
    padding="same",
    trainable=True,
    reuse=False,
    const_init=False,
    use_bias=True,
):
    name_ = name if reuse == False else name + "_reuse"
    # (output_dim, k_h, k_w, input.shape[3]) if NHWC
    weight_shape = (filters, input.shape[1], size, size)
    weight = flow.get_variable(
        name + "-weight",
        shape=weight_shape,
        dtype=input.dtype,
        initializer=flow.random_normal_initializer(stddev=0.02)
        if not const_init else flow.constant_initializer(0.002),
        trainable=trainable,
        reuse=reuse,
    )
    output = flow.nn.compat_conv2d(
        input,
        weight,
        strides=[strides, strides],
        padding=padding,
        data_format="NCHW",
        name=name_,
    )
    if use_bias:
        bias = flow.get_variable(
            name + "-bias",
            shape=(filters,),
            dtype=input.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=trainable,
            reuse=reuse,
        )
        output = flow.nn.bias_add(output, bias, "NCHW")
    return output
def HS_reid_train(images, num_class=751, trainable=False):
    """Slice the feature map into parts horizontally by GAP in order to mine
    discriminative local features."""
    with flow.scope.namespace("base"):
        stem = layer0(images, trainable=trainable)
        body = resnet_conv_x_body(stem, lambda x: x, trainable=trainable)
    with flow.scope.namespace("gap"):
        pool5 = flow.nn.avg_pool2d(body,
                                   ksize=[4, 8],
                                   strides=4,
                                   padding="VALID",
                                   data_format="NCHW",
                                   name="pool5")
        feature = flow.reshape(pool5, [pool5.shape[0], -1])
        if not trainable:
            return feature
        bn1 = flow.layers.batch_normalization(
            feature,
            axis=1,
            center=False,
            beta_initializer=flow.constant_initializer(0),
            gamma_initializer=flow.random_normal_initializer(mean=1, stddev=0.02),
            trainable=trainable,
            name='bnout')
        fc6 = flow.layers.dense(
            inputs=bn1,
            units=num_class,
            activation=None,
            use_bias=False,
            kernel_initializer=flow.random_normal_initializer(mean=0, stddev=0.01),
            trainable=trainable,
            name="fc6",
        )
    return feature, fc6
def inflate_batch_norm(inputs, num_features, trainable=True):
    # a timestamp-based suffix keeps each generated BN layer name unique
    name = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')
    output = flow.layers.batch_normalization(
        inputs=inputs,
        axis=1,
        momentum=0.997,
        epsilon=1.001e-5,
        gamma_initializer=flow.random_normal_initializer(mean=1, stddev=0.02),
        center=True,
        scale=True,
        trainable=trainable,
        name="inflate_bn_" + name)
    return output
def kaiming_initializer(
    shape: Sequence[int],
    distribution: str = "random_normal",
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
    negative_slope: float = 0.0,
    data_format: str = "NCHW",
):
    r"""Initialize weight according to the method described in `Delving deep into
    rectifiers: Surpassing human-level performance on ImageNet classification`
    - He, K. et al. (2015), using a normal or uniform distribution.

    Args:
        shape (Sequence[int]): Blob shape.
        distribution (str, optional): 'random_normal' or 'random_uniform'. Defaults to "random_normal".
        mode (str, optional): 'fan_in', 'fan_out' or 'fan_avg'. Defaults to "fan_in".
        nonlinearity (str, optional): None, 'tanh', 'sigmoid', 'relu' or 'leaky_relu'. Defaults to "leaky_relu".
        negative_slope (float, optional): The negative slope of leaky_relu. Defaults to 0.0.
        data_format (str, optional): 'NCHW' or 'NHWC'. Defaults to "NCHW".

    Raises:
        NotImplementedError: Only normal and uniform distributions are supported.

    Returns:
        flow.random_normal_initializer or flow.random_uniform_initializer
    """
    assert isinstance(shape, tuple)
    # Kaiming Initialization only deals with FC, Conv and Deconv's weight
    assert len(shape) >= 2
    elem_cnt = functools.reduce(lambda a, b: a * b, shape, 1)
    assert elem_cnt > 0

    assert distribution in ["random_normal", "random_uniform"]
    assert mode in ["fan_in", "fan_out", "fan_avg"]
    assert nonlinearity in [None, "tanh", "sigmoid", "relu", "leaky_relu"]
    assert data_format in ["NCHW", "NHWC"]

    fan = _CalcFan(shape, mode, _get_data_format(data_format))
    gain = _CalcGain(nonlinearity, negative_slope)
    std = gain / math.sqrt(fan)
    if distribution == "random_normal":
        return flow.random_normal_initializer(0.0, std)
    elif distribution == "random_uniform":
        bound = math.sqrt(3.0) * std
        return flow.random_uniform_initializer(-bound, bound)
    else:
        raise NotImplementedError("Only support normal and uniform distribution")
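# Worked example for the computation above: for a conv weight of shape
# (128, 256, 3, 3) in NCHW, mode="fan_in" gives fan = 256 * 3 * 3 = 2304,
# nonlinearity="relu" gives gain = sqrt(2), so std = sqrt(2) / sqrt(2304)
# ~= 0.0295 and the call returns flow.random_normal_initializer(0.0, 0.0295)
# (up to rounding). With distribution="random_uniform" the bound would be
# sqrt(3) * 0.0295 ~= 0.0510.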
def GPT(idx, config, target=None):
    b, t = idx.shape
    assert t <= config.block_size, "Cannot forward, model block size is exhausted."

    # forward the GPT model
    # token embeddings
    word_embedding = flow.get_variable(
        'word_emb',
        initializer=flow.random_normal_initializer(),
        shape=(config.vocab_size, config.n_embd))
    token_embeddings = flow.gather(word_embedding, idx)

    # position embeddings: each position maps to a (learnable) vector;
    # slice out the first t positions, i.e. pos_emb[:, :t, :]
    pos_emb = flow.get_variable(name='pos_emb',
                                shape=(1, config.block_size, config.n_embd),
                                dtype=flow.float32,
                                initializer=flow.zeros_initializer())
    position_embeddings = flow.slice(pos_emb, [None, 0, None], [None, t, None])

    x = flow.nn.dropout((token_embeddings + position_embeddings),
                        config.embd_pdrop)

    # transformer blocks
    for block_id in range(config.n_layer):
        with flow.scope.namespace('Block' + str(block_id)):
            x = Block(x, config)

    x = flow.layers.layer_norm(x, name='output_layernorm')
    logits = flow.layers.dense(x,
                               config.vocab_size,
                               use_bias=False,
                               activation=None,
                               name='output_logits')

    loss = None
    if target is not None:
        # TODO
        logits = flow.reshape(logits, [-1, config.vocab_size])
        target = flow.reshape(target, [-1])
        target = flow.one_hot(target, depth=config.vocab_size, dtype=flow.float32)
        loss = flow.nn.softmax_cross_entropy_with_logits(logits, target)

    return logits, loss
def Build_EmbeddingLayer(self,
                         vocab_size,
                         embedding_size=128,
                         word_embedding_name="Embedding_Layer"):
    """Build an embedding layer and store its table on the model.

    :param vocab_size: The vocabulary size
    :param embedding_size: The embedding size
    :param word_embedding_name: The name of the embedding variable
    :return: None; the table is stored as self.embedding_table
    """
    self.embedding_table = flow.get_variable(
        name=word_embedding_name + "_Embed",
        shape=[vocab_size, embedding_size],
        dtype=flow.float32,
        initializer=flow.random_normal_initializer(0, self.hidden_size**-0.5))
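# A short usage sketch (hypothetical names): once Build_EmbeddingLayer has
# created self.embedding_table, ids are mapped to vectors with flow.gather,
# the same lookup pattern the GPT snippet above uses for word_emb:
#
#   model.Build_EmbeddingLayer(vocab_size=30522, embedding_size=128)
#   embeddings = flow.gather(model.embedding_table, input_ids_blob)
#   # ids of shape (batch, seq_len) -> embeddings of shape (batch, seq_len, 128)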
def deconv(input,
           out_channel,
           name_prefix,
           kernel_size=4,
           strides=[2, 2],
           trainable=True,
           reuse=True):
    weight = flow.get_variable(
        name_prefix + "_weight",
        shape=(input.shape[1], out_channel, kernel_size, kernel_size),
        dtype=flow.float,
        initializer=flow.random_normal_initializer(mean=0.0, stddev=0.02),
        trainable=trainable,
        reuse=reuse)
    return flow.nn.conv2d_transpose(input,
                                    weight,
                                    strides=strides,
                                    padding="SAME",
                                    output_shape=(input.shape[0],
                                                  out_channel,
                                                  input.shape[2] * strides[0],
                                                  input.shape[3] * strides[1]))
def fit_dense(input_blob,
              hidden_size,
              label_num,
              initializer_range,
              scope_name='fit_dense',
              is_train=True):
    with flow.scope.namespace(scope_name):
        in_shape = input_blob.shape
        in_num_axes = len(in_shape)
        assert in_num_axes >= 2
        input_blob = (flow.reshape(input_blob, (-1, in_shape[-1]))
                      if in_num_axes > 2 else input_blob)
        output_weight_blob = flow.get_variable(
            name="weight",
            shape=[label_num, hidden_size],
            dtype=input_blob.dtype,
            # initializer=bert_util.CreateInitializer(initializer_range),
            initializer=flow.random_normal_initializer(
                mean=0.0, stddev=initializer_range, seed=None, dtype=None),
            trainable=is_train)
        output_bias_blob = flow.get_variable(
            name="bias",
            shape=[label_num],
            dtype=input_blob.dtype,
            initializer=flow.constant_initializer(0.0),
            trainable=is_train)
        logit_blob = flow.matmul(input_blob,
                                 output_weight_blob,
                                 transpose_b=True)
        logit_blob = flow.nn.bias_add(logit_blob, output_bias_blob)
        logit_blob = (flow.reshape(logit_blob, in_shape[:-1] + (label_num,))
                      if in_num_axes > 2 else logit_blob)
        # pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
        #     logits=logit_blob, labels=label_blob
        # )
        # loss = pre_example_loss
        # return loss, pre_example_loss, logit_blob
        return logit_blob
def test_float_initializer(test_case):
    initializers = [
        flow.random_normal_initializer(mean=3, stddev=4),
        flow.random_uniform_initializer(minval=-6, maxval=18),
        flow.truncated_normal_initializer(mean=-5, stddev=8),
        flow.xavier_uniform_initializer(data_format="NCHW"),
        flow.xavier_uniform_initializer(data_format="NHWC"),
        flow.xavier_normal_initializer(data_format="NCHW"),
        flow.xavier_normal_initializer(data_format="NHWC"),
        flow.constant_initializer(value=4),
        flow.ones_initializer(),
        flow.zeros_initializer(),
    ]

    kaiming_args = GenArgDict(
        OrderedDict(
            shape=[SHAPE],
            mode=["fan_in", "fan_out", "fan_avg"],
            distribution=["random_normal", "random_uniform"],
            data_format=["NCHW", "NHWC"],
            negative_slope=[0.5],
        ))
    vs_args = GenArgDict(
        OrderedDict(
            scale=[3.4],
            mode=["fan_in", "fan_out", "fan_avg"],
            distribution=["truncated_normal", "random_normal", "random_uniform"],
            data_format=["NCHW", "NHWC"],
        ))
    for args in kaiming_args:
        initializers.append(flow.kaiming_initializer(**args))
    for args in vs_args:
        initializers.append(flow.variance_scaling_initializer(**args))

    for initializer in initializers:
        CompareTwoDistribution(test_case, flow.float32, initializer)
def _conv2d(inputs,
            filters,
            kernel_size,
            strides=1,
            padding="VALID",
            groups=1,
            use_bias=False,
            trainable=True,
            name=None):
    if padding != "SAME" and padding != "VALID":
        if isinstance(padding, (list, tuple)):
            # apply the explicit per-dimension padding, then run a VALID conv
            inputs = flow.pad(inputs, padding)
            padding = "VALID"
        else:
            raise ValueError("padding must be SAME, VALID or a list/tuple.")

    return flow.layers.conv2d(inputs,
                              filters,
                              kernel_size,
                              strides,
                              padding,
                              data_format="NCHW",
                              dilation_rate=1,
                              groups=groups,
                              activation=None,
                              use_bias=use_bias,
                              kernel_initializer=flow.random_normal_initializer(),
                              bias_initializer=flow.zeros_initializer(),
                              kernel_regularizer=None,
                              bias_regularizer=None,
                              trainable=trainable,
                              name=name,
                              weight_name=name + "-weight",
                              bias_name=name + "-bias")
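# A sketch of the explicit-padding branch above (shapes and the exact pad
# format are illustrative assumptions): a per-dimension pad list is applied
# with flow.pad first, then the convolution runs with VALID padding, which
# reproduces a manual "SAME"-style 3x3 padding:
#
#   y = _conv2d(x, filters=64, kernel_size=3,
#               padding=[(0, 0), (0, 0), (1, 1), (1, 1)],  # N, C, H, W
#               name="conv_pad_demo")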
def broadcast_to_compatible_with_fn(x_def: oft.Numpy.Placeholder(
        x.shape, dtype=flow.float)):
    x_var = flow.get_variable(
        "x_var",
        shape=x.shape,
        dtype=flow.float,
        initializer=flow.constant_initializer(0),
        trainable=True,
    )
    compatible_var = [
        flow.get_variable(
            "compatible_var_{}".format(i),
            shape=cp_shape,
            dtype=flow.float,
            initializer=flow.random_normal_initializer(),
            trainable=False,
        ) for i, cp_shape in enumerate(compatible_shape)
    ]
    x_var = x_var + x_def
    y = flow.broadcast_to_compatible_with(x_var, compatible_var)
    flow.losses.add_loss(y)
    flow.watch_diff(x_var, dx_watcher)
    return y
def normal_(self, mean=0, std=1):
    initializer_conf = flow.random_normal_initializer(mean=mean, stddev=std)
    return self._init_by_initializer_conf(initializer_conf)
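# Usage sketch (assumes this method lives on a tensor-like class whose
# _init_by_initializer_conf applies the initializer to the underlying
# storage, as the body implies):
#
#   w.normal_(mean=0, std=0.02)  # in-place N(0, 0.02^2) fill, PyTorch-style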
def kaiming_initializer(
    shape: Sequence[int],
    distribution: str = "random_normal",
    mode: str = "fan_in",
    nonlinearity: str = "leaky_relu",
    negative_slope: float = 0.0,
    data_format: str = "NCHW",
):
    r"""Initialize weight according to the method described in `Delving deep into
    rectifiers: Surpassing human-level performance on ImageNet classification`
    - He, K. et al. (2015), using a normal or uniform distribution.

    When distribution is "random_normal", the equation is:

    .. math::

        W \sim N(0, \sqrt{\frac{2}{n}})

    When distribution is "random_uniform", the equation is:

    .. math::

        W \sim U(-\sqrt{\frac{6}{n}}, \sqrt{\frac{6}{n}})

    If mode is "fan_in", "n" is the number of input units in the weight Blob.
    If mode is "fan_out", "n" is the number of output units in the weight Blob.
    If mode is "fan_avg", "n" is the average of the number of input and output
    units in the weight Blob.

    Args:
        shape (Sequence[int]): Blob shape.
        distribution (str, optional): 'random_normal' or 'random_uniform'. Defaults to "random_normal".
        mode (str, optional): 'fan_in', 'fan_out' or 'fan_avg'. Defaults to "fan_in".
        nonlinearity (str, optional): None, 'tanh', 'sigmoid', 'relu' or 'leaky_relu'. Defaults to "leaky_relu".
        negative_slope (float, optional): The negative slope of leaky_relu. Defaults to 0.0.
        data_format (str, optional): 'NCHW' or 'NHWC'. Defaults to "NCHW".

    Raises:
        NotImplementedError: Only normal and uniform distributions are supported.

    Returns:
        flow.random_normal_initializer or flow.random_uniform_initializer

    For example:

    Example 1:

    .. code-block:: python

        import oneflow as flow
        import oneflow.typing as tp


        def watch_handler(y: tp.Numpy):
            print("out", y)


        @flow.global_function()
        def kaiming_Job() -> None:
            init = flow.kaiming_initializer(shape=(3, 3),
                                            mode="fan_avg",
                                            nonlinearity="relu")
            blob = flow.get_variable(
                "blob-weight",
                shape=(3, 3),
                initializer=init,
                trainable=True
            )
            flow.watch(blob, watch_handler)


        checkpoint = flow.train.CheckPoint()
        checkpoint.init()
        kaiming_Job()

        # out [[ 0.54521346  0.32585594  1.3474437 ]
        #      [ 0.30729076 -0.19158769  0.2709008 ]
        #      [-0.95830524 -0.05093324  0.28178614]]

    Example 2:

    .. code-block:: python

        import oneflow as flow
        import numpy as np
        import oneflow.typing as tp


        @flow.global_function()
        def conv2d_kaiming_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32))
                               ) -> tp.Numpy:
            initializer = flow.kaiming_initializer(shape=(1, 256, 32, 32))
            conv2d = flow.layers.conv2d(
                x,
                filters=128,
                kernel_size=3,
                strides=1,
                padding='SAME',
                kernel_initializer=initializer,
                name="Conv2d"
            )
            return conv2d


        x = np.random.randn(1, 256, 32, 32).astype(np.float32)
        out = conv2d_kaiming_Job(x)

        # out.shape (1, 128, 32, 32)
    """
    assert isinstance(shape, tuple)
    # Kaiming Initialization only deals with FC, Conv and Deconv's weight
    assert len(shape) >= 2
    elem_cnt = functools.reduce(lambda a, b: a * b, shape, 1)
    assert elem_cnt > 0

    assert distribution in ["random_normal", "random_uniform"]
    assert mode in ["fan_in", "fan_out", "fan_avg"]
    assert nonlinearity in [None, "tanh", "sigmoid", "relu", "leaky_relu"]
    assert data_format in ["NCHW", "NHWC"]

    fan = _CalcFan(shape, mode, _get_data_format(data_format))
    gain = _CalcGain(nonlinearity, negative_slope)
    std = gain / math.sqrt(fan)
    if distribution == "random_normal":
        return flow.random_normal_initializer(0.0, std)
    elif distribution == "random_uniform":
        bound = math.sqrt(3.0) * std
        return flow.random_uniform_initializer(-bound, bound)
    else:
        raise NotImplementedError("Only support normal and uniform distribution")
def _get_initializer():
    return flow.random_normal_initializer(mean=0.0, stddev=0.01)
def vgg(images,
        cfg,
        optimizer,
        trainable=True,
        need_transpose=False,
        training=True,
        wd=1.0 / 32768,
        model_weight=True,
        bn=True):
    if need_transpose:
        images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2])

    conv1 = _conv_block(images, 0, cfg, 2, optimizer, model_weight, bn=bn)
    pool1 = flow.nn.max_pool2d(conv1[-1], 2, 2, "VALID", "NCHW", name="pool1")

    conv2 = _conv_block(pool1, 2, cfg, 2, optimizer, model_weight, bn=bn)
    pool2 = flow.nn.max_pool2d(conv2[-1], 2, 2, "VALID", "NCHW", name="pool2")

    conv3 = _conv_block(pool2, 4, cfg, 3, optimizer, model_weight, bn=bn)
    pool3 = flow.nn.max_pool2d(conv3[-1], 2, 2, "VALID", "NCHW", name="pool3")

    conv4 = _conv_block(pool3, 7, cfg, 3, optimizer, model_weight, bn=bn)
    pool4 = flow.nn.max_pool2d(conv4[-1], 2, 2, "VALID", "NCHW", name="pool4")

    conv5 = _conv_block(pool4, 10, cfg, 3, optimizer, model_weight, bn=bn)
    pool5 = flow.nn.max_pool2d(conv5[-1], 2, 2, "VALID", "NCHW", name="pool5")

    pool5 = flow.reshape(pool5, [pool5.shape[0], -1])
    dense0 = flow.layers.dense(
        inputs=pool5,
        units=cfg[13],
        activation=flow.nn.relu,
        use_bias=True,
        kernel_initializer=flow.random_normal_initializer(mean=0, stddev=0.1),
        trainable=trainable,
        name="dense0",
    )
    dense1 = flow.layers.dense(
        inputs=dense0,
        units=cfg[14],
        activation=flow.nn.relu,
        use_bias=True,
        kernel_initializer=flow.random_normal_initializer(mean=0, stddev=0.1),
        trainable=trainable,
        name="dense1",
    )
    dense2 = flow.layers.dense(
        inputs=dense1,
        units=cfg[15],
        use_bias=True,
        kernel_initializer=flow.random_normal_initializer(mean=0, stddev=0.1),
        trainable=trainable,
        name="dense2",
    )
    # flow.watch(fc8)

    def getTypeAndShape(inputs, units):
        in_shape = inputs.shape
        in_num_axes = len(in_shape)
        inputs = (flow.reshape(inputs, (-1, in_shape[-1]))
                  if in_num_axes > 2 else inputs)
        shape = (units, inputs.shape[1])
        dtype = inputs.dtype
        return shape, dtype

    if model_weight == True:
        shape_list = []
        dtype_list = []
        shape_weight, dtype = getTypeAndShape(pool5, cfg[13])
        shape_list.append(shape_weight)
        dtype_list.append(dtype)
        shape_weight, dtype = getTypeAndShape(dense0, cfg[14])
        shape_list.append(shape_weight)
        dtype_list.append(dtype)
        shape_weight, dtype = getTypeAndShape(dense1, cfg[15])
        shape_list.append(shape_weight)
        dtype_list.append(dtype)
        modelWeight.addDense(dtype_old=dtype_list,
                             shape=shape_list,
                             optimizer=optimizer,
                             dense_num=3)

    # shape_weight, dtype = getTypeAndShape(pool5, 4096)
    # modelWeight.add('fc1' + '-weight', dtype, shape_weight)
    # modelWeight.add('fc1' + '-bias', dtype, (4096,))
    # shape_weight, dtype = getTypeAndShape(fc6, 4096)
    # modelWeight.add('fc2' + '-weight', dtype, shape_weight)
    # modelWeight.add('fc2' + '-bias', dtype, (4096,))
    # shape_weight, dtype = getTypeAndShape(fc7, 1000)
    # modelWeight.add('fc_final' + '-weight', dtype, shape_weight)
    # modelWeight.add('fc_final' + '-bias', dtype, (1000,))

    return dense2
"""
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import oneflow as flow
import oneflow.distribute as distribute_util

init = flow.random_normal_initializer(stddev=0.02)


def conv2d(
    input,
    filters,
    size,
    name,
    strides=1,
    padding="same",
    trainable=True,
    reuse=False,
    const_init=False,
    use_bias=True,
):
    name_ = name if reuse == False else name + "_reuse"