def deconv_cfg(data_format='channels_first', activation=None, scale=1.0):
    return dict(padding='same',
                activation=activation,
                data_format=data_format,
                kernel_initializer=VarianceScaling(scale=2.0 * scale))
def conv2dlstm_cfg(data_format='channels_first', scale=1.0):
    # TODO: check scale factor
    return dict(padding='same',
                data_format=data_format,
                kernel_initializer=VarianceScaling(scale=2.0 * scale))
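# A minimal usage sketch (not from the original code) showing how the two
# config helpers above are meant to be unpacked into Keras layer
# constructors. The layer choices (Conv2DTranspose, ConvLSTM2D), filter
# counts, and the channels-last data format are assumptions for illustration.
from tensorflow.keras.layers import Conv2DTranspose, ConvLSTM2D

deconv = Conv2DTranspose(filters=64, kernel_size=3, strides=2,
                         **deconv_cfg(data_format='channels_last',
                                      activation='relu'))
conv_lstm = ConvLSTM2D(filters=32, kernel_size=3,
                       **conv2dlstm_cfg(data_format='channels_last'))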
def build(self, input_shape):
    # [context: (batch_size, time_step, dim), query: (batch_size, time_step, dim)]
    # input_shape: [(None, ?, 128), (None, ?, 128)]
    init = VarianceScaling(scale=1.0, mode='fan_avg', distribution='uniform')
    self.W0 = self.add_weight(name='W0',
                              shape=(input_shape[0][-1], 1),
                              initializer=init,
                              regularizer=l2(3e-7),
                              trainable=True)
    self.W1 = self.add_weight(name='W1',
                              shape=(input_shape[1][-1], 1),
                              initializer=init,
                              regularizer=l2(3e-7),
                              trainable=True)
    self.W2 = self.add_weight(name='W2',
                              shape=(1, 1, input_shape[0][-1]),
                              initializer=init,
                              regularizer=l2(3e-7),
                              trainable=True)
    self.bias = self.add_weight(name='linear_bias',
                                shape=(1,),
                                initializer='zero',
                                regularizer=l2(3e-7),
                                trainable=True)
    super(context2query_attention, self).build(input_shape)
def decoder_block(num_filters, conv1, conv2):
    # Upsample + concatenate (rather than a transposed convolution)
    # to reduce the checkerboard effect.
    up = layers.concatenate([layers.UpSampling2D(size=(2, 2))(conv1), conv2],
                            axis=-1)
    conv = layers.Conv2D(num_filters, (3, 3), padding='same',
                         kernel_initializer=VarianceScaling())(up)
    conv = layers.BatchNormalization()(conv)
    conv = layers.Activation('relu')(conv)
    conv = layers.Conv2D(num_filters, (3, 3), padding='same',
                         kernel_initializer=VarianceScaling())(conv)
    conv = layers.BatchNormalization()(conv)
    conv = layers.Activation('relu')(conv)
    return conv
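# Hypothetical wiring of decoder_block into a small U-Net-style graph; the
# encoder tensors below are invented for illustration and assume a
# channels-last layout.
from tensorflow.keras import Input, layers

inputs = Input(shape=(64, 64, 3))
enc1 = layers.Conv2D(32, (3, 3), padding='same')(inputs)        # 64x64
enc2 = layers.Conv2D(64, (3, 3), padding='same')(
    layers.MaxPooling2D((2, 2))(enc1))                          # 32x32
# Upsample enc2 back to 64x64 and fuse it with the skip connection enc1.
dec = decoder_block(32, enc2, enc1)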
def vnet_block(filters, num_conv=3, subsample=False, upsample=False,
               upsample_mode='conv', skip=True, dropout=0.,
               normalization=None, norm_kwargs=None,
               init=VarianceScaling(scale=3., mode='fan_avg'),
               weight_decay=None, nonlinearity='relu', ndim=3, name=None):
    name = _get_unique_name('vnet_block', name)
    if norm_kwargs is None:
        norm_kwargs = {}

    def f(input):
        output = input
        if subsample:
            output = Convolution(filters=filters, kernel_size=2, strides=2,
                                 ndim=ndim, kernel_initializer=init,
                                 padding='same',
                                 kernel_regularizer=_l2(weight_decay),
                                 name=name + "_downconv")(output)
        for i in range(num_conv):
            output = norm_nlin_conv(filters, kernel_size=5,
                                    normalization=normalization,
                                    weight_decay=weight_decay,
                                    norm_kwargs=norm_kwargs, init=init,
                                    nonlinearity=nonlinearity, ndim=ndim,
                                    name=name)(output)
        if dropout > 0:
            output = get_dropout(dropout, nonlinearity)(output)
        if skip:
            output = _shortcut(input, output, subsample=subsample,
                               upsample=False, upsample_mode=upsample_mode,
                               weight_decay=weight_decay, init=init,
                               ndim=ndim, name=name)
        if upsample:
            # "Up-convolution" also halves the number of feature maps.
            if normalization is not None:
                output = normalization(name=name + "_norm",
                                       **norm_kwargs)(output)
            output = get_nonlinearity(nonlinearity)(output)
            output = _upsample(output, mode=upsample_mode, ndim=ndim,
                               filters=filters // 2, kernel_size=2,
                               kernel_initializer=init,
                               kernel_regularizer=_l2(weight_decay),
                               name=name + "_upconv")
            output = get_nonlinearity(nonlinearity)(output)
        return output

    return f
def assemble_network_structure(self):
    n_input = self.network_info.n_input
    width = self.network_info.hidden_layers
    height = self.network_info.neurons_hidden_layer
    kernel_regularizer = self.network_info.kernel_regularizer
    regularization_param = self.network_info.regularization_parameter
    activation = self.network_info.activation
    loss = self.network_info.loss_function
    learning_rate = self.network_info.learning_rate
    optimizer = self.network_info.optimizer
    output_activation = self.network_info.output_activation
    if optimizer == "adam":
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    dropout_value = self.network_info.dropout_value
    kernel_reg = tf.keras.regularizers.l2(regularization_param)
    if kernel_regularizer == "L1":
        kernel_reg = tf.keras.regularizers.l1(regularization_param)
    seed_random_number(42)
    model = tf.keras.Sequential()
    model.add(layers.Dense(height, activation=activation,
                           input_shape=(n_input,),
                           kernel_regularizer=kernel_reg,
                           kernel_initializer=VarianceScaling(
                               scale=2, distribution="truncated_normal",
                               mode="fan_in")))
    for i in range(width):
        model.add(layers.Dense(height, kernel_regularizer=kernel_reg,
                               activation=activation,
                               kernel_initializer=VarianceScaling(
                                   scale=2,
                                   distribution="truncated_normal",
                                   mode="fan_in")))
        model.add(layers.Dropout(dropout_value))
    model.add(layers.Dense(1, activation=output_activation))
    model.compile(optimizer=optimizer, loss=loss)
    # kernel_initializer=variance_scaling_initializer(factor=2, mode="FAN_IN", uniform=True),
    # kernel_initializer=VarianceScaling(scale=2, distribution="uniform", mode="fan_in"),
    return model
def __init__(self,
             rank: int,
             filters: int,
             depth: int,
             kernel_size: Union[int, Tuple, List],
             strides: Union[int, Tuple, List],
             padding: str,
             data_format: Optional[AnyStr],
             dilation_rate: Union[int, Tuple, List],
             kernel_regularizer: Optional[Union[Dict, AnyStr, Callable]],
             bias_regularizer: Optional[Union[Dict, AnyStr, Callable]],
             activity_regularizer: Optional[Union[Dict, AnyStr, Callable]],
             kernel_constraint: Optional[Union[Dict, AnyStr, Callable]],
             bias_constraint: Optional[Union[Dict, AnyStr, Callable]],
             **kwargs):
    # region Check parameters
    if rank not in (1, 2, 3):
        raise ValueError("Rank must either be 1, 2 or 3. "
                         "Received {}.".format(rank))
    if depth <= 0:
        raise ValueError("Depth must be strictly positive. "
                         "Received {}.".format(depth))
    # endregion

    super(ResBasicBlockND, self).__init__(**kwargs)

    self.rank = rank
    self.filters = filters
    self.depth = depth
    self.kernel_size = conv_utils.normalize_tuple(kernel_size, rank,
                                                  "kernel_size")
    self.strides = conv_utils.normalize_tuple(strides, rank, "strides")
    self.padding = normalize_padding(padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, rank,
                                                    "dilation_rate")
    self.kernel_initializer = VarianceScaling(mode="fan_in")
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.activity_regularizer = regularizers.get(activity_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)

    self.conv_layers: List[Layer] = []
    self.projection_layer: Optional[Layer] = None
    self.residual_multiplier = None

    self.input_spec = InputSpec(ndim=self.rank + 2)
    self.init_layers()
def new_vgg16():
    model = VGG16(include_top=False, input_shape=(224, 224, 3),
                  weights='imagenet')
    # Fine-tuning: freeze some layers
    for layer in model.layers[0:-8]:
        layer.trainable = False
    # Rebuild the model
    flat = layers.Flatten(name='flatten')(model.layers[-1].output)
    fc_1 = layers.Dense(128, name='fc_1',
                        kernel_initializer=VarianceScaling(),
                        kernel_regularizer=regularizers.l2(0.01))(flat)
    # Normalize the inputs of the nonlinear (activation) layer.
    bn_1 = layers.BatchNormalization()(fc_1)
    act_1 = layers.Activation('relu')(bn_1)
    d_1 = layers.Dropout(0.5, name='drop1')(act_1)
    # fc_2 = layers.Dense(128, name='fc_2',
    #                     # kernel_initializer=TruncatedNormal(),
    #                     kernel_regularizer=regularizers.l2(0.01))(act_1)
    # bn_2 = layers.BatchNormalization()(fc_2)
    # act_2 = layers.Activation('relu')(bn_2)
    fc_3 = layers.Dense(2, name='fc_3',
                        kernel_initializer=VarianceScaling(),
                        kernel_regularizer=regularizers.l2(0.01))(d_1)
    # prediction = Activation("softmax", name="softmax")(bn_3)
    # Sigmoid output for binary classification.
    prediction = layers.Activation("sigmoid", name="sigmoid")(fc_3)
    model = tf.keras.Model(inputs=model.inputs, outputs=prediction)
    return model
def init_projection_layer(self):
    conv_layer_type = self.get_conv_layer_type()
    projection_kernel_size = conv_utils.normalize_tuple(
        1, self.rank, "projection_kernel_size")
    projection_kernel_initializer = VarianceScaling()
    self.projection_layer = conv_layer_type(
        filters=self.filters,
        kernel_size=projection_kernel_size,
        strides=self.strides,
        padding="same",
        data_format=self.data_format,
        dilation_rate=self.dilation_rate,
        use_bias=False,
        kernel_initializer=projection_kernel_initializer,
        kernel_regularizer=self.kernel_regularizer,
        activity_regularizer=self.activity_regularizer,
        kernel_constraint=self.kernel_constraint,
        bias_constraint=self.bias_constraint)
def default_weight_initializer(actf='linear', distribution='uniform',
                               mode='fan_in', scale=None):
    inz = []
    for i, af in enumerate(to_list(actf)):
        if distribution in ('uniform', 'normal'):
            tp = VarianceScaling(
                scale=eval_default_scale_factor(af, i) if scale is None else scale,
                mode=mode,
                distribution=distribution)
        elif distribution in ('constant',):
            tp = default_constant_initializer(0.0 if scale is None else scale)
        else:
            raise ValueError(
                'Undefined distribution: pick from ("uniform", "normal", "constant").')
        inz.append(tp)
    return inz
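# Hypothetical usage of default_weight_initializer; it assumes to_list and
# eval_default_scale_factor behave as in the surrounding project (one
# VarianceScaling instance produced per activation in the list).
from tensorflow.keras.layers import Dense

inits = default_weight_initializer(actf=['tanh', 'relu'])
hidden_1 = Dense(64, activation='tanh', kernel_initializer=inits[0])
hidden_2 = Dense(64, activation='relu', kernel_initializer=inits[1])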
def build(self, input_shape):
    init_relu = VarianceScaling(scale=2.0, mode='fan_in',
                                distribution='normal')
    self.depthwise_w = self.add_weight(
        "depthwise_filter",
        shape=(self.kernel_size, 1, input_shape[-1], 1),
        initializer=init_relu,
        regularizer=l2(3e-7),
        trainable=True)
    self.pointwise_w = self.add_weight(
        "pointwise_filter",
        (1, 1, input_shape[-1], self.filter),
        initializer=init_relu,
        regularizer=l2(3e-7),
        trainable=True)
    self.bias = self.add_weight("bias",
                                (input_shape[-1],),
                                regularizer=l2(3e-7),
                                initializer=tf.zeros_initializer())
    super(DepthwiseConv1D, self).build(input_shape)
def res_block(self, inp, filters, kernel_size=3, padding="same", **kwargs):
    """ Residual block """
    logger.debug("inp: %s, filters: %s, kernel_size: %s, kwargs: %s",
                 inp, filters, kernel_size, kwargs)
    name = self.get_name("residual_{}".format(inp.shape[1]))
    var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_0".format(name))(inp)
    if self.use_reflect_padding:
        var_x = ReflectionPadding2D(
            stride=1,
            kernel_size=kernel_size,
            name="{}_reflectionpadding2d_0".format(name))(var_x)
        padding = "valid"
    var_x = self.conv2d(var_x, filters,
                        kernel_size=kernel_size,
                        padding=padding,
                        name="{}_conv2d_0".format(name),
                        **kwargs)
    var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_1".format(name))(var_x)
    if self.use_reflect_padding:
        var_x = ReflectionPadding2D(
            stride=1,
            kernel_size=kernel_size,
            name="{}_reflectionpadding2d_1".format(name))(var_x)
        padding = "valid"
    if not self.use_convaware_init:
        original_init = self.switch_kernel_initializer(
            kwargs,
            VarianceScaling(scale=0.2, mode="fan_in", distribution="uniform"))
    var_x = self.conv2d(var_x, filters,
                        kernel_size=kernel_size,
                        padding=padding,
                        **kwargs)
    if not self.use_convaware_init:
        self.switch_kernel_initializer(kwargs, original_init)
    var_x = Add()([var_x, inp])
    var_x = LeakyReLU(alpha=0.2, name="{}_leakyrelu_3".format(name))(var_x)
    return var_x
def init_layers(self, input_shape):
    conv_layer_type = self.get_conv_layer_type()
    for i in range(self.depth):
        strides = self.strides if (i == 0) else 1
        # All but the first convolution start at zero (residual-style init).
        kernel_initializer = (self.kernel_initializer if (i == 0)
                              else tf.zeros_initializer())
        conv_layer = conv_layer_type(filters=self.filters,
                                     kernel_size=self.kernel_size,
                                     strides=strides,
                                     padding="same",
                                     data_format=self.data_format,
                                     dilation_rate=self.dilation_rate,
                                     use_bias=False,
                                     kernel_initializer=kernel_initializer,
                                     kernel_regularizer=self.kernel_regularizer,
                                     activity_regularizer=self.activity_regularizer,
                                     kernel_constraint=self.kernel_constraint,
                                     bias_constraint=self.bias_constraint)
        self.conv_layers.append(conv_layer)

    if self.use_projection(input_shape):
        projection_kernel_size = conv_utils.normalize_tuple(
            1, self.rank, "projection_kernel_size")
        projection_kernel_initializer = VarianceScaling()
        self.projection_layer = conv_layer_type(
            filters=self.filters,
            kernel_size=projection_kernel_size,
            strides=self.strides,
            padding="same",
            data_format=self.data_format,
            dilation_rate=self.dilation_rate,
            use_bias=False,
            kernel_initializer=projection_kernel_initializer,
            kernel_regularizer=self.kernel_regularizer,
            activity_regularizer=self.activity_regularizer,
            kernel_constraint=self.kernel_constraint,
            bias_constraint=self.bias_constraint)

    self._layers = copy(self.conv_layers)
    if self.projection_layer is not None:
        self._layers.append(self.projection_layer)
from layers.multihead_attention import Attention as MultiHeadAttention
from layers.position_embedding import Position_Embedding as PositionEmbedding
from layers.layer_norm import LayerNormalization
from layers.layer_dropout import LayerDropout
from layers.QAoutputBlock import QAoutputBlock
from layers.BatchSlice import BatchSlice
from layers.DepthwiseConv1D import DepthwiseConv1D
from layers.LabelPadding import LabelPadding
from tensorflow.python.keras.initializers import VarianceScaling
from tensorflow.python.keras.regularizers import l2
import tensorflow as tf
# import keras.backend as K
from tensorflow.python.keras.layers import Layer, Dropout
from tensorflow.python.keras import backend as K

regularizer = l2(3e-7)
init = VarianceScaling(scale=1.0, mode='fan_avg', distribution='uniform')
init_relu = VarianceScaling(scale=2.0, mode='fan_in', distribution='normal')


def mask_logits(inputs, mask, mask_value=-1e30):
    mask = tf.cast(mask, tf.float32)
    return inputs + mask_value * (1 - mask)


def highway(highway_layers, x, num_layers=2, dropout=0.0):
    # Reduce dim
    x = highway_layers[0](x)
    for i in range(num_layers):
        T = highway_layers[i * 2 + 1](x)
        H = highway_layers[i * 2 + 2](x)
        H = Dropout(dropout)(H)
        # The original snippet is truncated here; the standard highway
        # combination (gate T between transform H and carry x) is assumed.
        x = H * T + x * (1 - T)
    return x
from tensorflow.python.keras.initializers import VarianceScaling
from tensorflow.python.keras.layers import Conv2D, Dense, Dropout, Flatten
from tensorflow.python.keras.models import Sequential
import gym
import gym_2048
import matplotlib.pyplot as plt


def create_env():
    '''Returns a custom environment for the agent'''
    env = gym.make('2048-4x4-v0')
    # ClipReward and OneChannel are custom wrappers defined elsewhere
    # in the project.
    env = ClipReward(env)
    return OneChannel(env)


dummy_env = create_env()
initializer = VarianceScaling()
model = Sequential([
    Conv2D(8, 4, activation='elu', padding='same',
           input_shape=dummy_env.observation_space.shape,
           kernel_initializer=initializer),
    Conv2D(16, 2, activation='elu', padding='valid',
           input_shape=dummy_env.observation_space.shape,
           kernel_initializer=initializer),
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='elu', kernel_initializer=initializer)
])

# Exploration and learning rate decay after each epoch
eps = 0.2
eps_decay = 0.9
def main():
    tf.random.set_seed(42)

    block_count = 4
    basic_block_count = 8
    input_shape = (32, 32, 3)
    layers_params = {
        "rank": 2,
        "head_size": 8,
        "head_count": 8,
        "basic_block_count": basic_block_count,
        "kernel_size": 3,
        "strides": 1,
        "dilation_rate": 1,
        "activation": "relu",
        "kernel_regularizer": None,
        "bias_regularizer": None,
        "activity_regularizer": None,
        "kernel_constraint": None,
        "bias_constraint": None,
    }

    layers = [
        ResBlock2D(filters=16, basic_block_count=basic_block_count,
                   kernel_size=7, input_shape=input_shape),
        MaxPooling2D(4)
    ]
    for i in range(1, block_count):
        layer = ResSASABlock(**layers_params)
        layers.append(layer)
        layers_params["head_size"] *= 2
        layers.append(MaxPooling2D(2))
    layers.append(Flatten())
    layers.append(Dense(units=10, activation="softmax",
                        kernel_initializer=VarianceScaling()))

    model = Sequential(layers=layers,
                       name="StandAloneSelfAttentionBasedClassifier")
    model.summary()
    model.compile("adam", loss="categorical_crossentropy", metrics=["acc"])

    # region Data
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train = x_train.astype(np.float32) / 255.0
    x_test = x_test.astype(np.float32) / 255.0
    y_train = to_categorical(y_train, num_classes=10)
    y_test = to_categorical(y_test, num_classes=10)

    generator = ImageDataGenerator(rotation_range=15,
                                   width_shift_range=5. / 32,
                                   height_shift_range=5. / 32,
                                   horizontal_flip=True)
    generator.fit(x_train)
    # endregion

    log_dir = "../../logs/tests/stand_alone_self_attention_cifar10/{}".format(
        int(time()))
    log_dir = os.path.normpath(log_dir)
    tensorboard = TensorBoard(log_dir=log_dir, profile_batch="500,520")

    model.fit(generator.flow(x_train, y_train, batch_size=64),
              steps_per_epoch=100,
              epochs=300,
              validation_data=(x_test, y_test),
              validation_steps=100,
              verbose=1,
              callbacks=[tensorboard])
def dense_cfg(activation=None, scale=1.0):
    return dict(activation=activation,
                kernel_initializer=VarianceScaling(scale=2.0 * scale))
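# A minimal sketch (not from the original code) of how dense_cfg would be
# unpacked into a Dense layer; with the default scale=1.0 this yields a
# variance scale of 2.0, i.e. He-style initialization for a ReLU layer.
from tensorflow.keras.layers import Dense

hidden = Dense(256, **dense_cfg(activation='relu'))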
def GetWeights(gain=math.sqrt(2)):
    return VarianceScaling(gain)
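# Hypothetical usage note: VarianceScaling interprets its first positional
# argument as a variance scale, not a standard-deviation gain, so a
# PyTorch-style gain g corresponds to scale=g**2 (true He initialization
# would be GetWeights(2.0) rather than the default sqrt(2)).
from tensorflow.keras.layers import Conv2D

conv = Conv2D(64, (3, 3), padding='same', kernel_initializer=GetWeights())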
def DeepFMmmoe(linear_feature_columns, dnn_feature_columns, embedding_size=8,
               use_fm=True, dnn_hidden_units=(128, 128),
               l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
               init_std=0.0001, seed=1024, dnn_dropout=0,
               dnn_activation='relu', dnn_use_bn=False, task='binary',
               task_net_size=(128,)):
    """Instantiates the DeepFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features
        used by the linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used
        by the deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding_size
    :param use_fm: bool, use FM part or not
    :param dnn_hidden_units: list of positive integers or empty list, the
        layer number and units in each layer of the DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to the linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param init_std: float, standard deviation used to initialize the embedding vector
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0, 1), the probability of dropping out a given DNN coordinate.
    :param dnn_activation: Activation function to use in the DNN
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding,
        init_std, seed)

    # linear_logit = get_linear_logit(features, linear_feature_columns,
    #                                 l2_reg=l2_reg_linear, init_std=init_std,
    #                                 seed=seed, prefix='linear')
    #
    # fm_input = concat_fun(sparse_embedding_list, axis=1)
    # fm_logit = FM()(fm_input)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    # dnn_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(dnn_out)

    mmoe_layers = MMoE(units=32, num_experts=4, num_tasks=2)(dnn_input)

    output_layers = []
    target = ['finish', 'like']
    # Build a tower layer from each MMoE output.
    for index, task_layer in enumerate(mmoe_layers):
        tower_layer = Dense(units=128, activation='relu',
                            kernel_initializer=VarianceScaling())(task_layer)
        output_layer = Dense(units=1, name=target[index],
                             activation='sigmoid',
                             kernel_initializer=VarianceScaling())(tower_layer)
        output_layers.append(output_layer)

    # finish_logit = tf.keras.layers.add([linear_logit, output_layers[0], fm_logit])
    # like_logit = tf.keras.layers.add([linear_logit, output_layers[1], fm_logit])
    #
    # output_finish = PredictionLayer(task, name='finish_')(finish_logit)
    # output_like = PredictionLayer(task, name='like_')(like_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output_layers)
    return model
def get_fixup_initializer(model_depth: int) -> VarianceScaling:
    return VarianceScaling(scale=1 / np.sqrt(model_depth),
                           mode="fan_in",
                           distribution="normal")
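# A hedged sketch of applying the Fixup-style initializer above: the helper
# scales the weight variance by 1/sqrt(model_depth) so residual-branch
# activations do not grow with depth. The layer below is invented for
# illustration.
from tensorflow.keras.layers import Conv2D

branch_conv = Conv2D(64, (3, 3), padding='same',
                     kernel_initializer=get_fixup_initializer(model_depth=50))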
def assemble_vnet(input_shape, num_classes, init_num_filters=32,
                  num_pooling=4, short_skip=True, long_skip=True,
                  long_skip_merge_mode='concat', upsample_mode='conv',
                  dropout=0., normalization=None, norm_kwargs=None,
                  init=VarianceScaling(scale=3., mode='fan_avg'),
                  weight_decay=None, nonlinearity='prelu', ndim=3,
                  verbose=True):
    """
    input_shape : A tuple specifying the image input shape.
    num_classes : The number of classes in the segmentation output.
    init_num_filters : The number of filters in the first pair and last pair
        of convolutions in the network. With every downsampling, the number
        of filters is doubled; with every upsampling, it is halved.
    num_pooling : The number of pooling (and thus upsampling) operations to
        perform in the network.
    short_skip : A boolean specifying whether to use ResNet-like shortcut
        connections from the input of each block to its output. The inputs
        are summed with the outputs.
    long_skip : A boolean specifying whether to use long skip connections
        from the downward path to the upward path. These can either
        concatenate or sum features across.
    long_skip_merge_mode : Either 'sum' or 'concat' features across skip.
    upsample_mode : Either 'repeat' or 'conv'. With 'repeat', rows and
        columns are repeated as in nearest neighbour interpolation. With
        'conv', upscaling is done via transposed convolution.
    dropout : A float in [0, 1.], specifying the dropout probability.
    normalization : The normalization to apply to layers (none by default).
    norm_kwargs : Keyword arguments to pass to normalization layers. If
        using BatchNormalization, kwargs are autoset with a momentum of 0.9.
    init : A string specifying (or a function defining) the initializer for
        layers.
    weight_decay : The weight decay (L2 penalty) used in every convolution
        (float).
    nonlinearity : The nonlinearity to use, passed as a string or a function.
    ndim : The spatial dimensionality of the input and output (either 2 or 3).
    verbose : A boolean specifying whether to print messages about model
        structure during construction (if True).
    """

    '''
    Determine channel axis.
    '''
    channel_axis = get_channel_axis(ndim)

    '''
    ndim must be only 2 or 3.
    '''
    if ndim not in [2, 3]:
        raise ValueError("ndim must be either 2 or 3")

    '''
    If BatchNormalization is used and norm_kwargs is not set, set default
    kwargs.
    '''
    if norm_kwargs is None:
        if normalization == BatchNormalization:
            norm_kwargs = {'momentum': 0.9,
                           'scale': True,
                           'center': True,
                           'axis': channel_axis}
        else:
            norm_kwargs = {}

    '''
    Constant kwargs passed to the init and main blocks.
    '''
    block_kwargs = {'skip': short_skip,
                    'weight_decay': weight_decay,
                    'normalization': normalization,
                    'norm_kwargs': norm_kwargs,
                    'init': init,
                    'nonlinearity': nonlinearity,
                    'upsample_mode': upsample_mode,
                    'dropout': dropout,
                    'ndim': ndim}

    '''
    No sub/up-sampling at beginning, end.
    '''
    kwargs = {'num_conv': 1}
    kwargs.update(block_kwargs)
    preprocessor = vnet_block(filters=init_num_filters, **kwargs)
    postprocessor = vnet_block(filters=init_num_filters, **kwargs)

    '''
    Assemble all necessary blocks.
    '''
    blocks_down = []
    blocks_across = []
    blocks_up = []
    for i in range(1, num_pooling):
        kwargs = {'filters': init_num_filters * (2**i)}
        if i == 1:
            kwargs['num_conv'] = 2
        else:
            kwargs['num_conv'] = 3
        kwargs.update(block_kwargs)
        blocks_down.append((vnet_block, kwargs))
    kwargs = {'filters': init_num_filters * (2**num_pooling), 'num_conv': 3}
    kwargs.update(block_kwargs)
    blocks_across.append((vnet_block, kwargs))
    for i in range(num_pooling - 1, 0, -1):
        kwargs = {'filters': init_num_filters * (2**i)}
        if i == 1:
            kwargs['num_conv'] = 2
        else:
            kwargs['num_conv'] = 3
        kwargs.update(block_kwargs)
        blocks_up.append((vnet_block, kwargs))
    blocks = blocks_down + blocks_across + blocks_up

    '''
    Assemble model.
    '''
    model = assemble_model(input_shape=input_shape,
                           num_classes=num_classes,
                           blocks=blocks,
                           preprocessor=preprocessor,
                           postprocessor=postprocessor,
                           long_skip=long_skip,
                           long_skip_merge_mode=long_skip_merge_mode,
                           ndim=ndim,
                           verbose=verbose)
    return model