def google_net(size=256, kernel=3):
    """Build and compile a small three-stage convolutional classifier.

    The network stacks three stride-2 ``Conv2D`` layers (32, 64 and 128
    filters), each followed by 2x2 max pooling, then flattens into a
    256-unit dense layer named ``features`` and a 3-way softmax layer named
    ``denseout``. Batch normalisation is applied after the first convolution
    only. The first two convolutions and the ``features`` layer carry an L2
    kernel penalty of 0.01.

    Args:
        size: Height and width of the square, 3-channel input image.
        kernel: Side length of the square convolution kernels.

    Returns:
        The Sequential model, compiled with categorical cross-entropy,
        RMSprop (lr=1e-4, decay=0.1e-6) and the accuracy metric.
    """
    model = Sequential()

    model.add(Conv2D(32, (kernel, kernel),
                     activation='relu',
                     input_shape=(size, size, 3),
                     strides=2,
                     kernel_regularizer=regularizers.l2(0.01),
                     name='cv1'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (kernel, kernel),
                     activation='relu',
                     strides=2,
                     kernel_regularizer=regularizers.l2(0.01),
                     name='cv2'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(128, (kernel, kernel),
                     activation='relu',
                     strides=2,
                     name='cv3.3'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(256, kernel_regularizer=regularizers.l2(0.01), name='features'))
    model.add(Activation('relu'))
    model.add(Dense(3, activation='softmax', name='denseout'))

    # summary() prints the table itself and returns None; wrapping it in
    # print() only emitted an extra "None" line.
    model.summary()

    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(lr=1e-4, decay=0.1e-6),
        metrics=['accuracy'])
    return model
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Residual block whose shortcut is the unmodified input tensor.

    The main path is 1x1 conv -> BN -> ReLU -> (kernel_size) conv -> BN ->
    ReLU -> 1x1 conv -> BN. Its output is added to `input_tensor` and passed
    through a final ReLU.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of exactly three integers, the filter counts of
            the three conv layers of the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer
            names

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    branch = str(stage) + block + '_branch'
    conv_name_base = 'res' + branch
    bn_name_base = 'bn' + branch

    def conv_bn(tensor, n_filters, size, tag, **conv_kwargs):
        # One bias-free convolution followed by its batch normalisation.
        tensor = layers.Conv2D(
            n_filters, size,
            use_bias=False,
            kernel_initializer='he_normal',
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name=conv_name_base + tag,
            **conv_kwargs)(tensor)
        return layers.BatchNormalization(
            axis=bn_axis,
            momentum=BATCH_NORM_DECAY,
            epsilon=BATCH_NORM_EPSILON,
            name=bn_name_base + tag)(tensor)

    main = conv_bn(input_tensor, filters1, (1, 1), '2a')
    main = layers.Activation('relu')(main)

    main = conv_bn(main, filters2, kernel_size, '2b', padding='same')
    main = layers.Activation('relu')(main)

    main = conv_bn(main, filters3, (1, 1), '2c')

    merged = layers.add([main, input_tensor])
    return layers.Activation('relu')(merged)
def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4):
    ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout

    Args:
        ip: Input keras tensor
        nb_filter: number of filters of the 3x3 convolution
        bottleneck: if True, prepend a 1x1 convolution with 4 * nb_filter
            filters followed by BatchNorm and Relu
        dropout_rate: dropout rate; dropout is skipped when falsy
        weight_decay: L2 weight decay factor applied to every convolution
            kernel of the block

    Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck)
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip)
    x = Activation('relu')(x)

    if bottleneck:
        # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua
        inter_channel = nb_filter * 4

        x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
                   kernel_regularizer=l2(weight_decay))(x)
        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
        x = Activation('relu')(x)

    # The 3x3 convolution previously had no regularizer, so `weight_decay`
    # was silently ignored here (and was entirely unused when
    # bottleneck=False). Apply it as on the bottleneck convolution.
    x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False,
               kernel_regularizer=l2(weight_decay))(x)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    return x
def google_vgg16_finetune(classes=3, size=256):
    """Build and compile a VGG16-based classifier for fine-tuning.

    An ImageNet-initialised VGG16 without its top is followed by a 1x1
    ``squeeze`` convolution (256 filters), a flatten layer, a 256-unit
    ``features`` dense layer with ReLU, and a softmax ``out`` layer.

    Only the layers named ``block5_conv1``-``block5_conv3``, ``squeeze``,
    ``features`` and ``out`` are trainable; every other layer is frozen.

    Args:
        classes: Number of output classes of the softmax layer.
        size: Height and width of the square, 3-channel input image.

    Returns:
        The Model, compiled with categorical cross-entropy, RMSprop
        (lr=1e-4) and the accuracy metric.
    """
    input_layer = Input(shape=(size, size, 3), name='image_input')
    base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_layer)

    x = Conv2D(name='squeeze', filters=256, kernel_size=(1, 1))(base_model.output)  # squeeze channels
    x = Flatten(name='avgpool')(x)
    x = Dense(256, name='features', kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dense(classes, activation='softmax', name='out')(x)

    model = Model(inputs=base_model.input, outputs=x)

    # 'squeeze' is created here with fresh random weights; leaving it out of
    # this set froze it at its initial values, so it is now trained together
    # with the rest of the new head.
    trainable_names = {
        'block5_conv1', 'block5_conv2', 'block5_conv3',
        'squeeze', 'features', 'out',
    }
    for layer in model.layers:
        layer.trainable = layer.name in trainable_names

    # summary() prints the table itself and returns None; wrapping it in
    # print() only emitted an extra "None" line.
    model.summary()

    model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.keras.optimizers.RMSprop(lr=1e-4),
        metrics=['accuracy'])
    return model
def create_embedding_dict(feature_dim_dict, embedding_size, init_std, seed, l2_reg, prefix='sparse'):
    """Create one Embedding layer per sparse (and sequence) feature.

    Args:
        feature_dim_dict: dict with a "sparse" entry mapping feature name to
            its dimension and, optionally, a "sequence" entry holding
            objects that expose `.name` and `.dimension`.
        embedding_size: output size of every embedding, or the string
            'auto' to use 6 * int(dimension ** 0.25) per feature.
        init_std: standard deviation of the RandomNormal initializer.
        seed: seed of the RandomNormal initializer.
        l2_reg: L2 factor of the embeddings regularizer.
        prefix: prefix of the generated layer names
            (`<prefix>_emb_<index>-<feature name>`).

    Returns:
        dict mapping feature name to its Embedding layer. Sequence features
        whose name already exists among the sparse ones reuse that entry.
    """
    def build(dimension, index, feat_name):
        # Single place where an Embedding is configured.
        if embedding_size == 'auto':
            out_dim = 6 * int(pow(dimension, 0.25))
        else:
            out_dim = embedding_size
        return Embedding(dimension, out_dim,
                         embeddings_initializer=RandomNormal(
                             mean=0.0, stddev=init_std, seed=seed),
                         embeddings_regularizer=l2(l2_reg),
                         name=prefix + '_emb_' + str(index) + '-' + feat_name)

    sparse_dims = feature_dim_dict["sparse"]
    sparse_embedding = {
        feat: build(sparse_dims[feat], i, feat)
        for i, feat in enumerate(sparse_dims)
    }

    if 'sequence' not in feature_dim_dict:
        return sparse_embedding

    # Sequence features continue the numbering after the sparse ones.
    count = len(sparse_embedding)
    for feat in feature_dim_dict['sequence']:
        if feat.name in sparse_embedding:
            continue
        sparse_embedding[feat.name] = build(feat.dimension, count, feat.name)
        count += 1

    return sparse_embedding
def __init__(self, game: GridGame):
    """Set up training bookkeeping and build the convolutional model.

    The action size is the length of `game.get_valid_moves` evaluated on a
    freshly created board. The model is four 3x3 convolutions with 512
    channels (the first two with 'same' padding), two dropout layers, a
    flatten layer and a dense layer with `action_size + 1` outputs, compiled
    with the 'adam' optimizer and a mean-squared-error loss.
    """
    super().__init__(game)

    # game params
    self.board_height = game.board_height
    self.board_width = game.board_width
    self.action_size = len(game.get_valid_moves(game.create_board()))

    # training bookkeeping
    self.epochs_completed = 0
    self.epochs_to_train = 100
    self.checkpoint_name = 'random weights'
    self.args = Namespace(lr=0.001,
                          dropout=0.3,
                          epochs=10,
                          batch_size=64,
                          num_channels=512)

    num_channels = 512
    kernel_size = [3, 3]
    dropout = 0.3
    # regularizer = regularizers.l2(0.00006)
    regularizer = regularizers.l2(0.0001)

    # Per-layer keyword arguments that differ between the four convolutions.
    conv_variants = (
        {'padding': 'same',
         'input_shape': (self.board_height, self.board_width, 1)},
        {'padding': 'same'},
        {},
        {},
    )

    network = Sequential()
    for variant in conv_variants:
        network.add(Conv2D(num_channels, kernel_size,
                           activation='relu',
                           activity_regularizer=regularizer,
                           **variant))

    # Two consecutive dropout layers, as in the original layout.
    for _ in range(2):
        network.add(Dropout(dropout))

    network.add(Flatten())
    network.add(Dense(self.action_size + 1))
    network.compile('adam', 'mean_squared_error')

    self.model = network
def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4):
    ''' Apply BatchNorm, Relu, 1x1 Conv2D with optional compression, and 2x2 average pooling

    Args:
        ip: keras tensor
        nb_filter: number of filters before compression
        compression: multiplier applied to nb_filter to obtain the number of
            filters of the 1x1 convolution (int(nb_filter * compression))
        weight_decay: L2 weight decay factor of the convolution kernel

    Returns: keras tensor, after applying batch_norm, relu, 1x1 conv and average pooling
    '''
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    normalized = BatchNormalization(axis=channel_axis, epsilon=1.1e-5)(ip)
    activated = Activation('relu')(normalized)

    reduced = Conv2D(int(nb_filter * compression), (1, 1),
                     kernel_initializer='he_normal',
                     padding='same',
                     use_bias=False,
                     kernel_regularizer=l2(weight_decay))(activated)

    return AveragePooling2D((2, 2), strides=(2, 2))(reduced)
def merge_dense_input(dense_input_, embed_list, embedding_size, l2_reg):
    """Append the dense (continuous) inputs to the list of embeddings.

    With `embedding_size == "auto"` the dense inputs are concatenated (when
    there is more than one), reshaped to `[1, number of dense inputs]` and
    appended as a single entry. Otherwise one shared bias-free Dense layer
    projects every input to `embedding_size`, each result is reshaped to
    `(1, embedding_size)` and all of them are appended.

    Args:
        dense_input_: dict whose values are the dense input tensors.
        embed_list: list extended in place with the new tensors.
        embedding_size: target embedding size, or "auto".
        l2_reg: L2 factor for the Dense projection kernel.

    Returns:
        `embed_list` (the same list object that was passed in).
    """
    dense_tensors = list(dense_input_.values())
    if not dense_tensors:
        return embed_list

    if embedding_size == "auto":
        if len(dense_tensors) == 1:
            merged = dense_tensors[0]
        else:
            merged = Concatenate()(dense_tensors)
        merged = Reshape([1, len(dense_tensors)])(merged)
        embed_list.append(merged)
        return embed_list

    # A single Dense layer (hence shared weights) projects every input.
    projector = Dense(embedding_size, use_bias=False,
                      kernel_regularizer=l2(l2_reg))
    projected = [projector(tensor) for tensor in dense_tensors]

    reshaper = Reshape((1, embedding_size))
    embed_list += [reshaper(tensor) for tensor in projected]
    return embed_list
def __transition_up_block(ip, nb_filters, type='deconv', weight_decay=1E-4):
    ''' Upscale the input tensor with one of three strategies

    Args:
        ip: keras tensor
        nb_filters: number of filters of the convolutions (unused for
            'upsampling')
        type: can be 'upsampling', 'subpixel', 'deconv'. Determines
            type of upsampling performed; any value other than
            'upsampling' or 'subpixel' selects the transposed convolution
        weight_decay: weight decay factor (unused for 'upsampling')

    Returns: keras tensor, after applying upsampling operation.
    '''
    if type == 'upsampling':
        return UpSampling2D()(ip)

    if type == 'subpixel':
        # 3x3 conv -> sub-pixel upscaling (factor 2) -> 3x3 conv
        pre = Conv2D(nb_filters, (3, 3), activation='relu', padding='same',
                     kernel_regularizer=l2(weight_decay), use_bias=False,
                     kernel_initializer='he_normal')(ip)
        upscaled = SubPixelUpscaling(scale_factor=2)(pre)
        return Conv2D(nb_filters, (3, 3), activation='relu', padding='same',
                      kernel_regularizer=l2(weight_decay), use_bias=False,
                      kernel_initializer='he_normal')(upscaled)

    # Default: stride-2 transposed convolution.
    return Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same',
                           strides=(2, 2), kernel_initializer='he_normal',
                           kernel_regularizer=l2(weight_decay))(ip)
def build_model(self):
    """Build the VGG-style network with dropout and weight decay.

    Thirteen 3x3 'same' convolutions (64-64-128-128-256x3-512x6), each
    followed by ReLU and batch normalisation, are interleaved with dropout
    and 2x2 max pooling. The last convolution is named 'vgg'. When
    `self.include_top` is set, a classifier head is appended: max pooling,
    dropout, flatten, Dense(512) + ReLU + BN, dropout and a softmax layer
    with `self.num_classes` outputs.

    Every convolution and dense layer receives `trainable=self.mode`, and
    every regularized kernel uses `self.weight_decay` as its L2 factor.

    Returns:
        The (uncompiled) Sequential model.
    """
    weight_decay = self.weight_decay
    pool = 'pool'

    # (filters, what follows the conv/relu/bn triple): a dropout rate,
    # `pool` for 2x2 max pooling, or None for nothing.
    plan = [
        (64, 0.3), (64, pool),
        (128, 0.4), (128, pool),
        (256, 0.4), (256, 0.4), (256, pool),
        (512, 0.4), (512, 0.4), (512, pool),
        (512, 0.4), (512, 0.4), (512, None),
    ]
    last_index = len(plan) - 1

    model = Sequential()
    for index, (filters, follow_up) in enumerate(plan):
        extra = {}
        if index == 0:
            extra['input_shape'] = self.x_shape
        if index == last_index:
            extra['name'] = 'vgg'

        model.add(Conv2D(filters, (3, 3), padding='same',
                         kernel_regularizer=regularizers.l2(weight_decay),
                         trainable=self.mode, **extra))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        if follow_up == pool:
            model.add(MaxPooling2D(pool_size=(2, 2)))
        elif follow_up is not None:
            model.add(Dropout(follow_up))

    if self.include_top:
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))

        model.add(Flatten())
        model.add(Dense(512,
                        kernel_regularizer=regularizers.l2(weight_decay),
                        trainable=self.mode))
        model.add(Activation('relu'))
        model.add(BatchNormalization())

        model.add(Dropout(0.5))
        model.add(Dense(self.num_classes, trainable=self.mode))
        model.add(Activation('softmax'))

    return model
def VGG16_Places365(include_top=True, weights='places',
                    input_tensor=None, input_shape=None,
                    pooling=None,
                    classes=365):
    """Instantiates the VGG16-places365 architecture.

    Optionally loads weights pre-trained
    on Places. Note that when using TensorFlow,
    for best performance you should set
    `image_data_format="channels_last"` in your Keras config
    at ~/.keras/keras.json.

    The model and the weights are compatible with both
    TensorFlow and Theano. The data format
    convention used by the model is the one
    specified in your Keras config file.

    # Arguments
        include_top: whether to include the 3 fully-connected
            layers at the top of the network.
        weights: one of `None` (random initialization),
            'places' (pre-training on Places),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or `(3, 224, 224)` (with `channels_first` data format).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 48.
            E.g. `(200, 200, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified. It sets the size of
            the final `predictions` layer.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, or invalid input shape
    """
    if not (weights in {'places', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `places` '
                         '(pre-training on Places), '
                         'or the path to the weights file to be loaded.')

    if weights == 'places' and include_top and classes != 365:
        raise ValueError('If using `weights` as places with `include_top`'
                         ' as true, `classes` should be 365')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=48,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    # Convolutional blocks 1-5: (filters, number of 3x3 convolutions), each
    # block closed by a 2x2 max pooling. Layer names follow the
    # `block<N>_conv<M>` / `block<N>_pool` scheme.
    block_specs = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    x = img_input
    for block_idx, (filters, n_convs) in enumerate(block_specs, start=1):
        for conv_idx in range(1, n_convs + 1):
            x = Conv2D(filters=filters, kernel_size=3, strides=(1, 1), padding='same',
                       kernel_regularizer=l2(0.0002),
                       activation='relu',
                       name='block{}_conv{}'.format(block_idx, conv_idx))(x)
        x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                         name='block{}_pool'.format(block_idx),
                         padding='valid')(x)

    if include_top:
        # Classification block
        x = Flatten(name='flatten')(x)
        x = Dense(4096, activation='relu', name='fc1')(x)
        x = Dropout(0.5, name='drop_fc1')(x)

        x = Dense(4096, activation='relu', name='fc2')(x)
        x = Dropout(0.5, name='drop_fc2')(x)

        # Honour `classes` (previously hard-coded to 365, which silently
        # ignored the argument). With weights='places' the check above
        # already guarantees classes == 365.
        x = Dense(classes, activation='softmax', name="predictions")(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='vgg16-places365')

    # load weights
    if weights == 'places':
        if include_top:
            weights_path = get_file(
                'vgg16-places365_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'vgg16-places365_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models')

        model.load_weights(weights_path)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first':
            if include_top:
                maxpool = model.get_layer(name='block5_pool')
                shape = maxpool.output_shape[1:]
                dense = model.get_layer(name='fc1')
                layer_utils.convert_dense_weights_data_format(
                    dense, shape, 'channels_first')

            if K.backend() == 'tensorflow':
                warnings.warn('You are using the TensorFlow backend, yet you '
                              'are using the Theano '
                              'image data format convention '
                              '(`image_data_format="channels_first"`). '
                              'For best performance, set '
                              '`image_data_format="channels_last"` in '
                              'your Keras config '
                              'at ~/.keras/keras.json.')

    elif weights is not None:
        model.load_weights(weights)

    return model
def caltech_model3(n_classes: int, input_shape=None, input_tensor=None,
                   weights_path: Union[None, str] = None) -> Sequential:
    """
    Builds the caltech network.

    Nine 3x3 'same' ReLU convolutions, each followed by batch normalisation
    and either dropout or 2x2 max pooling, feed a head made of
    Flatten -> Dense(1024) -> Dense(256) -> BatchNormalization ->
    Dropout(0.5) -> softmax. Every convolution and the two hidden dense
    layers use an L2 kernel penalty of 1e-3.

    :param n_classes: the number of classes, i.e. the size of the softmax output layer.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: the Keras model built from the inputs and the softmax outputs.
    """
    inputs = create_inputs(input_shape, input_tensor)

    # Define a weight decay for the regularisation.
    weight_decay = 1e-3
    pool = 'pool'

    # (filters, what follows conv + batch norm): a dropout rate or `pool`
    # for 2x2 max pooling.
    plan = (
        (64, 0.3), (64, pool),
        (64, 0.4), (128, pool),
        (256, 0.4), (256, 0.5), (256, pool),
        (512, 0.4), (512, 0.4),
    )

    x = inputs
    for index, (filters, follow_up) in enumerate(plan):
        # Only the first convolution is given the input shape.
        extra = {'input_shape': input_shape} if index == 0 else {}
        x = Conv2D(filters, (3, 3), padding='same', activation='relu',
                   kernel_regularizer=l2(weight_decay), **extra)(x)
        x = BatchNormalization()(x)
        if follow_up == pool:
            x = MaxPooling2D(pool_size=(2, 2))(x)
        else:
            x = Dropout(follow_up)(x)

    x = Flatten()(x)
    for units in (1024, 256):
        x = Dense(units, kernel_regularizer=l2(weight_decay))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    outputs = Dense(n_classes, activation='softmax', name='softmax_outputs')(x)

    # Create model.
    model = Model(inputs, outputs, name='caltech_model3')

    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
def _classification_sub_net(num_classes, num_anchor=9):
    """Creates an object classification sub-network for the RetinaNet.

    Four 3x3 ReLU convolutions with 256 filters are followed by a 3x3
    sigmoid convolution with `num_classes * num_anchor` filters, whose bias
    is initialised to log(1 / 99), and a reshape to `(-1, num_classes)`.

    Args:
        num_classes (int): number of classes.
        num_anchor (int, optional): number of anchor boxes. Defaults to 9.

    Returns:
        'Model' object: classification sub-network.
    """
    def conv(filters, activation, **extra):
        # All convolutions share kernel size, stride, padding, L2 penalty
        # and kernel initialisation; each gets its own regularizer and
        # initializer instance.
        return layers.Conv2D(
            filters,
            kernel_size=3,
            strides=1,
            padding='same',
            activation=activation,
            kernel_regularizer=regularizers.l2(0.0001),
            kernel_initializer=tf.random_normal_initializer(stddev=0.01),
            **extra)

    model = models.Sequential()
    for _ in range(4):
        model.add(conv(256, 'relu'))

    model.add(conv(num_classes * num_anchor, 'sigmoid',
                   bias_initializer=tf.initializers.constant(np.log(1 / 99))))

    # the output dimension is [batch, #anchor, #classes]
    model.add(layers.Reshape((-1, num_classes)))
    return model
def _gen_l2_regularizer(use_l2_regularizer=True): return regularizers.l2(L2_WEIGHT_DECAY) if use_l2_regularizer else None
conv2_4 = standard_unit(up2_4, stage='24', num_filter=num_filter[1]) conv2_4 = Cropping2D(crop[3])(conv2_4) up1_5 = Conv2DTranspose(num_filter[0], (3, 3), strides=(1, 1), name='up15', padding='same')(conv2_4) conv1_5 = standard_unit(up1_5, stage='15', num_filter=num_filter[0]) conv1_5 = Cropping2D(crop[4])(conv1_5) x = standard_unit(conv1_5, stage='_out', num_filter=8) outputs = Conv2D(1, (1, 1), name='main_output', kernel_initializer='he_normal', padding='same', kernel_regularizer=l2(3e-4))(x) model = Model(inputs=inputs, outputs=outputs, name='FCN') model.summary() # training # https://github.com/keras-team/keras/issues/10842 # https://stackoverflow.com/questions/52932406/is-the-class-generator-inheriting-sequence-thread-safe-in-keras-tensorflow # use_multiprocessing not working on Windows if gpus <= 1: # 1 gpu model.compile(optimizer=Adam(lr=1e-3), loss='mean_squared_error', metrics=[r_squared]) original_weights = keras.backend.batch_get_value(model.weights) model.fit_generator(generator=training_generator, validation_data=validation_generator,
def ResNet50(num_classes):
    """Instantiates the ResNet50 architecture.

    The input is a 224x224 RGB image laid out according to the backend's
    image data format. A 7x7 stride-2 stem with batch norm, ReLU and 3x3
    max pooling is followed by four residual stages (3, 4, 6 and 3 blocks),
    global average pooling and a softmax dense layer named 'fc1000'.

    Args:
        num_classes: `int` number of classes for image classification.

    Returns:
        A Keras model instance.
    """
    # Determine proper input shape
    channels_first = backend.image_data_format() == 'channels_first'
    input_shape = (3, 224, 224) if channels_first else (224, 224, 3)
    bn_axis = 1 if channels_first else 3

    img_input = layers.Input(shape=input_shape)

    # Stem
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # (stage, filters, block labels): block 'a' is a conv_block, the
    # remaining labels are identity blocks.
    stage_specs = (
        (2, (64, 64, 256), 'abc'),
        (3, (128, 128, 512), 'abcd'),
        (4, (256, 256, 1024), 'abcdef'),
        (5, (512, 512, 2048), 'abc'),
    )
    for stage, filters, labels in stage_specs:
        if stage == 2:
            # Only the first stage passes explicit unit strides.
            x = conv_block(x, 3, list(filters), stage=stage, block='a', strides=(1, 1))
        else:
            x = conv_block(x, 3, list(filters), stage=stage, block='a')
        for label in labels[1:]:
            x = identity_block(x, 3, list(filters), stage=stage, block=label)

    # Classification head
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(
        num_classes, activation='softmax',
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name='fc1000')(x)

    # Create model.
    return models.Model(img_input, x, name='resnet50')
def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1,
                       nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4,
                       subsample_initial_block=False, activation='softmax'):
    ''' Build the DenseNet model

    Args:
        nb_classes: number of classes
        img_input: input keras tensor the network is built on
        include_top: flag to include the final Dense layer
        depth: number or layers
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters. A value <= 0 (default -1) means the
                initial number of filters is 2 * growth_rate
        nb_layers_per_block: number of layers in each dense block.
                Can be a -1, positive integer or a list / tuple.
                If -1, calculates nb_layer_per_block from the depth of the network.
                If positive integer, a set number of layers per dense block.
                If list or tuple, the values are used as provided; its size
                must be nb_dense_block and its last entry is used for the
                final dense block.
        bottleneck: add bottleneck blocks
        reduction: reduction factor of transition blocks.
                Note : reduction value is inverted to compute compression
                (compression = 1 - reduction)
        dropout_rate: dropout rate
        weight_decay: weight decay rate
        subsample_initial_block: Set to True to subsample the initial convolution
                (7x7, stride 2) and add BatchNorm, Relu and a MaxPool2D before
                the dense blocks are added.
        activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
                Note that if sigmoid is used, classes must be 1.

    Returns: keras tensor with nb_layers of conv_block appended
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if reduction != 0.0:
        assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0'

    # layers in each dense block
    # isinstance (rather than an exact type comparison) also accepts list
    # and tuple subclasses such as namedtuples.
    if isinstance(nb_layers_per_block, (list, tuple)):
        nb_layers = list(nb_layers_per_block)  # Convert tuple to list

        assert len(nb_layers) == (nb_dense_block), 'If list, nb_layer is used as provided. ' \
                                                   'Note that list size must be (nb_dense_block)'
        final_nb_layer = nb_layers[-1]
        nb_layers = nb_layers[:-1]
    else:
        if nb_layers_per_block == -1:
            assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1'
            count = int((depth - 4) / 3)

            if bottleneck:
                # Each bottleneck layer holds two convolutions.
                count = count // 2

            nb_layers = [count for _ in range(nb_dense_block)]
            final_nb_layer = count
        else:
            final_nb_layer = nb_layers_per_block
            nb_layers = [nb_layers_per_block] * nb_dense_block

    # compute initial nb_filter if -1, else accept users initial nb_filter
    if nb_filter <= 0:
        nb_filter = 2 * growth_rate

    # compute compression factor
    compression = 1.0 - reduction

    # Initial convolution
    if subsample_initial_block:
        initial_kernel = (7, 7)
        initial_strides = (2, 2)
    else:
        initial_kernel = (3, 3)
        initial_strides = (1, 1)

    x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same',
               strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)

    if subsample_initial_block:
        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

    # Add dense blocks
    for block_idx in range(nb_dense_block - 1):
        x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck,
                                     dropout_rate=dropout_rate, weight_decay=weight_decay)
        # add transition_block
        x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay)
        nb_filter = int(nb_filter * compression)

    # The last dense_block does not have a transition_block
    x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck,
                                 dropout_rate=dropout_rate, weight_decay=weight_decay)

    x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
    x = Activation('relu')(x)
    x = GlobalAveragePooling2D(name="global_avg_pooling")(x)

    if include_top:
        x = Dense(nb_classes, activation=activation)(x)

    return x
def resnet50(num_classes, dtype='float32'):
    # TODO(tfboyd): add training argument, just like resnet56.
    """Build the ResNet50 image-classification model.

    Args:
      num_classes: `int` number of classes; sets the width of the final
        dense layer.
      dtype: dtype given to the model's input layer.

    Returns:
      A Keras model instance named 'resnet50'.
    """
    img_input = layers.Input(shape=(224, 224, 3), dtype=dtype)

    channels_first = backend.image_data_format() == 'channels_first'
    if channels_first:
        # The input layer is declared channels-last, so the channel axis is
        # moved to position 1 before the first convolution.
        x = layers.Lambda(
            lambda tensor: backend.permute_dimensions(tensor, (0, 3, 1, 2)),
            name='transpose')(img_input)
    else:
        x = img_input
    bn_axis = 1 if channels_first else 3

    # Stem: padded 7x7 stride-2 convolution, batch norm, ReLU, padded max-pool.
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
    x = layers.Conv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      kernel_initializer='he_normal',
                      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                      name='conv1')(x)
    x = layers.BatchNormalization(axis=bn_axis,
                                  momentum=BATCH_NORM_DECAY,
                                  epsilon=BATCH_NORM_EPSILON,
                                  name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # (stage, filters, labels of the identity blocks, extra conv_block kwargs).
    # Every stage opens with conv_block 'a'; only stage 2 overrides its strides.
    stage_specs = (
        (2, [64, 64, 256], 'bc', {'strides': (1, 1)}),
        (3, [128, 128, 512], 'bcd', {}),
        (4, [256, 256, 1024], 'bcdef', {}),
        (5, [512, 512, 2048], 'bc', {}),
    )
    for stage, filters, identity_labels, conv_kwargs in stage_specs:
        x = conv_block(x, 3, filters, stage=stage, block='a', **conv_kwargs)
        for label in identity_labels:
            x = identity_block(x, 3, filters, stage=stage, block=label)

    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(
        num_classes,
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name='fc1000')(x)
    # TODO(reedwm): Remove manual casts once mixed precision can be enabled
    # with a single line of code.
    x = backend.cast(x, 'float32')
    x = layers.Activation('softmax')(x)

    # Create model.
    return models.Model(img_input, x, name='resnet50')
from tensorflow.python.keras import regularizers
from readfile import load_data, embed_and_token

REG = 0.0001
DROP = 0.1

tweets, labels = load_data()
data, labels, embedding_layer = embed_and_token(tweets, labels)
print(data.shape)
print(labels.shape)


def _conv5(tensor, filters):
    """Apply a width-5, same-padded, L2-regularised ReLU Conv1D to `tensor`."""
    return Conv1D(filters, 5, activation='relu', padding='same',
                  kernel_regularizer=regularizers.l2(REG))(tensor)


def _pool_then_dense(tensor):
    """Apply a stride-1 max-pool of width 5, then a 256-unit ReLU Dense."""
    pooled = MaxPooling1D(5, strides=1, padding='same')(tensor)
    return Dense(256, activation='relu')(pooled)


sequence_input = Input(shape=(data.shape[1],), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
embedded_sequences = Dropout(DROP)(embedded_sequences)

# Two conv -> pool -> dense stages; `a` and `b` keep each stage's output.
x = _conv5(embedded_sequences, 256)
a = _pool_then_dense(x)
x = _conv5(Dropout(DROP)(a), 256)
b = _pool_then_dense(x)

# Final conv stage, pooled with a window of 31 and flattened.
x = _conv5(Dropout(DROP)(b), 128)
x = MaxPooling1D(31, padding='same')(x)
x = Flatten()(x)
# Disabled variant kept from the original script:
# a = Flatten()(a)
# b = Flatten()(b)
# x = Concatenate()([a, b, x])
x = Dense(128, activation='relu')(x)
def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12,
                           reduction=0.0, dropout_rate=None, weight_decay=1e-4,
                           nb_layers_per_block=4, nb_upsampling_conv=128,
                           upsampling_type='upsampling', init_conv_filters=48,
                           input_shape=None, activation='softmax'):
    ''' Build the fully convolutional DenseNet graph (down path, bottleneck, up path).

    Args:
        nb_classes: number of classes; channel count of the final 1x1 conv
            when `include_top` is set
        img_input: input tensor the network is built on
        include_top: flag to append the per-pixel classification head
        nb_dense_block: number of dense blocks on the down path; the same
            number of dense blocks is built on the up path
        growth_rate: growth rate forwarded to every dense block
        reduction: reduction factor of transition blocks, 0.0 or in (0, 1].
            Note : reduction value is inverted to compute compression
        dropout_rate: dropout rate forwarded to the dense blocks
        weight_decay: L2 weight decay factor
        nb_layers_per_block: number of layers in each dense block.
            Can be a positive integer or a list / tuple.
            If positive integer, a set number of layers per dense block.
            If list, it describes the down path plus the bottleneck and is
            mirrored for the up path. Note that list size must be
            (nb_dense_block + 1)
        nb_upsampling_conv: must be greater than 12 and divisible by 4. It is
            only validated here and is not forwarded to any layer in this
            function.
        upsampling_type: forwarded to the transition-up block. Can be one of
            'upsampling', 'deconv' and 'subpixel'.
        init_conv_filters: number of filters of the initial 7x7 convolution
        input_shape: (channels, rows, cols) or (rows, cols, channels) depending
            on the image data format. Only read when `include_top` is set,
            where it sizes the reshapes around the final activation.
        activation: Type of activation at the top layer. Can be one of
            'softmax' or 'sigmoid'. Note that if sigmoid is used, classes
            must be 1. The default used to be 'deconv', which is not an
            activation name and made `include_top=True` fail unless the
            caller overrode it.

    Returns: output keras tensor. With `include_top` it is the activated
        class map, otherwise the output of the last up-path dense block.

    Raises:
        TypeError: if `include_top` is set and `input_shape` is None.
        AssertionError: if `reduction`, `nb_upsampling_conv` or the length of
            `nb_layers_per_block` is invalid.
    '''
    concat_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # input_shape is only needed by the classification head. The previous
    # unconditional unpack at this point crashed on the default
    # input_shape=None even when include_top was False.
    if include_top and input_shape is None:
        raise TypeError('`input_shape` must be provided when `include_top` is True')

    if reduction != 0.0:
        assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0'

    # check if upsampling_conv has minimum number of filters
    # minimum is set to 12, as at least 3 color channels are needed for correct upsampling
    assert nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0, 'Parameter `upsampling_conv` number of channels must ' \
                                                                    'be a positive number divisible by 4 and greater ' \
                                                                    'than 12'

    # layers in each dense block
    if isinstance(nb_layers_per_block, (list, tuple)):
        nb_layers = list(nb_layers_per_block)  # copy, so the caller's sequence is not mutated

        assert len(nb_layers) == (nb_dense_block + 1), 'If list, nb_layer is used as provided. ' \
                                                       'Note that list size must be (nb_dense_block + 1)'

        bottleneck_nb_layers = nb_layers[-1]
        # Mirror the down-path layout (minus the bottleneck) for the up path.
        rev_layers = nb_layers[::-1]
        nb_layers.extend(rev_layers[1:])
    else:
        bottleneck_nb_layers = nb_layers_per_block
        nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1)

    # compute compression factor
    compression = 1.0 - reduction

    # Initial convolution
    x = Conv2D(init_conv_filters, (7, 7), kernel_initializer='he_normal', padding='same',
               name='initial_conv2D', use_bias=False,
               kernel_regularizer=l2(weight_decay))(img_input)
    x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
    x = Activation('relu')(x)

    nb_filter = init_conv_filters

    skip_list = []

    # Add dense blocks and transition down block
    for block_idx in range(nb_dense_block):
        x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate,
                                     dropout_rate=dropout_rate, weight_decay=weight_decay)

        # Skip connection
        skip_list.append(x)

        # add transition_block
        x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay)

        # keep the local filter count in step with the transition block's compression
        nb_filter = int(nb_filter * compression)

    # The last dense_block does not have a transition_down_block
    # return the concatenated feature maps without the concatenation of the input
    _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate,
                                              dropout_rate=dropout_rate, weight_decay=weight_decay,
                                              return_concat_list=True)

    skip_list = skip_list[::-1]  # reverse the skip list

    # Add dense blocks and transition up block
    for block_idx in range(nb_dense_block):
        n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx]

        # upsampling block must upsample only the feature maps (concat_list[1:]),
        # not the concatenation of the input with the feature maps (concat_list[0]).
        feature_maps = concatenate(concat_list[1:], axis=concat_axis)

        t = __transition_up_block(feature_maps, nb_filters=n_filters_keep, type=upsampling_type,
                                  weight_decay=weight_decay)

        # concatenate the skip connection with the transition block
        x = concatenate([t, skip_list[block_idx]], axis=concat_axis)

        # Dont allow the feature map size to grow in upsampling dense blocks
        x_up, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1],
                                                     nb_filter=growth_rate, growth_rate=growth_rate,
                                                     dropout_rate=dropout_rate, weight_decay=weight_decay,
                                                     return_concat_list=True, grow_nb_filters=False)

    if include_top:
        x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', use_bias=False)(x_up)

        if K.image_data_format() == 'channels_first':
            _, row, col = input_shape
        else:
            row, col, _ = input_shape

        # Flatten the spatial axes so the activation runs over the class axis,
        # then restore the spatial layout.
        # NOTE(review): both reshapes assume the class axis is last; under
        # 'channels_first' the conv output is presumably (classes, rows, cols)
        # and would be reinterpreted rather than transposed - confirm.
        x = Reshape((row * col, nb_classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, nb_classes))(x)
    else:
        x = x_up

    return x
def get_embedding(region_num, region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, init_std, seed,
                  l2_reg_linear):
    """Create the width-1 embedding layers for the region, base and bias features.

    :param region_num: number of regions; one list of region embeddings and
        one list of base embeddings is built per region
    :param region_feature_dim_dict: dict whose ``'sparse'`` entry maps each
        region feature to its vocabulary size
    :param base_feature_dim_dict: same layout, for the base features
    :param bias_feature_dim_dict: same layout, for the bias features
    :param init_std: stddev of the truncated-normal initializer
    :param seed: base random seed; region ``j`` uses ``seed + j``, the bias
        embeddings use ``seed``
    :param l2_reg_linear: L2 strength applied to every embedding
    :return: ``(region_embeddings, base_embeddings, bias_embedding)``; the
        first two are lists (one per region) of lists (one per feature), the
        last is a flat list
    """

    def make_embedding(vocab_size, init_seed, layer_name):
        # Every embedding here has output dimension 1.
        return Embedding(vocab_size, 1,
                         embeddings_initializer=TruncatedNormal(stddev=init_std, seed=init_seed),
                         embeddings_regularizer=l2(l2_reg_linear),
                         name=layer_name)

    region_embeddings = []
    for j in range(region_num):
        region_dims = region_feature_dim_dict['sparse']
        per_region = []
        for i, feat in enumerate(region_dims):
            per_region.append(
                make_embedding(region_dims[feat], seed + j, 'region_emb_{}_{}'.format(j, i)))
        region_embeddings.append(per_region)

    base_embeddings = []
    for j in range(region_num):
        base_dims = base_feature_dim_dict['sparse']
        per_region = []
        for i, feat in enumerate(base_dims):
            per_region.append(
                make_embedding(base_dims[feat], seed + j, 'base_emb_{}_{}'.format(j, i)))
        base_embeddings.append(per_region)

    bias_dims = bias_feature_dim_dict['sparse']
    bias_embedding = []
    for i, feat in enumerate(bias_dims):
        bias_embedding.append(make_embedding(bias_dims[feat], seed, 'embed_bias_{}'.format(i)))

    return region_embeddings, base_embeddings, bias_embedding
def build(self, input_shape):
    """Create the attention weight matrix from the shapes of the four inputs.

    `input_shape` must unpack into exactly four shapes: atom hidden states,
    protein hidden states, atom splits and protein lengths. Only the last
    dimension of the first two is used.
    """
    # NOTE(review): no call to the parent `build` is visible in this
    # excerpt - confirm the layer is marked as built elsewhere.
    atom_hidden_shape, prot_hidden_shape, _atom_splits_shape, _prot_len_shape = input_shape
    weight_shape = (atom_hidden_shape[-1], prot_hidden_shape[-1])
    self.W = self.add_weight(
        'attention_weight',
        shape=weight_shape,
        initializer=initializers.get(self.initializer),
        regularizer=l2(self.weight_decay))
def create_cnn_model(weights_path=None):
    """Assemble the six-stage CNN classifier and optionally load saved weights.

    Stages 1, 3 and 5 are two 3x3 stride-1 convolutions followed by a padded
    2x2 stride-1 max-pool and LRN. Stages 2, 4 and 6 are a 1x1 "reduce"
    convolution, a 3x3 convolution and LRN followed by a padded 3x3 stride-2
    max-pool. Filter counts double each stage (32 up to 1024). The head is
    average pooling, flatten, dropout and a softmax dense layer of
    `num_classes` units.

    Parameter-count background: a conv layer with an n x m kernel, l input
    maps and k output maps has (n*m*l + 1) * k parameters (the +1 is the bias
    of each output map). See
    https://towardsdatascience.com/understanding-and-calculating-the-number-of-parameters-in-convolution-neural-networks-cnns-fc88790d530d
    https://medium.com/@shashikachamod4u/calculate-output-size-and-number-of-trainable-parameters-in-a-convolution-layer-1d64cae6c009
    https://medium.com/@iamvarman/how-to-calculate-the-number-of-parameters-in-the-cnn-5bd55364d7ca
    https://cs231n.github.io/convolutional-networks/

    Args:
        weights_path: optional path passed to `load_weights`.

    Returns:
        The built Keras `Model`.
    """

    def relu_conv(tensor, filters, kernel, name, **extra):
        # Same-padded, L2-regularised ReLU convolution.
        return Conv2D(filters, kernel, padding='same', activation='relu', name=name,
                      kernel_regularizer=l2(l2_regulizer), **extra)(tensor)

    def padded_max_pool(tensor, pool_size, strides, name):
        # Zero-pad by one pixel, run the PoolHelper layer, then max-pool.
        padded = ZeroPadding2D(padding=(1, 1))(tensor)
        helped = PoolHelper()(padded)
        return MaxPooling2D(pool_size=pool_size, strides=strides, padding='same',
                            name=name)(helped)

    def double_conv_stage(tensor, idx, filters):
        tensor = relu_conv(tensor, filters, (3, 3), 'conv%d_1/3x3_s1' % idx, strides=(1, 1))
        tensor = relu_conv(tensor, filters, (3, 3), 'conv%d_2/3x3_s1' % idx, strides=(1, 1))
        tensor = padded_max_pool(tensor, (2, 2), (1, 1), 'pool%d/2x2_s1' % idx)
        return LRN(name='pool%d/norm1' % idx)(tensor)

    def reduce_conv_stage(tensor, idx, filters):
        tensor = relu_conv(tensor, filters, (1, 1), 'conv%d_1/3x3_reduce' % idx)
        tensor = relu_conv(tensor, filters, (3, 3), 'conv%d_2/3x3' % idx)
        tensor = LRN(name='conv%d/norm2' % idx)(tensor)
        return padded_max_pool(tensor, (3, 3), (2, 2), 'pool%d/3x3_s2' % idx)

    net_input = Input(shape=(1, IMG_WIDTH, IMG_HEIGHT))
    features = ZeroPadding2D(padding=(3, 3))(net_input)

    # Odd stages use the double-conv layout, even stages the reduce layout.
    for idx, filters in enumerate((32, 64, 128, 256, 512, 1024), start=1):
        stage = double_conv_stage if idx % 2 else reduce_conv_stage
        features = stage(features, idx, filters)

    pooled = AveragePooling2D(pool_size=(2, 2), strides=(1, 1), name='pool7/2x2_s1')(features)
    flat = Flatten()(pooled)
    dropped = Dropout(rate=0.5)(flat)
    logits = Dense(num_classes, name='loss3/classifier',
                   kernel_regularizer=l2(l2_regulizer))(dropped)
    probabilities = Activation('softmax', name='prob')(logits)

    mynet = Model(inputs=net_input, outputs=[probabilities])

    if weights_path:
        mynet.load_weights(weights_path)

    if keras.backend.backend() == 'tensorflow':
        # convert the convolutional kernels for tensorflow
        conv_layers = [layer for layer in mynet.layers
                       if layer.__class__.__name__ == 'Conv2D']
        ops = [tf.assign(layer.kernel, convert_kernel(K.get_value(layer.kernel))).op
               for layer in conv_layers]
        K.get_session().run(ops)

    return mynet
def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, embedding_size, init_std, seed,
                          l2_reg, prefix='sparse_', seq_mask_zero=True):
    """Build one `Embedding` layer per feature column, keyed by `embedding_name`.

    :param sparse_feature_columns: iterable of columns exposing
        ``embedding_name``, ``dimension`` and ``name``
    :param varlen_sparse_feature_columns: optional iterable of sequence
        columns with the same attributes; entries are written after the
        sparse ones and replace any entry with the same ``embedding_name``
    :param embedding_size: output dimension, or ``'auto'`` to use
        ``6 * int(dimension ** 0.25)`` per column
    :param init_std: stddev of the normal initializer
    :param seed: seed of the initializer
    :param l2_reg: L2 strength on the embeddings
    :param prefix: prefix of every layer name
    :param seq_mask_zero: ``mask_zero`` flag of the sequence embeddings
    :return: dict mapping ``embedding_name`` to its `Embedding` layer
    """
    use_auto_size = embedding_size == 'auto'

    def output_dim(feat):
        if use_auto_size:
            return 6 * int(pow(feat.dimension, 0.25))
        return embedding_size

    def build_layer(feat, infix, **extra):
        return Embedding(feat.dimension, output_dim(feat),
                         embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
                         embeddings_regularizer=l2(l2_reg),
                         name=prefix + infix + feat.name,
                         **extra)

    if use_auto_size:
        print("Notice:Do not use auto embedding in models other than DCN")

    sparse_embedding = {}
    for feat in sparse_feature_columns:
        sparse_embedding[feat.embedding_name] = build_layer(feat, '_emb_')

    if not varlen_sparse_feature_columns:
        return sparse_embedding

    for feat in varlen_sparse_feature_columns:
        sparse_embedding[feat.embedding_name] = build_layer(
            feat, '_seq_emb_', mask_zero=seq_mask_zero)
    return sparse_embedding
def siamese_network(input_shape=(105, 105, 1), classes=1):
    """Build a two-input Siamese model whose legs share one convolutional encoder.

    Both inputs pass through the same `Sequential` encoder (four conv layers,
    the first three each followed by 2x2 max-pooling, then a 4096-unit sigmoid
    dense layer). The element-wise absolute difference of the two encodings
    feeds a sigmoid dense layer with `classes` units.

    Args:
        input_shape: shape of each of the two inputs.
        classes: number of units of the output layer.

    Returns:
        A `Model` taking `[left_input, right_input]`.
    """

    def bias_init():
        return RandomNormal(mean=0.5, stddev=0.01)

    def conv_layer(filters, kernel_size, **extra):
        return layers.Conv2D(filters=filters,
                             kernel_size=kernel_size,
                             strides=1,
                             activation='relu',
                             kernel_initializer=RandomNormal(mean=0, stddev=0.01),
                             kernel_regularizer=l2(1e-2),
                             bias_initializer=bias_init(),
                             **extra)

    left_input = layers.Input(shape=input_shape)
    right_input = layers.Input(shape=input_shape)

    # One encoder instance is applied to both inputs, so its weights are shared.
    siamese_convnet = Sequential()
    siamese_convnet.add(conv_layer(64, 10, input_shape=input_shape))
    for filters, kernel_size in ((128, 7), (128, 4), (256, 4)):
        siamese_convnet.add(layers.MaxPooling2D(pool_size=(2, 2)))
        siamese_convnet.add(conv_layer(filters, kernel_size))
    siamese_convnet.add(layers.Flatten())
    siamese_convnet.add(
        layers.Dense(4096,
                     activation='sigmoid',
                     kernel_initializer=RandomNormal(mean=0, stddev=0.2),
                     kernel_regularizer=l2(1e-4),
                     bias_initializer=bias_init()))

    encoded_left_input = siamese_convnet(left_input)
    encoded_right_input = siamese_convnet(right_input)

    # Element-wise absolute difference of the two encodings.
    l1_encoded = layers.Lambda(lambda pair: tf.abs(pair[0] - pair[1]))(
        [encoded_left_input, encoded_right_input])

    output_layer = layers.Dense(classes,
                                activation='sigmoid',
                                kernel_initializer=RandomNormal(mean=0, stddev=0.2),
                                bias_initializer=bias_init())
    output = output_layer(l1_encoded)

    return Model(inputs=[left_input, right_input], outputs=output)
# Conv / max-pool stages: (filters, kernel size, pool size).
for _filters, _kernel, _pool in (
        (64, (5, 5), (4, 4)),
        (128, (3, 3), (2, 2)),   # Layer 3
        (256, (3, 3), (2, 2)),   # Layer 4
):
    model.add(Conv2D(_filters, _kernel, activation='relu'))
    model.add(MaxPooling2D(pool_size=_pool))

# flatten
model.add(Flatten(input_shape=input_shape))

# fc layers: (units, activation), all L2-regularised with the same rate
for _units, _activation in (
        (1024, 'relu'),
        (128, 'relu'),
        (category_count, 'softmax'),
):
    model.add(Dense(_units, activation=_activation,
                    kernel_regularizer=regularizers.l2(regularization_rate)))

# read image
train_data, train_label = read_img_random(train_path, train_image_count)
val_data, val_label = read_img_random(val_path, val_image_count)
test_data, test_label = read_img_random(test_path, test_image_count)

# Aliases for the three splits.
x_train, y_train = train_data, train_label
x_val, y_val = val_data, val_label
x_test, y_test = test_data, test_label
# NOTE(review): the next three lines are the tail of a function whose `def`
# line is above this excerpt (presumably the `ReturnVektor` used below -
# confirm). It appends one column slice of `Input` per feature in `Features`,
# using the [start, end) bounds stored in `Input_Indexs`.
for f in Features.keys():
    Others.append(Input[:,Input_Indexs[f][0]:Input_Indexs[f][1]])
return User,Movie,Others

def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference between
    `y_pred` and `y_true`, computed with the backend `K`."""
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

# All features arrive as one concatenated input matrix; according to the
# original author, feeding them as separate inputs made training much slower.
Conc=Keras.Input((Input_Shape,))

# Split the concatenated input back into its seven feature groups.
U,M,G1,G2,A1,O,A2=Keras.Lambda(ReturnVektor)(Conc)

# Embedding layers: each feature group gets its own linear, L2-regularised
# Dense projection to Embd_Size, reshaped to (1, Embd_Size) so the embeddings
# can be concatenated later.
emb1=Keras.Dense(Embd_Size,activation='linear',kernel_initializer='normal',kernel_regularizer=regularizers.l2(Scale))(U)
emb1=Keras.Reshape((1,Embd_Size))(emb1)

emb2=Keras.Dense(Embd_Size,activation='linear',kernel_initializer='normal',kernel_regularizer=regularizers.l2(Scale))(M)
emb2=Keras.Reshape((1,Embd_Size))(emb2)

emb3=Keras.Dense(Embd_Size,activation='linear',kernel_initializer='normal',kernel_regularizer=regularizers.l2(Scale))(G1)
emb3=Keras.Reshape((1,Embd_Size))(emb3)

emb4=Keras.Dense(Embd_Size,activation='linear',kernel_initializer='normal',kernel_regularizer=regularizers.l2(Scale))(G2)
emb4=Keras.Reshape((1,Embd_Size))(emb4)

emb5=Keras.Dense(Embd_Size,activation='linear',kernel_initializer='normal',kernel_regularizer=regularizers.l2(Scale))(A1)
emb5=Keras.Reshape((1,Embd_Size))(emb5)

emb6=Keras.Dense(Embd_Size,activation='linear',kernel_initializer='normal',kernel_regularizer=regularizers.l2(Scale))(O)
emb6=Keras.Reshape((1,Embd_Size))(emb6)

emb7=Keras.Dense(Embd_Size,activation='linear',kernel_initializer='normal',kernel_regularizer=regularizers.l2(Scale))(A2)
emb7=Keras.Reshape((1,Embd_Size))(emb7)

# First Order Relations
def DIN(
        feature_dim_dict,
        seq_feature_list,
        embedding_size=4,
        hist_len_max=16,
        use_din=True,
        use_bn=True,
        hidden_size=[200, 80],
        activation=Dice(),
        att_hidden_size=[80, 40],
        att_activation='sigmoid',
        att_weight_normalization=True,
        l2_reg_deep=5e-5,
        l2_reg_embedding=0,
        final_activation='sigmoid',
        keep_prob=1,
        init_std=0.0001,
        seed=1024,
):
    """Build the Deep Interest Network model.

    :param feature_dim_dict: dict with the keys ``'sparse'`` and ``'dense'``, like
        {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}.
        ``'sparse'`` maps each field to its vocabulary size; ``'dense'`` must
        be empty (**only sparse features are supported**)
    :param seq_feature_list: list of the sequence sparse fields; must be a
        subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, embedding size of every sparse feature
    :param hist_len_max: positive int, max length of the sequence input
    :param use_din: bool, use attention (DIN) pooling over the behaviour
        sequence; if ``False``, use **sum pooling**
    :param use_bn: bool, forwarded to the deep net (batch normalization switch)
    :param hidden_size: list of positive integers or empty list, the units of
        each layer of the deep net
    :param activation: activation used in the deep net
        (NOTE(review): the default ``Dice()`` is created once, when the
        function is defined, and is then shared by every call - confirm
        this is intended)
    :param att_hidden_size: list of positive integers, the units of each
        layer of the attention net
    :param att_activation: activation used in the attention net
    :param att_weight_normalization: bool, whether to normalize the attention scores
    :param l2_reg_deep: float, L2 strength forwarded to the deep net
    :param l2_reg_embedding: float, L2 strength applied to the embeddings
    :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'``
    :param keep_prob: float in (0,1], forwarded to the deep net
    :param init_std: float, stddev used to initialize the embeddings
    :param seed: integer, random seed
    :return: A Keras model instance.
    :raises ValueError: if ``feature_dim_dict`` is malformed or lists dense features
    """
    well_formed = (isinstance(feature_dim_dict, dict)
                   and "sparse" in feature_dim_dict
                   and "dense" in feature_dim_dict)
    if not well_formed:
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}"
        )
    if len(feature_dim_dict['dense']) > 0:
        raise ValueError('Now DIN only support sparse input')

    sparse_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding layer per sparse field, shared by the candidate item
    # (query), the behaviour sequence (keys) and the deep input.
    sparse_dims = feature_dim_dict["sparse"]
    sparse_embedding_dict = {}
    for i, feat in enumerate(sparse_dims):
        sparse_embedding_dict[feat] = Embedding(
            sparse_dims[feat], embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(i) + '-' + feat)

    def embed(inputs, feats):
        return [sparse_embedding_dict[feat](inputs[feat]) for feat in feats]

    def merge(tensors):
        # A single tensor is passed through without a Concatenate layer.
        return Concatenate()(tensors) if len(tensors) > 1 else tensors[0]

    query_emb_list = embed(sparse_input, seq_feature_list)
    keys_emb_list = embed(user_behavior_input, seq_feature_list)
    deep_input_emb_list = embed(sparse_input, sparse_dims)

    query_emb = merge(query_emb_list)
    keys_emb = merge(keys_emb_list)
    deep_input_emb = merge(deep_input_emb_list)

    if use_din:
        pooling = AttentionSequencePoolingLayer(
            att_hidden_size, att_activation,
            weight_normalization=att_weight_normalization)
        hist = pooling([query_emb, keys_emb, user_behavior_length])
    else:
        pooling = SequencePoolingLayer(hist_len_max, 'sum')
        hist = pooling([keys_emb, user_behavior_length])

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_net = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn, seed)
    output = deep_net(deep_input_emb)
    output = Dense(1, final_activation)(output)
    output = Reshape([1])(output)

    model_input_list = list(sparse_input.values())
    model_input_list = model_input_list + list(user_behavior_input.values())
    model_input_list = model_input_list + [user_behavior_length]
    return Model(inputs=model_input_list, outputs=output)
def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16, dnn_use_bn=False,
        dnn_hidden_units=(200, 80), dnn_activation='relu', att_hidden_size=(80, 40), att_activation="dice",
        att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001,
        seed=1024, task='binary'):
    """Build the Deep Interest Network model.

    :param feature_dim_dict: dict describing the input fields, like
        {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}; it is
        validated by ``check_feature_config_dict``. The entries of
        ``'sparse'`` are read through their ``name`` and ``dimension`` attributes
    :param seq_feature_list: list of the sequence sparse fields; must be a
        subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, embedding size of every sparse feature
    :param hist_len_max: positive int, max length of the sequence input
    :param dnn_use_bn: bool, forwarded to the DNN (batch normalization switch)
    :param dnn_hidden_units: list of positive integers or empty list, the
        units of each DNN layer
    :param dnn_activation: activation used in the DNN
    :param att_hidden_size: list of positive integers, the units of each
        layer of the attention net
    :param att_activation: activation used in the attention net
    :param att_weight_normalization: bool, whether to normalize the attention scores
    :param l2_reg_dnn: float, L2 strength forwarded to the DNN
    :param l2_reg_embedding: float, L2 strength applied to the embeddings
    :param dnn_dropout: float in [0,1), dropout probability forwarded to the DNN
    :param init_std: float, stddev used to initialize the embeddings
    :param seed: integer, random seed
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    sparse_features = feature_dim_dict["sparse"]
    sparse_embedding_dict = {}
    for i, feat in enumerate(sparse_features):
        # Sequence fields mask index 0.
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension, embedding_size,
            embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(i) + '-' + feat.name,
            mask_zero=(feat.name in seq_feature_list))

    query_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, sparse_features,
        seq_feature_list, seq_feature_list)
    keys_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, user_behavior_input, sparse_features,
        seq_feature_list, seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, sparse_features,
        mask_feat_list=seq_feature_list)

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    attention_pooling = AttentionSequencePoolingLayer(
        att_hidden_size, att_activation,
        weight_normalization=att_weight_normalization,
        supports_masking=True)
    hist = attention_pooling([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        dense_tensors = list(dense_input.values())
        deep_input_emb = Concatenate()([deep_input_emb] + dense_tensors)

    dnn = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
              dnn_dropout, dnn_use_bn, seed)
    output = dnn(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(output)
    output = PredictionLayer(task)(final_logit)

    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    return Model(inputs=model_input_list, outputs=output)
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Residual block whose shortcut path goes through its own convolution.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of the middle conv layer of
            the main path
        filters: list of three integers, the filters of the three conv
            layers of the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: Strides of the second conv layer of the main path and of
            the shortcut conv layer.

    # Returns
        Output tensor for the block.

    Note that from stage 3, the second conv layer at main path is with
    strides=(2, 2), and the shortcut should have strides=(2, 2) as well.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    branch_suffix = str(stage) + block + '_branch'
    conv_name_base = 'res' + branch_suffix
    bn_name_base = 'bn' + branch_suffix

    def conv(tensor, n_filters, kernel, tag, **extra):
        # he_normal convolution with L2 decay on both kernel and bias.
        return layers.Conv2D(n_filters, kernel,
                             kernel_initializer='he_normal',
                             kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                             bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
                             name=conv_name_base + tag,
                             **extra)(tensor)

    def batch_norm(tensor, tag):
        return layers.BatchNormalization(axis=bn_axis,
                                         momentum=BATCH_NORM_DECAY,
                                         epsilon=BATCH_NORM_EPSILON,
                                         name=bn_name_base + tag)(tensor)

    # Main path: 1x1 -> kernel_size (strided) -> 1x1.
    x = conv(input_tensor, filters1, (1, 1), '2a')
    x = batch_norm(x, '2a')
    x = layers.Activation('relu')(x)

    x = conv(x, filters2, kernel_size, '2b', strides=strides, padding='same')
    x = batch_norm(x, '2b')
    x = layers.Activation('relu')(x)

    x = conv(x, filters3, (1, 1), '2c')
    x = batch_norm(x, '2c')

    # Shortcut path: strided 1x1 conv to filters3 so both paths can be added.
    shortcut = conv(input_tensor, filters3, (1, 1), '1', strides=strides)
    shortcut = batch_norm(shortcut, '1')

    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x
class KerasRegularizersTest(keras_parameterized.TestCase,
                            parameterized.TestCase):
    """Tests for how regularizers contribute entries to `model.losses`."""

    def create_model(self, kernel_regularizer=None, activity_regularizer=None):
        """Return a Sequential model made of one regularized Dense layer."""
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(NUM_CLASSES,
                                     kernel_regularizer=kernel_regularizer,
                                     activity_regularizer=activity_regularizer,
                                     input_shape=(DATA_DIM,)))
        return model

    def get_data(self):
        """Return 10 train / 10 test samples with one-hot encoded labels."""
        (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
            train_samples=10,
            test_samples=10,
            input_shape=(DATA_DIM,),
            num_classes=NUM_CLASSES)
        y_train = np_utils.to_categorical(y_train, NUM_CLASSES)
        y_test = np_utils.to_categorical(y_test, NUM_CLASSES)
        return (x_train, y_train), (x_test, y_test)

    def create_multi_input_model_from(self, layer1, layer2):
        """Build a two-input model averaging `layer1` and `layer2` outputs.

        Two extra losses are attached to the outer model via `add_loss`:
        the mean of the second branch output and the sum of the first input.
        """
        input_1 = keras.layers.Input(shape=(DATA_DIM,))
        input_2 = keras.layers.Input(shape=(DATA_DIM,))
        out1 = layer1(input_1)
        out2 = layer2(input_2)
        out = keras.layers.Average()([out1, out2])
        model = keras.models.Model([input_1, input_2], out)
        model.add_loss(keras.backend.mean(out2))
        model.add_loss(math_ops.reduce_sum(input_1))
        return model

    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters([
        ('l1', regularizers.l1()),
        ('l2', regularizers.l2()),
        ('l1_l2', regularizers.l1_l2()),
    ])
    def test_kernel_regularization(self, regularizer):
        """A kernel regularizer yields exactly one loss and the model trains."""
        (x_train, y_train), _ = self.get_data()
        model = self.create_model(kernel_regularizer=regularizer)
        model.compile(
            loss='categorical_crossentropy',
            optimizer='sgd',
            run_eagerly=testing_utils.should_run_eagerly())
        self.assertLen(model.losses, 1)
        model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0)

    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters([
        ('l1', regularizers.l1()),
        ('l2', regularizers.l2()),
        ('l1_l2', regularizers.l1_l2()),
        ('l2_zero', keras.regularizers.l2(0.)),
    ])
    def test_activity_regularization(self, regularizer):
        """An activity regularizer yields exactly one loss and the model trains."""
        (x_train, y_train), _ = self.get_data()
        model = self.create_model(activity_regularizer=regularizer)
        model.compile(
            loss='categorical_crossentropy',
            optimizer='sgd',
            run_eagerly=testing_utils.should_run_eagerly())
        # The expected count is 1 in both eager and graph mode, so no
        # mode-dependent branch is needed here.
        self.assertLen(model.losses, 1)
        model.fit(x_train, y_train, batch_size=10, epochs=1, verbose=0)

    @keras_parameterized.run_all_keras_modes
    @keras_parameterized.run_with_all_model_types
    def test_zero_regularization(self):
        """Training works when the L2 factor is zero."""
        x, y = np.ones((10, 10)), np.ones((10, 3))
        model = testing_utils.get_model_from_layers(
            [keras.layers.Dense(3, kernel_regularizer=keras.regularizers.l2(0))],
            input_shape=(10,))
        model.compile(
            'sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly())
        model.fit(x, y, batch_size=5, epochs=1)

    def test_custom_regularizer_saving(self):
        """A plain-function regularizer survives a config round trip."""

        def my_regularizer(weights):
            return math_ops.reduce_sum(math_ops.abs(weights))

        inputs = keras.Input((10,))
        outputs = keras.layers.Dense(1, kernel_regularizer=my_regularizer)(inputs)
        model = keras.Model(inputs, outputs)
        model2 = model.from_config(
            model.get_config(),
            custom_objects={'my_regularizer': my_regularizer})
        self.assertEqual(model2.layers[1].kernel_regularizer, my_regularizer)

    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters([
        ('l1', regularizers.l1()),
        ('l2', regularizers.l2()),
        ('l1_l2', regularizers.l1_l2()),
    ])
    def test_regularization_shared_layer(self, regularizer):
        """One Dense layer used on both inputs: 5 losses are expected."""
        dense_layer = keras.layers.Dense(
            NUM_CLASSES,
            kernel_regularizer=regularizer,
            activity_regularizer=regularizer)
        model = self.create_multi_input_model_from(dense_layer, dense_layer)
        model.compile(
            loss='categorical_crossentropy',
            optimizer='sgd',
            run_eagerly=testing_utils.should_run_eagerly())
        self.assertLen(model.losses, 5)

    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters([
        ('l1', regularizers.l1()),
        ('l2', regularizers.l2()),
        ('l1_l2', regularizers.l1_l2()),
    ])
    def test_regularization_shared_model(self, regularizer):
        """One inner model used on both inputs: 6 losses are expected."""
        dense_layer = keras.layers.Dense(
            NUM_CLASSES,
            kernel_regularizer=regularizer,
            activity_regularizer=regularizer)
        input_tensor = keras.layers.Input(shape=(DATA_DIM,))
        dummy_model = keras.models.Model(input_tensor, dense_layer(input_tensor))
        model = self.create_multi_input_model_from(dummy_model, dummy_model)
        model.compile(
            loss='categorical_crossentropy',
            optimizer='sgd',
            run_eagerly=testing_utils.should_run_eagerly())
        self.assertLen(model.losses, 6)

    @keras_parameterized.run_all_keras_modes
    @parameterized.named_parameters([
        ('l1', regularizers.l1()),
        ('l2', regularizers.l2()),
        ('l1_l2', regularizers.l1_l2()),
    ])
    def test_regularization_shared_layer_in_different_models(self, regularizer):
        """A Dense layer shared by two inner models: 9 losses are expected."""
        shared_dense = keras.layers.Dense(
            NUM_CLASSES,
            kernel_regularizer=regularizer,
            activity_regularizer=regularizer)
        models = []
        for _ in range(2):
            input_tensor = keras.layers.Input(shape=(DATA_DIM,))
            unshared_dense = keras.layers.Dense(
                NUM_CLASSES, kernel_regularizer=regularizer)
            out = unshared_dense(shared_dense(input_tensor))
            models.append(keras.models.Model(input_tensor, out))
        model = self.create_multi_input_model_from(
            layer1=models[0], layer2=models[1])
        model.compile(
            loss='categorical_crossentropy',
            optimizer='sgd',
            run_eagerly=testing_utils.should_run_eagerly())
        # We expect to see 9 losses on the model:
        # - 2 from the 2 add_loss calls on the outer model.
        # - 3 from the weight regularizers on the shared_dense layer,
        #   unshared_dense in inner model 1, unshared_dense in inner model 2.
        # - 4 from activity regularizers on the shared_dense layer.
        self.assertLen(model.losses, 9)
def RetinaNet(input_shape, num_classes, num_anchor=9):
    """Assemble RetinaNet: ResNet50 feature pyramid plus two shared heads.

    Three ResNet50 stage outputs feed a top-down pyramid (P3-P5); P6 and P7
    are obtained with stride-2 convolutions on top of the last stage. One
    classification sub-network and one localization sub-network are each
    applied to all five pyramid levels and their results concatenated.

    Args:
        input_shape (tuple): shape of input image.
        num_classes (int): number of classes.
        num_anchor (int, optional): number of anchor boxes. Defaults to 9.

    Returns:
        'Model' object: RetinaNet with outputs [classification, localization].
    """
    def _conv(kernel, stride, **extra):
        # Every pyramid convolution has 256 filters, 'same' padding and
        # an L2(1e-4) kernel regularizer.
        return layers.Conv2D(256,
                             kernel_size=kernel,
                             strides=stride,
                             padding='same',
                             kernel_regularizer=regularizers.l2(0.0001),
                             **extra)

    inputs = tf.keras.Input(shape=input_shape)

    # Backbone: pick the three stage outputs by index and check their names
    # so an unexpected ResNet50 layer layout is caught immediately.
    backbone = tf.keras.applications.ResNet50(weights="imagenet",
                                              include_top=False,
                                              input_tensor=inputs,
                                              pooling=None)
    stage_outputs = []
    for index, expected_name in ((80, "conv3_block4_out"),
                                 (142, "conv4_block6_out"),
                                 (-1, "conv5_block3_out")):
        tap = backbone.layers[index]
        assert tap.name == expected_name
        stage_outputs.append(tap.output)
    c3, c4, c5 = stage_outputs

    # Top-down pathway with 1x1 lateral convolutions.
    p5 = _conv(1, 1)(c5)
    p5_up = layers.UpSampling2D()(p5)
    p4 = _conv(1, 1)(c4)
    p4 = layers.Add()([p5_up, p4])
    p4_up = layers.UpSampling2D()(p4)
    p3 = _conv(1, 1)(c3)
    p3 = layers.Add()([p4_up, p3])

    # Extra coarse levels computed from C5.
    p6 = _conv(3, 2, name="P6")(c5)
    p7 = layers.Activation('relu')(p6)
    p7 = _conv(3, 2, name="P7")(p7)

    # 3x3 output convolutions on the merged maps.
    p5 = _conv(3, 1, name="P5")(p5)
    p4 = _conv(3, 1, name="P4")(p4)
    p3 = _conv(3, 1, name="P3")(p3)

    pyramid = [p3, p4, p5, p6, p7]

    # classification subnet (one instance shared by all levels)
    cls_subnet = _classification_sub_net(num_classes=num_classes,
                                         num_anchor=num_anchor)
    cls_per_level = [cls_subnet(level) for level in pyramid]
    cls_output = layers.Concatenate(axis=-2)(cls_per_level)

    # localization subnet (one instance shared by all levels)
    loc_subnet = _regression_sub_net(num_anchor=num_anchor)
    loc_per_level = [loc_subnet(level) for level in pyramid]
    loc_output = layers.Concatenate(axis=-2)(loc_per_level)

    return tf.keras.Model(inputs=inputs, outputs=[cls_output, loc_output])
def Nest_Net(img_rows, img_cols, color_type=1, num_class=1, deep_supervision=False):
    """Build a nested U-Net with densely connected skip paths.

    The input is zero-padded along its second spatial axis by
    `unet_padding_size(img_cols, 2)` and every output is cropped back by the
    same amount. Pooling and up-sampling use size/stride (1, 2) and all
    convolution units use a (1, 7) kernel.

    # Arguments
        img_rows: first spatial dimension of the input
        img_cols: second spatial dimension of the input
        color_type: size of the last input dimension
        num_class: number of filters of each sigmoid output convolution
        deep_supervision: if true the model exposes the four cropped outputs
            taken after nodes 1_2, 1_3, 1_4 and 1_5; otherwise only the last

    # Returns
        An uncompiled `Model`.
    """
    nb_filter = [8, 16, 32, 64, 128]
    pool_size = (1, 2)
    kernel_size = (1, 7)
    padding_size = ((0, 0), unet_padding_size(img_cols, pool_size[1]))

    def _unit(tensor, stage, level):
        return standard_unit(tensor, stage=stage,
                             nb_filter=nb_filter[level],
                             kernel_size=kernel_size)

    def _down(tensor, name):
        return MaxPooling2D(pool_size=pool_size, name=name)(tensor)

    def _fuse(lower, skips, up_name, merge_name, stage, level):
        # Up-sample the deeper node, join it with the skip tensors of the
        # same level, then run a convolution unit on the result.
        raised = Conv2DTranspose(nb_filter[level], kernel_size,
                                 strides=pool_size, name=up_name,
                                 padding='same')(lower)
        joined = concatenate([raised] + skips, name=merge_name, axis=3)
        return _unit(joined, stage, level)

    def _head(tensor, name):
        return Conv2D(num_class, (1, 1), activation='sigmoid', name=name,
                      kernel_initializer='he_normal', padding='same',
                      kernel_regularizer=l2(1e-4))(tensor)

    img_input = Input(shape=(img_rows, img_cols, color_type), name='main_input')
    padded = ZeroPadding2D(padding_size)(img_input)

    x11 = _unit(padded, '11', 0)
    pool1 = _down(x11, 'pool1')

    x21 = _unit(pool1, '21', 1)
    pool2 = _down(x21, 'pool2')

    x12 = _fuse(x21, [x11], 'up12', 'merge12', '12', 0)

    x31 = _unit(pool2, '31', 2)
    pool3 = _down(x31, 'pool3')

    x22 = _fuse(x31, [x21], 'up22', 'merge22', '22', 1)
    x13 = _fuse(x22, [x11, x12], 'up13', 'merge13', '13', 0)

    x41 = _unit(pool3, '41', 3)
    pool4 = _down(x41, 'pool4')

    x32 = _fuse(x41, [x31], 'up32', 'merge32', '32', 2)
    x23 = _fuse(x32, [x21, x22], 'up23', 'merge23', '23', 1)
    x14 = _fuse(x23, [x11, x12, x13], 'up14', 'merge14', '14', 0)

    x51 = _unit(pool4, '51', 4)

    x42 = _fuse(x51, [x41], 'up42', 'merge42', '42', 3)
    x33 = _fuse(x42, [x31, x32], 'up33', 'merge33', '33', 2)
    x24 = _fuse(x33, [x21, x22, x23], 'up24', 'merge24', '24', 1)
    x15 = _fuse(x24, [x11, x12, x13, x14], 'up15', 'merge15', '15', 0)

    # All four heads are built regardless of `deep_supervision`.
    heads = [
        _head(x12, 'output_1'),
        _head(x13, 'output_2'),
        _head(x14, 'output_3'),
        _head(x15, 'output_4'),
    ]
    # Undo the initial zero padding on every head.
    cropped = [Cropping2D(padding_size)(head) for head in heads]

    if deep_supervision:
        return Model(inputs=img_input, outputs=cropped)
    return Model(inputs=img_input, outputs=[cropped[3]])
def build_model(input_shape=None):
    """Build and compile a small Inception-style image classifier.

    The network is a convolutional stem, two multi-branch blocks named
    'mixed0' and 'mixed2', one factorized-7x7 block named 'mixed5', global
    max pooling and three Dense layers. It is compiled with Adam and
    categorical cross-entropy.

    NOTE(review): `num_classes` is not a parameter; it is read from the
    enclosing (module) scope and must be defined before calling this.

    # Arguments
        input_shape: optional input shape, validated/resolved by
            `_obtain_input_shape` with default size 224 and minimum size 24
            for the active `K.image_data_format()`.

    # Returns
        A compiled `Model` named 'micro_xception_v1'.
    """
    # NOTE(review): `weights` is the string 'None', not the None object;
    # left as in the original -- confirm this is what the helper expects.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=24,
                                      data_format=K.image_data_format(),
                                      require_flatten=False,
                                      weights='None')
    img_input = Input(shape=input_shape)

    # Branches must be concatenated along the channel axis of the same data
    # format that was used to resolve `input_shape` above. This used to be
    # hard-coded to 3, which is only right for 'channels_last'.
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3

    reg = regularizers.l2(0.001)

    # stem
    x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid', reg=reg)
    x = conv2d_bn(x, 32, 3, 3, padding='valid', reg=reg)
    x = conv2d_bn(x, 64, 3, 3, reg=reg)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    x = conv2d_bn(x, 80, 1, 1, padding='valid', reg=reg)
    x = conv2d_bn(x, 192, 3, 3, padding='valid', reg=reg)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # block 'mixed0': 1x1, 5x5, double 3x3 and pooled branches
    # NOTE(review): this is the only conv2d_bn call without `reg=reg`;
    # kept as in the original -- confirm whether that is intentional.
    branch1x1 = conv2d_bn(x, 64, 1, 1)

    branch5x5 = conv2d_bn(x, 48, 1, 1, reg=reg)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5, reg=reg)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1, reg=reg)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, reg=reg)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, reg=reg)

    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 32, 1, 1, reg=reg)
    x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed0')

    # block 'mixed2': same layout, 64 filters on the pooled branch
    branch1x1 = conv2d_bn(x, 64, 1, 1, reg=reg)

    branch5x5 = conv2d_bn(x, 48, 1, 1, reg=reg)
    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5, reg=reg)

    branch3x3dbl = conv2d_bn(x, 64, 1, 1, reg=reg)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, reg=reg)
    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, reg=reg)

    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1, 1, reg=reg)
    x = layers.concatenate([branch1x1, branch5x5, branch3x3dbl, branch_pool],
                           axis=channel_axis,
                           name='mixed2')

    # factorized 7x7 block; the loop runs once and names the block 'mixed5'
    for i in range(1):
        branch1x1 = conv2d_bn(x, 192, 1, 1, reg=reg)

        branch7x7 = conv2d_bn(x, 160, 1, 1, reg=reg)
        branch7x7 = conv2d_bn(branch7x7, 160, 1, 7, reg=reg)
        branch7x7 = conv2d_bn(branch7x7, 192, 7, 1, reg=reg)

        branch7x7dbl = conv2d_bn(x, 160, 1, 1, reg=reg)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1, reg=reg)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7, reg=reg)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1, reg=reg)
        branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7, reg=reg)

        branch_pool = AveragePooling2D((3, 3), strides=(1, 1),
                                       padding='same')(x)
        branch_pool = conv2d_bn(branch_pool, 192, 1, 1, reg=reg)
        x = layers.concatenate(
            [branch1x1, branch7x7, branch7x7dbl, branch_pool],
            axis=channel_axis,
            name='mixed' + str(5 + i))

    # classifier head
    x = GlobalMaxPooling2D()(x)
    x = Flatten()(x)
    x = Dense(384, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(num_classes, activation='softmax')(x)

    model = Model(img_input, x, name='micro_xception_v1')
    opt = optimizers.Adam(lr=0.001, decay=0.001)
    model.compile(optimizer=opt,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def U_Net(img_rows, img_cols, color_type=1, num_class=1):
    """Build a five-level U-Net operating along the second spatial axis.

    The input is zero-padded along its second spatial axis by
    `unet_padding_size(img_cols, 4)` and the output is cropped back by the
    same amount. Pooling and up-sampling use size/stride (1, 4) and all
    convolution units use a (1, 7) kernel.

    # Arguments
        img_rows: first spatial dimension of the input
        img_cols: second spatial dimension of the input
        color_type: size of the last input dimension
        num_class: number of filters of the sigmoid output convolution

    # Returns
        An uncompiled `Model`.
    """
    # alternative widths left by the author: [8, 16, 32, 64, 128]
    nb_filter = [8, 11, 16, 22, 32]
    pool_size = (1, 4)
    kernel_size = (1, 7)
    padding_size = ((0, 0), unet_padding_size(img_cols, pool_size[1]))

    img_input = Input(shape=(img_rows, img_cols, color_type), name='main_input')
    x = ZeroPadding2D(padding_size)(img_input)

    # Contracting path: keep each level's output for the skip connections.
    skips = []
    encoder_plan = (('11', 'pool1'), ('21', 'pool2'),
                    ('31', 'pool3'), ('41', 'pool4'))
    for level, (stage, pool_name) in enumerate(encoder_plan):
        x = standard_unit(x, stage=stage, nb_filter=nb_filter[level],
                          kernel_size=kernel_size)
        skips.append(x)
        x = MaxPooling2D(pool_size=pool_size, name=pool_name)(x)

    # Bottleneck.
    x = standard_unit(x, stage='51', nb_filter=nb_filter[4],
                      kernel_size=kernel_size)

    # Expanding path: up-sample, join with the matching skip, convolve.
    decoder_plan = ((3, 'up42', 'merge42', '42'),
                    (2, 'up33', 'merge33', '33'),
                    (1, 'up24', 'merge24', '24'),
                    (0, 'up15', 'merge15', '15'))
    for level, up_name, merge_name, stage in decoder_plan:
        raised = Conv2DTranspose(nb_filter[level], kernel_size,
                                 strides=pool_size, name=up_name,
                                 padding='same')(x)
        x = concatenate([raised, skips[level]], name=merge_name, axis=3)
        x = standard_unit(x, stage=stage, nb_filter=nb_filter[level],
                          kernel_size=kernel_size)

    unet_output = Conv2D(num_class, (1, 1), activation='sigmoid',
                         name='output', kernel_initializer='he_normal',
                         padding='same', kernel_regularizer=l2(3e-4))(x)
    # Undo the initial zero padding.
    cropped = Cropping2D(padding_size)(unet_output)

    return Model(inputs=img_input, outputs=cropped)
def FNN(
        feature_dim_dict,
        embedding_size=8,
        hidden_size=(128, 128),
        l2_reg_embedding=1e-5,
        l2_reg_linear=1e-5,
        l2_reg_deep=0,
        init_std=0.0001,
        seed=1024,
        keep_prob=1,
        activation='relu',
        final_activation='sigmoid',
):
    """Instantiates the Factorization-supported Neural Network architecture.

    The model sums a linear term (per-field linear embeddings plus an
    optional Dense(1) over the dense inputs) with the logit of an MLP fed by
    the flattened field embeddings.

    :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer,sparse feature embedding_size
    :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear weight
    :param l2_reg_deep: float . L2 regularizer strength applied to deep net
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param keep_prob: float in (0,1]. keep_prob used in deep net
    :param activation: Activation function to use in deep net
    :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'``
    :return: A Keras model instance.
    :raises ValueError: if ``feature_dim_dict`` is not a dict holding both a ``'sparse'`` and a ``'dense'`` key.
    """
    if (not isinstance(feature_dim_dict, dict)
            or "sparse" not in feature_dim_dict
            or "dense" not in feature_dim_dict):
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}"
        )

    sparse_input, dense_input = get_input(feature_dim_dict, None)
    sparse_embedding, linear_embedding, = get_embeddings(
        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding,
        l2_reg_linear)

    embed_list = [
        sparse_embedding[i](sparse_input[i])
        for i in range(len(feature_dim_dict["sparse"]))
    ]
    linear_parts = [
        linear_embedding[i](sparse_input[i]) for i in range(len(sparse_input))
    ]

    # Collapse the per-field linear logits into one tensor; None means there
    # is no sparse field contributing to the linear term.
    if len(linear_parts) > 1:
        linear_term = add(linear_parts)
    elif linear_parts:
        linear_term = linear_parts[0]
    else:
        linear_term = None

    if len(dense_input) > 0:
        continuous_embedding_list = list(
            map(
                Dense(
                    embedding_size,
                    use_bias=False,
                    kernel_regularizer=l2(l2_reg_embedding),
                ), dense_input))
        continuous_embedding_list = list(
            map(Reshape((1, embedding_size)), continuous_embedding_list))
        embed_list += continuous_embedding_list

        dense_input_ = dense_input[0] if len(
            dense_input) == 1 else Concatenate()(dense_input)
        linear_dense_logit = Dense(
            1,
            activation=None,
            use_bias=False,
            kernel_regularizer=l2(l2_reg_linear))(dense_input_)
        # With dense-only features there is nothing to add the dense logit
        # to; `add` cannot take an empty placeholder, so use it directly.
        if linear_term is None:
            linear_term = linear_dense_logit
        else:
            linear_term = add([linear_dense_logit, linear_term])

    num_inputs = len(dense_input) + len(sparse_input)
    # A single embedding is passed through as is, so that `Concatenate` is
    # only ever called on a list of several tensors.
    if len(embed_list) == 1:
        stacked = embed_list[0]
    else:
        stacked = Concatenate()(embed_list)
    deep_input = Reshape([num_inputs * embedding_size])(stacked)

    deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, False,
                   seed)(deep_input)
    deep_logit = Dense(1, use_bias=False, activation=None)(deep_out)

    final_logit = add([deep_logit, linear_term])
    output = PredictionLayer(final_activation)(final_logit)
    model = Model(inputs=sparse_input + dense_input, outputs=output)
    return model
# NOTE(review): this fragment starts mid-script; the statement below
# presumably belongs to a loop whose header is not visible here -- confirm
# its indentation against the full file.
test_labels.append(int(content))
# Convert the collected integer labels to an array.
test_labels = np.array(test_labels)

# `.item()` unwraps the stored object; the result is indexed by the keys
# 'names' and 'onehots' below (presumably a dict saved with numpy -- TODO
# confirm how `data` / `test_data` were loaded).
data = data.item()
names = np.array(data['names'])
# Reshape the one-hot data to (N, 72, 398, 1) so it can feed Conv2D.
smiles = np.array(data['onehots']).reshape(-1, 72, 398, 1)

# Same unpacking for the test split.
test_data = test_data.item()
test_names = np.array(test_data['names'])
test_smiles = np.array(test_data['onehots']).reshape(-1, 72, 398, 1)
# print(smiles)

# Three identical stages: 2x2 conv with 8 filters and L2(0.02) kernel
# regularization -> batch norm -> ReLU -> 2x2 max pooling.
model = Sequential()
# The first layer takes its input shape from the training array
# (everything but the sample axis).
model.add(Conv2D(8, (2, 2), input_shape=smiles.shape[1:], kernel_regularizer=regularizers.l2(0.02)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(8, (2, 2), kernel_regularizer=regularizers.l2(0.02)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(8, (2, 2), kernel_regularizer=regularizers.l2(0.02)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# Dropout with rate 0.5 after the convolutional stages.
model.add(Dropout(0.5))
def create_embedding_dict(feature_dim_dict, embedding_size, init_std, seed, l2_reg, prefix='sparse', seq_mask_zero=True):
    """Create one `Embedding` layer per sparse and per sequence feature.

    :param feature_dim_dict: dict with a ``'sparse'`` iterable and optionally a ``'sequence'`` iterable of feature objects exposing ``name`` and ``dimension``
    :param embedding_size: embedding width, or ``'auto'`` to use ``6 * int(dimension ** 0.25)`` per feature
    :param init_std: stddev of the ``RandomNormal`` embeddings initializer
    :param seed: seed of the ``RandomNormal`` embeddings initializer
    :param l2_reg: L2 strength of the embeddings regularizer
    :param prefix: first part of every layer name (``<prefix>_emb_<index>-<feature name>``)
    :param seq_mask_zero: ``mask_zero`` value passed to sequence-feature embeddings only
    :return: dict mapping feature name to its ``Embedding`` layer; a sequence feature replaces a sparse entry with the same name
    """
    def _make_layer(feat, index, **extra):
        # Single place where the embedding width is decided.
        if embedding_size == 'auto':
            width = 6 * int(pow(feat.dimension, 0.25))
        else:
            width = embedding_size
        return Embedding(feat.dimension, width,
                         embeddings_initializer=RandomNormal(
                             mean=0.0, stddev=init_std, seed=seed),
                         embeddings_regularizer=l2(l2_reg),
                         name=prefix + '_emb_' + str(index) + '-' + feat.name,
                         **extra)

    if embedding_size == 'auto':
        print("Notice:Do not use auto embedding in models other than DCN")

    sparse_embedding = {}
    for position, feat in enumerate(feature_dim_dict["sparse"]):
        sparse_embedding[feat.name] = _make_layer(feat, position)

    if 'sequence' in feature_dim_dict:
        # Sequence features continue the numbering after the sparse ones.
        first_index = len(sparse_embedding)
        for position, feat in enumerate(feature_dim_dict['sequence'],
                                        start=first_index):
            sparse_embedding[feat.name] = _make_layer(
                feat, position, mask_zero=seq_mask_zero)

    return sparse_embedding
def build_vgg(img_rows: int = 224, img_cols: int = 224, num_classes: int = 1000):
    """Build an uncompiled VGG-style Sequential classifier.

    Five convolutional stages of 3x3 'same'-padded ReLU convolutions with
    2, 2, 4, 4 and 4 layers and 64, 128, 256, 512 and 512 filters, each
    followed by max pooling; then Flatten, two Dense(4096) layers, a
    Dropout(0.5) and a softmax Dense layer. Every Conv2D and Dense kernel
    uses an L2(0.0005) regularizer.

    Args:
        img_rows: input height.
        img_cols: input width.
        num_classes: number of units of the softmax output layer.

    Returns:
        The `Sequential` model; the input has shape (img_rows, img_cols, 3).
    """
    weight_decay = 0.0005
    # (filters, number of conv layers) per stage; each stage ends in a
    # MaxPooling2D layer.
    stages = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    vgg = Sequential()
    is_first_layer = True
    for filters, depth in stages:
        for _ in range(depth):
            extra = {}
            if is_first_layer:
                # Only the first layer of a Sequential model defines the
                # input shape; repeating it on later layers has no effect.
                extra['input_shape'] = (img_rows, img_cols, 3)
                is_first_layer = False
            vgg.add(
                Conv2D(filters,
                       kernel_size=(3, 3),
                       activation='relu',
                       padding='same',
                       kernel_regularizer=l2(weight_decay),
                       **extra))
        vgg.add(MaxPooling2D())

    vgg.add(Flatten())
    vgg.add(Dense(4096, activation='relu', kernel_regularizer=l2(weight_decay)))
    vgg.add(Dense(4096, activation='relu', kernel_regularizer=l2(weight_decay)))
    vgg.add(Dropout(0.5))
    vgg.add(
        Dense(num_classes,
              activation='softmax',
              kernel_regularizer=l2(weight_decay)))
    return vgg
def xDeepFM(feature_dim_dict,
            embedding_size=8,
            hidden_size=(256, 256),
            cin_layer_size=(
                128,
                128,
            ),
            cin_split_half=True,
            cin_activation='relu',
            l2_reg_linear=0.00001,
            l2_reg_embedding=0.00001,
            l2_reg_deep=0,
            init_std=0.0001,
            seed=1024,
            keep_prob=1,
            activation='relu',
            final_activation='sigmoid',
            use_bn=False):
    """Instantiates the xDeepFM architecture.

    The final logit is the linear logit, plus the deep (MLP) logit when
    ``hidden_size`` is non-empty, plus the CIN logit when ``cin_layer_size``
    is non-empty.

    :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer,sparse feature embedding_size
    :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param cin_layer_size: list,list of positive integer or empty list, the feature maps in each hidden layer of Compressed Interaction Network
    :param cin_split_half: bool, forwarded unchanged to ``CIN``; see that layer for its exact meaning
    :param cin_activation: activation function used on feature maps
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: L2 regularizer strength applied to embedding vector
    :param l2_reg_deep: L2 regularizer strength applied to deep net
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param keep_prob: float in (0,1]. keep_prob used in deep net
    :param activation: Activation function to use in deep net
    :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'``
    :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net
    :return: A Keras model instance.
    :raises ValueError: if ``feature_dim_dict`` is not a dict holding both a ``'sparse'`` and a ``'dense'`` key.
    """
    if (not isinstance(feature_dim_dict, dict)
            or "sparse" not in feature_dim_dict
            or "dense" not in feature_dim_dict):
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}"
        )

    sparse_input, dense_input = get_input(feature_dim_dict, None)
    sparse_embedding, linear_embedding, = get_share_embeddings(
        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding,
        l2_reg_linear)

    embed_list = [
        sparse_embedding[i](sparse_input[i]) for i in range(len(sparse_input))
    ]
    linear_parts = [
        linear_embedding[i](sparse_input[i]) for i in range(len(sparse_input))
    ]

    # Collapse the per-field linear logits into one tensor; None means there
    # is no sparse field contributing to the linear term.
    if len(linear_parts) > 1:
        linear_term = add(linear_parts)
    elif linear_parts:
        linear_term = linear_parts[0]
    else:
        linear_term = None

    if len(dense_input) > 0:
        continuous_embedding_list = list(
            map(
                Dense(
                    embedding_size,
                    use_bias=False,
                    kernel_regularizer=l2(l2_reg_embedding),
                ), dense_input))
        continuous_embedding_list = list(
            map(Reshape((1, embedding_size)), continuous_embedding_list))
        embed_list += continuous_embedding_list

        dense_input_ = dense_input[0] if len(
            dense_input) == 1 else Concatenate()(dense_input)
        linear_dense_logit = Dense(
            1,
            activation=None,
            use_bias=False,
            kernel_regularizer=l2(l2_reg_linear))(dense_input_)
        # With dense-only features there is no sparse linear term; a plain
        # number cannot be merged with `add`, so use the dense logit alone.
        if linear_term is None:
            linear_term = linear_dense_logit
        else:
            linear_term = add([linear_dense_logit, linear_term])

    linear_logit = linear_term

    fm_input = Concatenate(
        axis=1)(embed_list) if len(embed_list) > 1 else embed_list[0]

    if len(cin_layer_size) > 0:
        exFM_out = CIN(cin_layer_size, cin_activation, cin_split_half,
                       seed)(fm_input)
        exFM_logit = Dense(
            1,
            activation=None,
        )(exFM_out)

    # The deep tower is always instantiated (as before); its logit is only
    # used when `hidden_size` is non-empty.
    deep_input = Flatten()(fm_input)
    deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                   seed)(deep_input)
    deep_logit = Dense(1, use_bias=False, activation=None)(deep_out)

    # linear [+ Deep] [+ CIN], in that order.
    logits = [linear_logit]
    if len(hidden_size) > 0:
        logits.append(deep_logit)
    if len(cin_layer_size) > 0:
        logits.append(exFM_logit)
    final_logit = logits[0] if len(logits) == 1 else add(logits)

    output = PredictionLayer(final_activation)(final_logit)
    model = Model(inputs=sparse_input + dense_input, outputs=output)
    return model
def AFM(
        feature_dim_dict,
        embedding_size=8,
        use_attention=True,
        attention_factor=8,
        l2_reg_linear=1e-5,
        l2_reg_embedding=1e-5,
        l2_reg_att=1e-5,
        keep_prob=1.0,
        init_std=0.0001,
        seed=1024,
        final_activation='sigmoid',
):
    """Instantiates the Attentonal Factorization Machine architecture.

    :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive integer,sparse feature embedding_size
    :param use_attention: bool,whether use attention or not,if set to ``False``.it is the same as **standard Factorization Machine**
    :param attention_factor: positive integer,units in attention net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_att: float. L2 regularizer strength applied to attention net
    :param keep_prob: float in (0,1]. keep_prob after attention net
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'``
    :return: A Keras model instance.
    :raises ValueError: if ``feature_dim_dict`` is not a dict with a dict under ``'sparse'`` and a list under ``'dense'``.
    """
    if (not isinstance(feature_dim_dict, dict)
            or "sparse" not in feature_dim_dict
            or "dense" not in feature_dim_dict):
        raise ValueError(
            "feature_dim_dict must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}"
        )
    # The offending type is formatted into the message; passing it as a
    # second exception argument would render the error as a tuple.
    if not isinstance(feature_dim_dict["sparse"], dict):
        raise ValueError(
            "feature_dim_dict['sparse'] must be a dict,cur is {}".format(
                type(feature_dim_dict['sparse'])))
    if not isinstance(feature_dim_dict["dense"], list):
        raise ValueError(
            "feature_dim_dict['dense'] must be a list,cur is {}".format(
                type(feature_dim_dict['dense'])))

    sparse_input, dense_input = get_input(feature_dim_dict, None)
    sparse_embedding, linear_embedding, = get_share_embeddings(
        feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding,
        l2_reg_linear)

    embed_list = [
        sparse_embedding[i](sparse_input[i]) for i in range(len(sparse_input))
    ]
    linear_parts = [
        linear_embedding[i](sparse_input[i]) for i in range(len(sparse_input))
    ]

    # Collapse the per-field linear logits into one tensor; None means there
    # is no sparse field contributing to the linear term.
    if len(linear_parts) > 1:
        linear_term = add(linear_parts)
    elif linear_parts:
        linear_term = linear_parts[0]
    else:
        linear_term = None

    if len(dense_input) > 0:
        continuous_embedding_list = list(
            map(
                Dense(
                    embedding_size,
                    use_bias=False,
                    kernel_regularizer=l2(l2_reg_embedding),
                ), dense_input))
        continuous_embedding_list = list(
            map(Reshape((1, embedding_size)), continuous_embedding_list))
        embed_list += continuous_embedding_list

        dense_input_ = dense_input[0] if len(
            dense_input) == 1 else Concatenate()(dense_input)
        linear_dense_logit = Dense(
            1,
            activation=None,
            use_bias=False,
            kernel_regularizer=l2(l2_reg_linear))(dense_input_)
        # With dense-only features there is nothing to add the dense logit
        # to; `add` cannot take an empty placeholder, so use it directly.
        if linear_term is None:
            linear_term = linear_dense_logit
        else:
            linear_term = add([linear_dense_logit, linear_term])

    # Built unconditionally (as before), although only the plain FM branch
    # consumes it; the attention layer receives the un-concatenated list.
    fm_input = Concatenate(axis=1)(embed_list)
    if use_attention:
        fm_out = AFMLayer(attention_factor, l2_reg_att, keep_prob,
                          seed)(embed_list)
    else:
        fm_out = FM()(fm_input)

    final_logit = add([linear_term, fm_out])
    output = PredictionLayer(final_activation)(final_logit)
    model = Model(inputs=sparse_input + dense_input, outputs=output)
    return model
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
    """Residual block whose shortcut branch goes through its own convolution.

    The main path is conv(1x1) -> BN -> relu -> conv(kernel_size) -> BN ->
    relu -> conv(1x1) -> BN. The shortcut is conv(1x1) -> BN applied to
    `input_tensor`. Both results are added and passed through a final relu.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of exactly three integers, the filter counts of
            the three conv layers of the main path; the third one is also
            used for the shortcut conv
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer
            names
        strides: strides given to the middle conv layer of the main path and
            to the shortcut conv layer

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    branch_tag = str(stage) + block + '_branch'
    conv_name_base = 'res' + branch_tag
    bn_name_base = 'bn' + branch_tag

    def make_conv(n_filters, kernel, suffix, **extra):
        # Every conv in the block shares these settings; a new regularizer
        # object is created for each layer.
        return layers.Conv2D(
            n_filters,
            kernel,
            use_bias=False,
            kernel_initializer='he_normal',
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name=conv_name_base + suffix,
            **extra)

    def make_bn(suffix):
        return layers.BatchNormalization(
            axis=bn_axis,
            momentum=BATCH_NORM_DECAY,
            epsilon=BATCH_NORM_EPSILON,
            name=bn_name_base + suffix)

    # Main path, first 1x1 conv.
    main = make_conv(filters1, (1, 1), '2a')(input_tensor)
    main = make_bn('2a')(main)
    main = layers.Activation('relu')(main)

    # Main path, middle conv: the only main-path layer that takes `strides`.
    main = make_conv(filters2, kernel_size, '2b',
                     strides=strides, padding='same')(main)
    main = make_bn('2b')(main)
    main = layers.Activation('relu')(main)

    # Main path, last 1x1 conv (no activation before the merge).
    main = make_conv(filters3, (1, 1), '2c')(main)
    main = make_bn('2c')(main)

    # Shortcut path: 1x1 conv with the same strides as the middle conv.
    shortcut = make_conv(filters3, (1, 1), '1',
                         strides=strides)(input_tensor)
    shortcut = make_bn('1')(shortcut)

    merged = layers.add([main, shortcut])
    return layers.Activation('relu')(merged)
def __init__(self, num_classes=10, dtype="float32", batch_size=None):
    """Create the layers of the model registered under the name "resnet50".

    Only layer objects are constructed here; nothing is applied to a tensor.

    # Arguments
        num_classes: number of units of the final dense layer.
        dtype: not read anywhere in this constructor.
        batch_size: not read anywhere in this constructor.
    """
    super(CustomModel, self).__init__(name="resnet50")

    image_format = backend.image_data_format()
    if image_format == "channels_first":
        # This transpose layer exists only in channels_first mode.
        self._lambda = layers.Lambda(
            lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
            name="transpose",
        )
        bn_axis = 1
        data_format = "channels_first"
    else:
        bn_axis = 3
        data_format = "channels_last"

    def widths(base):
        # Filter triple of one block: [base, base, 4 * base]. A new list is
        # built on every call so that no two blocks share the same object.
        return [base, base, 4 * base]

    # ---- Stem -----------------------------------------------------------
    self._padding = layers.ZeroPadding2D(
        padding=(3, 3), data_format=data_format, name="zero_pad")
    self._conv2d_1 = layers.Conv2D(
        64,
        (7, 7),
        strides=(2, 2),
        padding="valid",
        use_bias=False,
        kernel_initializer="he_normal",
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name="conv1",
    )
    self._bn_1 = layers.BatchNormalization(
        axis=bn_axis,
        momentum=BATCH_NORM_DECAY,
        epsilon=BATCH_NORM_EPSILON,
        name="bn_conv1",
    )
    self._activation_1 = layers.Activation("relu")
    self._maxpooling2d = layers.MaxPooling2D(
        (3, 3), strides=(2, 2), padding="same")

    # ---- Stage 2: blocks a-c; the conv block is given strides=(1, 1) ------
    self._conv_block_1 = ConvBlock(
        3, widths(64), stage=2, block="a", strides=(1, 1))
    self._identity_block_1 = IdentityBlock(3, widths(64), stage=2, block="b")
    self._identity_block_2 = IdentityBlock(3, widths(64), stage=2, block="c")

    # ---- Stage 3: blocks a-d -----------------------------------------------
    self._conv_block_2 = ConvBlock(3, widths(128), stage=3, block="a")
    self._identity_block_3 = IdentityBlock(3, widths(128), stage=3, block="b")
    self._identity_block_4 = IdentityBlock(3, widths(128), stage=3, block="c")
    self._identity_block_5 = IdentityBlock(3, widths(128), stage=3, block="d")

    # ---- Stage 4: blocks a-f -----------------------------------------------
    self._conv_block_3 = ConvBlock(3, widths(256), stage=4, block="a")
    self._identity_block_6 = IdentityBlock(3, widths(256), stage=4, block="b")
    self._identity_block_7 = IdentityBlock(3, widths(256), stage=4, block="c")
    self._identity_block_8 = IdentityBlock(3, widths(256), stage=4, block="d")
    self._identity_block_9 = IdentityBlock(3, widths(256), stage=4, block="e")
    self._identity_block_10 = IdentityBlock(3, widths(256), stage=4, block="f")

    # ---- Stage 5: blocks a-c -----------------------------------------------
    self._conv_block_4 = ConvBlock(3, widths(512), stage=5, block="a")
    self._identity_block_11 = IdentityBlock(3, widths(512), stage=5, block="b")
    self._identity_block_12 = IdentityBlock(3, widths(512), stage=5, block="c")

    # ---- Head ------------------------------------------------------------
    # Axes averaged by the "reduce_mean" layer depend on the data format.
    rm_axes = [1, 2] if image_format == "channels_last" else [2, 3]
    # NOTE(review): the attribute name is spelt "_lamba_2"; it is kept as-is
    # because code outside this method may refer to it - confirm before
    # renaming.
    self._lamba_2 = layers.Lambda(
        lambda x: backend.mean(x, rm_axes), name="reduce_mean")
    # The layer name is "fc1000" whatever value num_classes has.
    self._dense = layers.Dense(
        num_classes,
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name="fc1000",
    )
    self._activation_2 = layers.Activation("softmax")
train_x = input_x[2*constants.DATA_A_DAY:, :] train_y = input_y[2*constants.DATA_A_DAY:] n_samples = len(train_y) valid_x = np.reshape(input_x[-6*constants.DATA_A_DAY:-5*constants.DATA_A_DAY, :], (int(1*constants.DATA_A_DAY / n_outputs), sum(constants.FEAT_LEN) * n_outputs)) valid_y = np.reshape(input_y[-6*constants.DATA_A_DAY:-5*constants.DATA_A_DAY], (int(1*constants.DATA_A_DAY / n_outputs), n_outputs)) else: pred_x = readTestData(test_file) mlp = Sequential() mlp.add(InputLayer(input_shape=(n_inputs * n_outputs,))) for n in n_mlp: mlp.add(Dense(units=n, activation=tf.nn.relu, kernel_regularizer=l2(l2_param), bias_regularizer=l2(l2_param))) mlp.add(Dense(units=n_outputs, activation=tf.nn.relu, kernel_regularizer=l2(l2_param), bias_regularizer=l2(l2_param))) def validate(mlp): pred_y = mlp.predict(valid_x) mae_ignore_type = np.mean(np.abs(flowIgnoreType(np.reshape(pred_y, (-1,1))) - flowIgnoreType(np.reshape(valid_y, (-1,1))))) print(mae_ignore_type) return mae_ignore_type def predict(mlp): input_x = nextBatch(pred_x, n_outputs) y_pred = mlp.predict(input_x)
def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
        use_din=True, use_bn=False, hidden_size=(200, 80), activation='relu',
        att_hidden_size=(80, 40), att_activation=Dice,
        att_weight_normalization=False, l2_reg_deep=0, l2_reg_embedding=1e-5,
        final_activation='sigmoid', keep_prob=1, init_std=0.0001, seed=1024, ):
    """Instantiates the Deep Interest Network architecture.

    :param feature_dim_dict: dict,to indicate sparse field (**now only support sparse feature**)like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list,to indicate sequence sparse field (**now only support sparse feature**),must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer,sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param use_din: bool, whether use din pooling or not.If set to ``False``,use **sum pooling**
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param activation: Activation function to use in deep net
    :param att_hidden_size: list,list of positive integer , the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool.Whether normalize the attention score of local activation unit.
    :param l2_reg_deep: float. L2 regularizer strength applied to deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'``
    :param keep_prob: float in (0,1]. keep_prob used in deep net
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :return: A Keras model instance.
    :raises ValueError: if ``feature_dim_dict`` is not a dict with ``'sparse'``
        and ``'dense'`` keys, if any dense field is declared, or if
        ``seq_feature_list`` names a field missing from
        ``feature_dim_dict["sparse"]``.
    """
    if not isinstance(feature_dim_dict, dict) \
            or "sparse" not in feature_dim_dict \
            or "dense" not in feature_dim_dict:
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")
    if len(feature_dim_dict['dense']) > 0:
        raise ValueError('Now DIN only support sparse input')

    # Every sequence feature is looked up in the per-field embedding dict
    # below; report an unknown name here instead of as a bare KeyError later.
    sparse_fields = feature_dim_dict["sparse"]
    unknown_seq_feats = [
        feat for feat in seq_feature_list if feat not in sparse_fields]
    if unknown_seq_feats:
        raise ValueError(
            "seq_feature_list must be a subset of feature_dim_dict['sparse'], "
            "unknown features: {}".format(unknown_seq_feats))

    sparse_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding layer per sparse field. The same layer object embeds the
    # candidate (query) input and the behaviour-sequence (keys) input of a
    # sequence feature.
    sparse_embedding_dict = {
        feat: Embedding(
            sparse_fields[feat],
            embedding_size,
            embeddings_initializer=RandomNormal(
                mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(i) + '-' + feat)
        for i, feat in enumerate(sparse_fields)
    }

    query_emb_list = [
        sparse_embedding_dict[feat](sparse_input[feat])
        for feat in seq_feature_list]
    keys_emb_list = [
        sparse_embedding_dict[feat](user_behavior_input[feat])
        for feat in seq_feature_list]
    deep_input_emb_list = [
        sparse_embedding_dict[feat](sparse_input[feat])
        for feat in sparse_fields]

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)

    # Pool the behaviour sequence: attention against the query, or a plain sum.
    if use_din:
        hist = AttentionSequencePoolingLayer(
            att_hidden_size, att_activation,
            weight_normalization=att_weight_normalization)(
            [query_emb, keys_emb, user_behavior_length])
    else:
        hist = SequencePoolingLayer(hist_len_max, 'sum')(
            [keys_emb, user_behavior_length])

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    output = MLP(hidden_size, activation, l2_reg_deep,
                 keep_prob, use_bn, seed)(deep_input_emb)
    output = Dense(1, final_activation)(output)
    output = Reshape([1])(output)

    model_input_list = list(sparse_input.values()) \
        + list(user_behavior_input.values()) + [user_behavior_length]
    model = Model(inputs=model_input_list, outputs=output)
    return model