def build_model(self, input_shape):
    """Build one LSTM-style recurrence step as a functional Keras model.

    Args:
        input_shape: Batch shape passed to the ``x`` input. Its first entry
            is reused as the leading dimension of both state inputs. The
            original note described it as
            ``(number_of_times_unfolded, dimension_of_each_output)``.

    Returns:
        A ``Model`` mapping ``[x, h_tm1, c_tm1]`` to ``[y, h, c]``, where
        ``h_tm1`` / ``c_tm1`` are the hidden and cell state of the previous
        timestep (t-1).
    """
    hidden_dim = self.hidden_dim
    output_dim = self.output_dim
    leading_dim = input_shape[0]

    x = Input(batch_shape=input_shape)
    h_tm1 = Input(batch_shape=(leading_dim, hidden_dim))
    c_tm1 = Input(batch_shape=(leading_dim, hidden_dim))

    # Input projection carries no bias; the recurrent projection U does.
    W1 = Dense(
        hidden_dim * 4,
        kernel_initializer=self.kernel_initializer,
        kernel_regularizer=self.kernel_regularizer,
        use_bias=False,
    )
    W2 = Dense(
        output_dim,
        kernel_initializer=self.kernel_initializer,
        kernel_regularizer=self.kernel_regularizer,
    )
    U = Dense(
        hidden_dim * 4,
        kernel_initializer=self.kernel_initializer,
        kernel_regularizer=self.kernel_regularizer,
    )

    # One fused pre-activation, cut into four equal parts by get_slices.
    pre_activation = add([W1(x), U(h_tm1)])
    z_in, z_forget, z_cand, z_out = get_slices(pre_activation, 4)

    i = Activation(self.recurrent_activation)(z_in)
    f = Activation(self.recurrent_activation)(z_forget)

    # New cell state: gated previous state plus gated candidate.
    retained = multiply([f, c_tm1])
    admitted = multiply([i, Activation(self.activation)(z_cand)])
    c = add([retained, admitted])

    o = Activation(self.recurrent_activation)(z_out)
    h = multiply([o, Activation(self.activation)(c)])
    y = Activation(self.activation)(W2(h))

    return Model([x, h_tm1, c_tm1], [y, h, c])
def build_generator(self):
    """Build the label-conditioned convolutional generator.

    A Sequential stack projects a 100-dimensional vector to 7x7x128,
    applies two upsample + 3x3 convolution stages, and ends in a ``tanh``
    convolution with ``self.channels`` filters. The integer label is
    embedded into 100 dimensions and multiplied element-wise with the noise
    before it enters the stack.

    Returns:
        A ``Model`` taking ``[noise, label]`` and producing the image.
    """
    noise_dim = 100

    model = Sequential()
    stack = [
        Dense(128 * 7 * 7, activation="relu", input_dim=noise_dim),
        Reshape((7, 7, 128)),
        BatchNormalization(momentum=0.8),
        UpSampling2D(),
        Conv2D(128, kernel_size=3, padding="same"),
        Activation("relu"),
        BatchNormalization(momentum=0.8),
        UpSampling2D(),
        Conv2D(64, kernel_size=3, padding="same"),
        Activation("relu"),
        BatchNormalization(momentum=0.8),
        Conv2D(self.channels, kernel_size=3, padding='same'),
        Activation("tanh"),
    ]
    for layer in stack:
        model.add(layer)

    model.summary()

    noise = Input(shape=(noise_dim,))
    label = Input(shape=(1,), dtype='int32')

    # Embedding width matches the noise so the two can be multiplied.
    label_embedding = Flatten()(Embedding(self.num_classes, noise_dim)(label))
    conditioned_noise = multiply([noise, label_embedding])

    img = model(conditioned_noise)
    return Model([noise, label], img)
def build_discriminator(self):
    """Build the label-conditioned dense discriminator.

    The image is flattened and multiplied element-wise with a label
    embedding of the same length (``np.prod(self.img_shape)``); the product
    feeds three 512-unit LeakyReLU layers (dropout after the second and
    third) and a single sigmoid output.

    Returns:
        A ``Model`` taking ``[img, label]`` and producing the validity score.
    """
    model = Sequential()
    stack = [
        Dense(512, input_dim=np.prod(self.img_shape)),
        LeakyReLU(alpha=0.2),
        Dense(512),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        Dense(512),
        LeakyReLU(alpha=0.2),
        Dropout(0.4),
        Dense(1, activation='sigmoid'),
    ]
    for layer in stack:
        model.add(layer)

    model.summary()

    img = Input(shape=self.img_shape)
    label = Input(shape=(1,), dtype='int32')

    # One embedding value per pixel so it can gate the flattened image.
    embedded_label = Embedding(self.num_classes, np.prod(self.img_shape))(label)
    label_embedding = Flatten()(embedded_label)
    flat_img = Flatten()(img)

    gated_pixels = multiply([flat_img, label_embedding])
    validity = model(gated_pixels)

    return Model([img, label], validity)
def build_generator(self):
    """Build the label-conditioned dense generator.

    Three Dense -> LeakyReLU -> BatchNormalization stages (256, 512, 1024
    units) are followed by a ``tanh`` Dense layer of
    ``np.prod(self.img_shape)`` units reshaped to ``self.img_shape``. The
    label is embedded into ``self.latent_dim`` dimensions and multiplied
    element-wise with the noise before entering the stack.

    Returns:
        A ``Model`` taking ``[noise, label]`` and producing the image.
    """
    model = Sequential()

    # Only the first Dense layer declares the input dimension.
    model.add(Dense(256, input_dim=self.latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    for width in (512, 1024):
        model.add(Dense(width))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

    model.add(Dense(np.prod(self.img_shape), activation='tanh'))
    model.add(Reshape(self.img_shape))

    model.summary()

    noise = Input(shape=(self.latent_dim,))
    label = Input(shape=(1,), dtype='int32')
    label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label))

    conditioned_noise = multiply([noise, label_embedding])
    img = model(conditioned_noise)

    return Model([noise, label], img)
def build_model(len_words, embedding_matrix):
    """Build and compile a two-input model over frozen word embeddings.

    Each input goes through its own Embedding (frozen, initialised from
    ``embedding_matrix``, zero treated as mask), a ReLU Dense layer and a
    128-unit LSTM. The two LSTM outputs are multiplied element-wise and
    passed through two ReLU Dense layers of ``vec_size`` units.

    Args:
        len_words: Vocabulary size; the embedding uses ``len_words + 1`` rows.
        embedding_matrix: Initial weights for both embedding layers.

    Returns:
        The compiled ``Model`` (mean squared error loss, RMSprop optimizer).
    """
    def encode_branch():
        # Relies on the module-level maxlen_words / vec_size settings.
        tokens = Input(shape=(maxlen_words,))
        embedded = Embedding(
            output_dim=vec_size,
            input_dim=len_words+1,
            input_length=maxlen_words,
            mask_zero=True,
            weights=[embedding_matrix],
            trainable=False,
        )(tokens)
        projected = Dense(vec_size, activation='relu')(embedded)
        return tokens, LSTM(128)(projected)

    f_input, f_layer = encode_branch()
    r_input, r_layer = encode_branch()

    merged_layer = multiply([f_layer, r_layer])
    hidden = Dense(vec_size, activation='relu')(merged_layer)
    out_layer = Dense(vec_size, activation='relu')(hidden)

    my_model = Model([f_input, r_input], out_layer)
    my_model.compile(loss='mean_squared_error', optimizer=RMSprop())
    return my_model
def squeeze_excite_block(input, ratio=16):
    """Scale the channels of ``input`` by a learned sigmoid gate.

    The tensor is globally average-pooled, passed through a bottleneck
    Dense layer (ReLU) and a Dense layer back to the channel count
    (sigmoid), and the result is multiplied with the original tensor.

    Args:
        input: 4-D Keras tensor in the backend's current image data format.
        ratio: Divisor applied to the channel count to size the bottleneck.

    Returns:
        The gated tensor produced by ``multiply([input, gate])``.
    """
    init = input
    channels_first = K.image_data_format() == "channels_first"
    channel_axis = 1 if channels_first else -1

    # Read the static shape through the public backend helper rather than
    # the private `_keras_shape` attribute, which is not part of the API.
    filters = K.int_shape(init)[channel_axis]

    # Shape that lets the pooled vector broadcast against the feature map.
    se_shape = (filters, 1, 1) if channels_first else (1, 1, filters)

    # With fewer channels than `ratio` the integer division yields 0 and
    # the bottleneck would have no units; keep at least one.
    bottleneck_units = max(1, filters // ratio)

    se = GlobalAveragePooling2D()(init)
    se = Reshape(se_shape)(se)
    se = Dense(bottleneck_units, activation='relu',
               kernel_initializer='he_normal',
               kernel_regularizer=regularizers.l2(weight_decay),
               use_bias=False)(se)
    se = Dense(filters, activation='sigmoid',
               kernel_initializer='he_normal',
               kernel_regularizer=regularizers.l2(weight_decay),
               use_bias=False)(se)

    x = multiply([init, se])
    return x
def eltwise(layer, layer_in, layerId):
    """Merge ``layer_in`` with the operation named in the layer parameters.

    Args:
        layer: Mapping whose ``['params']['layer_type']`` selects the merge:
            ``'Multiply'``, ``'Sum'``, ``'Average'`` or ``'Dot'``; any other
            value falls back to ``maximum``.
        layer_in: Sequence of input tensors.
        layerId: Key under which the merged tensor is returned.

    Returns:
        A one-entry dict ``{layerId: merged_tensor}``.
    """
    kind = layer['params']['layer_type']

    # Inputs are passed in reverse order; the original comment says this
    # "is to handle visualization".
    operands = layer_in[::-1]

    if kind == 'Multiply':
        merged = multiply(operands)
    elif kind == 'Sum':
        merged = add(operands)
    elif kind == 'Average':
        merged = average(operands)
    elif kind == 'Dot':
        merged = dot(operands, -1)
    else:
        merged = maximum(operands)

    return {layerId: merged}
def test_merge_multiply():
    """Check ``layers.multiply`` on three inputs: static shape and values."""
    inputs = [layers.Input(shape=(4, 5)) for _ in range(3)]

    product = layers.multiply(inputs)
    assert product._keras_shape == (None, 4, 5)

    model = models.Model(inputs, product)

    # Three random batches of two samples each.
    batches = [np.random.random((2, 4, 5)) for _ in range(3)]
    out = model.predict(batches)

    assert out.shape == (2, 4, 5)
    expected = batches[0] * batches[1] * batches[2]
    assert_allclose(out, expected, atol=1e-4)
def build_generator(latent_size):
    """Build a class-conditional generator from a latent vector and a label.

    A pair (z, L) - latent vector and class label - is mapped to image
    space: two Dense layers, a reshape to ``(128, 7, 7)``, two
    upsample + 5x5 convolution stages, and a final single-filter ``tanh``
    convolution.

    Args:
        latent_size: Length of the latent vector and of the class embedding.

    Returns:
        A ``Model`` taking ``[latent, image_class]`` and producing the image.
    """
    cnn = Sequential()
    stack = [
        Dense(1024, input_dim=latent_size, activation='relu'),
        Dense(128 * 7 * 7, activation='relu'),
        Reshape((128, 7, 7)),
        # upsample to (..., 14, 14)
        UpSampling2D(size=(2, 2)),
        Conv2D(256, 5, padding='same', activation='relu',
               kernel_initializer='glorot_normal'),
        # upsample to (..., 28, 28)
        UpSampling2D(size=(2, 2)),
        Conv2D(128, 5, padding='same', activation='relu',
               kernel_initializer='glorot_normal'),
        # reduce to a single channel
        Conv2D(1, 2, padding='same', activation='tanh',
               kernel_initializer='glorot_normal'),
    ]
    for layer in stack:
        cnn.add(layer)

    # The z space commonly referred to in GAN papers.
    latent = Input(shape=(latent_size, ))

    # The class label; the embedding table has 10 rows (10 classes in MNIST).
    image_class = Input(shape=(1,), dtype='int32')
    cls = Flatten()(Embedding(10, latent_size,
                              embeddings_initializer='glorot_normal')(image_class))

    # Hadamard product between z-space and the class-conditional embedding.
    h = layers.multiply([latent, cls])

    return Model([latent, image_class], cnn(h))
def senet_se_block(input_tensor, stage, block, compress_rate=16, bias=False):
    """Gate the channels of ``input_tensor`` with a 1x1-convolution SE branch.

    Args:
        input_tensor: Feature map whose last axis is read as the channel count.
        stage: Stage identifier, used only in the convolution layer names.
        block: Block identifier, used only in the convolution layer names.
        compress_rate: Divisor applied to the channel count for the bottleneck.
        bias: Whether the two 1x1 convolutions use a bias term.

    Returns:
        ``input_tensor`` multiplied by the sigmoid gate.
    """
    name_prefix = 'conv' + str(stage) + "_" + str(block)
    conv1_down_name = name_prefix + "_1x1_down"
    conv1_up_name = name_prefix + "_1x1_up"

    num_channels = int(input_tensor.shape[-1])
    bottle_neck = int(num_channels // compress_rate)

    # Squeeze: one value per channel, kept 4-D for the 1x1 convolutions.
    gate = GlobalAveragePooling2D()(input_tensor)
    gate = Reshape((1, 1, num_channels))(gate)

    # Excite: compress, then expand back to the channel count.
    gate = Conv2D(bottle_neck, (1, 1), use_bias=bias,
                  name=conv1_down_name)(gate)
    gate = Activation('relu')(gate)
    gate = Conv2D(num_channels, (1, 1), use_bias=bias,
                  name=conv1_up_name)(gate)
    gate = Activation('sigmoid')(gate)

    return multiply([input_tensor, gate])
def build_generator(latent_size):
    """Build a class-conditional transposed-convolution generator.

    A pair (z, L) - latent vector and class label - is mapped to image
    space: a Dense projection reshaped to ``(3, 3, 384)`` followed by three
    ``Conv2DTranspose`` layers, the last one with a single ``tanh`` filter.

    Args:
        latent_size: Length of the latent vector and of the class embedding.

    Returns:
        A ``Model`` taking ``[latent, image_class]`` and producing the image.
    """
    cnn = Sequential()
    stack = [
        Dense(3 * 3 * 384, input_dim=latent_size, activation='relu'),
        Reshape((3, 3, 384)),
        # upsample to (7, 7, ...)
        Conv2DTranspose(192, 5, strides=1, padding='valid',
                        activation='relu',
                        kernel_initializer='glorot_normal'),
        BatchNormalization(),
        # upsample to (14, 14, ...)
        Conv2DTranspose(96, 5, strides=2, padding='same',
                        activation='relu',
                        kernel_initializer='glorot_normal'),
        BatchNormalization(),
        # upsample to (28, 28, ...)
        Conv2DTranspose(1, 5, strides=2, padding='same',
                        activation='tanh',
                        kernel_initializer='glorot_normal'),
    ]
    for layer in stack:
        cnn.add(layer)

    # The z space commonly referred to in GAN papers.
    latent = Input(shape=(latent_size, ))

    # The class label, embedded to the same width as the latent vector.
    image_class = Input(shape=(1,), dtype='int32')
    cls = Flatten()(Embedding(num_classes, latent_size,
                              embeddings_initializer='glorot_normal')(image_class))

    # Hadamard product between z-space and the class-conditional embedding.
    h = layers.multiply([latent, cls])

    return Model([latent, image_class], cnn(h))
img_input = Input(shape=input_shape) # We push the 'where' masks to the following list wheres = [None] * nlayers y = img_input for i in range(nlayers): y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize) y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool) wheres[i] = layers.Lambda( getwhere, output_shape=lambda x: x[0])([y_prepool, y]) # Now build the decoder, and use the stored 'where' masks to place the features for i in range(nlayers): ind = nlayers - 1 - i y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y) y = layers.multiply([y, wheres[ind]]) y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize) # Use hard_simgoid to clip range of reconstruction y = Activation('hard_sigmoid')(y) # Define the model and it's mean square error loss, and compile it with Adam model = Model(img_input, y) model.compile('adam', 'mse') # Fit the model model.fit(x_train, x_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, x_test))
def model():
    """Build, compile and summarise the two-input classification network.

    ``input_1`` (shape ``(7, 7, 176)``) is rescaled by a sigmoid gate
    computed from itself (the ``CAB_*`` layers) and then processed by the
    ``*_spe*`` Conv3D stack. ``input_2`` (shape ``(27, 27, 30)``) is
    processed by the ``*_spa*`` Conv3D stack, which includes a
    dot-product weighting block (the ``VIS_*`` layers). Both outputs are
    concatenated, passed through a second ``VIS_*`` block and classified by
    three Dense layers.

    Relies on ``num_filters_spe``, ``num_filters_spa`` and ``nb_classes``
    defined outside this function.

    Returns:
        The compiled Keras ``Model`` (categorical cross-entropy, SGD).
    """
    input_1 = Input(shape=(7, 7, 176))
    input_2 = Input(shape=(27, 27, 30))

    # ---- CAB_*: gate for input_1, computed from input_1 itself ----
    CAB_conv1 = Conv2D(
        16, (3, 3), padding='same', strides=(1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4),
        # use_bias=False
    )(input_1)
    CAB_bn1 = BatchNormalization()(CAB_conv1)
    CAB_relu1 = PReLU()(CAB_bn1)
    CAB_avg_pool1 = AveragePooling2D()(CAB_relu1)
    CAB_conv4 = Conv2D(
        32, (3, 3), padding='same', strides=(1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4),
        # use_bias=False
    )(CAB_avg_pool1)
    CAB_bn4 = BatchNormalization()(CAB_conv4)
    CAB_relu4 = PReLU()(CAB_bn4)
    CAB_conv5 = Conv2D(
        32, (3, 3), padding='same', strides=(1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4),
        # use_bias=False
    )(CAB_relu4)
    CAB_bn5 = BatchNormalization()(CAB_conv5)
    CAB_relu5 = PReLU()(CAB_bn5)
    CAB_global_pool = GlobalAveragePooling2D()(CAB_relu5)
    # ===================================================================================================================
    # The pooled vector is given a length-1 leading axis for the Conv1D layers.
    CAB_reshape = Reshape(
        (1, CAB_global_pool._keras_shape[1]))(CAB_global_pool)
    CAB_conv6 = Conv1D(44, (32), padding='same', strides=(1), kernel_initializer='glorot_uniform', use_bias=False)(CAB_reshape)
    CAB_bn6 = BatchNormalization()(CAB_conv6)
    CAB_relu6 = PReLU()(CAB_bn6)
    # 176 filters: matches the last dimension of input_1 so the gate can multiply it.
    CAB_conv7 = Conv1D(176, (44), padding='same', strides=(1), kernel_initializer='glorot_uniform', use_bias=False)(CAB_relu6)
    CAB_bn7 = BatchNormalization()(CAB_conv7)
    CAB_sigmoid = Activation('sigmoid')(CAB_bn7)
    # ==================================================================================================================
    CAB_mul = multiply([input_1, CAB_sigmoid])

    # ---- *_spe*: Conv3D stack over the gated input_1 ----
    # A trailing axis of size 1 is appended so Conv3D can be applied.
    input_spe = Reshape((CAB_mul._keras_shape[1], CAB_mul._keras_shape[2], CAB_mul._keras_shape[3], 1))(CAB_mul)
    # Ungated alternative kept from the original source:
    # input_spe = Reshape((input_1._keras_shape[1], input_1._keras_shape[2], input_1._keras_shape[3], 1))(input_1)
    conv_spe1 = Conv3D(32, (1, 1, 7), padding='valid', strides=(1, 1, 2))(input_spe)
    print('conv_spe shape:', conv_spe1.shape)
    bn_spe1 = BatchNormalization()(conv_spe1)
    relu_spe1 = PReLU()(bn_spe1)
    conv_spe11 = Conv3D(num_filters_spe, (1, 1, 7), padding='same', strides=(1, 1, 1))(relu_spe1)
    bn_spe11 = BatchNormalization()(conv_spe11)
    relu_spe11 = PReLU()(bn_spe11)
    blockconv_spe1 = Conv3D(num_filters_spe, (1, 1, 7), padding='same', strides=(1, 1, 1))(relu_spe11)
    print('blockconv_spe1:', blockconv_spe1.shape)
    blockbn_spe1 = BatchNormalization()(blockconv_spe1)
    blockrelu_spe1 = PReLU()(blockbn_spe1)
    conv_spe2 = Conv3D(num_filters_spe, (1, 1, 7), padding='same', strides=(1, 1, 1))(blockrelu_spe1)
    print('conv_spe2 shape:', conv_spe2.shape)
    # Skip connection 1.
    add_spe1 = add([relu_spe11, conv_spe2])
    bn_spe2 = BatchNormalization()(add_spe1)
    relu_spe2 = PReLU()(bn_spe2)
    blockconv_spe2 = Conv3D(num_filters_spe, (1, 1, 7), padding='same', strides=(1, 1, 1))(relu_spe2)
    print('blockconv_spe2 shape:', blockconv_spe2.shape)
    blockbn_spe2 = BatchNormalization()(blockconv_spe2)
    blockrelu_spe2 = PReLU()(blockbn_spe2)
    conv_spe4 = Conv3D(num_filters_spe, (1, 1, 7), padding='same', strides=(1, 1, 1))(blockrelu_spe2)
    print('conv_spe_4 shape:', conv_spe4.shape)
    # Skip connection 2.
    add_spe2 = add([relu_spe2, conv_spe4])
    bn_spe4 = BatchNormalization()(add_spe2)
    relu_spe4 = PReLU()(bn_spe4)
    blockconv_spe3 = Conv3D(num_filters_spe, (1, 1, 7), padding='same', strides=(1, 1, 1))(relu_spe4)
    blockbn_spe3 = BatchNormalization()(blockconv_spe3)
    blockrelu_spe3 = PReLU()(blockbn_spe3)
    conv_spe41 = Conv3D(num_filters_spe, (1, 1, 7), padding='same', strides=(1, 1, 1))(blockrelu_spe3)
    # Skip connection 3.
    add_spe3 = add([relu_spe4, conv_spe41])
    # ===================================================================================================
    bn_spe41 = BatchNormalization()(add_spe3)
    relu_spe41 = PReLU()(bn_spe41)
    # Sum of the three skip-block outputs.
    add_all_spe = add([relu_spe2, relu_spe4, relu_spe41])
    conv_spe6 = Conv3D(8, (1, 1, 85), padding='valid', strides=(1, 1, 1))(add_all_spe)
    print('conv_spe_3 shape:', conv_spe6.shape)
    bn_spe6 = BatchNormalization()(conv_spe6)
    relu_spe6 = PReLU()(bn_spe6)

    # ---- *_spa*: Conv3D stack over input_2 ----
    input_spa = Reshape((input_2._keras_shape[1], input_2._keras_shape[2], input_2._keras_shape[3], 1))(input_2)
    conv_spa1 = Conv3D(16, (5, 5, 30), padding='valid', strides=(1, 1, 1))(input_spa)
    print('conv_spa1 shape:', conv_spa1.shape)
    bn_spa1 = BatchNormalization()(conv_spa1)
    relu_spa1 = PReLU()(bn_spa1)
    # The sizes of the last two axes are swapped (via Reshape, not a transpose).
    reshape_spa1 = Reshape(
        (relu_spa1._keras_shape[1], relu_spa1._keras_shape[2], relu_spa1._keras_shape[4], relu_spa1._keras_shape[3]))(relu_spa1)
    conv_spa11 = Conv3D(num_filters_spa, (3, 3, 1), padding='same', strides=(1, 1, 1))(reshape_spa1)
    bn_spa11 = BatchNormalization()(conv_spa11)
    relu_spa11 = PReLU()(bn_spa11)

    # ---- VIS_* block 1: three parallel projections of relu_spa11, combined
    # by two dot products with a sigmoid in between ----
    VIS_conv1 = Conv3D(16, (1, 1, 16), padding='valid', strides=(1, 1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(relu_spa11)
    VIS_BN1 = BatchNormalization()(VIS_conv1)
    VIS_relu1 = Activation('relu')(VIS_BN1)
    # The first two axes are merged into one so the projections become 2-D per sample.
    VIS_SHAPE1 = Reshape(
        (VIS_relu1._keras_shape[1] * VIS_relu1._keras_shape[2], VIS_relu1._keras_shape[4]))(VIS_relu1)
    VIS_conv2 = Conv3D(16, (1, 1, 16), padding='valid', strides=(1, 1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(relu_spa11)
    VIS_BN2 = BatchNormalization()(VIS_conv2)
    VIS_relu2 = Activation('relu')(VIS_BN2)
    VIS_SHAPE2 = Reshape(
        (VIS_relu2._keras_shape[1] * VIS_relu2._keras_shape[2], VIS_relu2._keras_shape[4]))(VIS_relu2)
    trans_VIS_SHAPE2 = Permute((2, 1))(VIS_SHAPE2)
    VIS_conv3 = Conv3D(16, (1, 1, 16), padding='valid', strides=(1, 1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(relu_spa11)
    VIS_BN3 = BatchNormalization()(VIS_conv3)
    VIS_relu3 = Activation('relu')(VIS_BN3)
    VIS_SHAPE3 = Reshape(
        (VIS_relu3._keras_shape[1] * VIS_relu3._keras_shape[2], VIS_relu3._keras_shape[4]))(VIS_relu3)
    VIS_mul1 = dot([VIS_SHAPE1, trans_VIS_SHAPE2], axes=(2, 1))
    VIS_sigmoid = Activation('sigmoid')(VIS_mul1)
    VIS_mul2 = dot([VIS_sigmoid, VIS_SHAPE3], axes=(2, 1))
    # NOTE(review): the target shape is hard-coded; presumably it matches
    # relu_spa11 for the 27x27 input above - confirm before changing the
    # input size or num_filters_spa.
    VIS_SHAPEall = Reshape((23, 23, 16, 1))(VIS_mul2)
    VIS_conv4 = Conv3D(16, (16, 1, 1), padding='same', strides=(1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(VIS_SHAPEall)
    VIS_BN4 = BatchNormalization()(VIS_conv4)
    # The block's output is added back onto its own input.
    VIS_ADD = add([relu_spa11, VIS_BN4])

    blockconv_spa1 = Conv3D(num_filters_spa, (3, 3, 1), padding='same', strides=(1, 1, 1))(VIS_ADD)
    blockbn_spa1 = BatchNormalization()(blockconv_spa1)
    blockrelu_spa1 = PReLU()(blockbn_spa1)
    conv_spa2 = Conv3D(num_filters_spa, (3, 3, 1), padding='same', strides=(1))(blockrelu_spa1)
    print('conv_spa_2 shape:', conv_spa2.shape)
    # Skip connection 1.
    add_spa1 = add([VIS_ADD, conv_spa2])
    bn_spa2 = BatchNormalization()(add_spa1)
    relu_spa2 = PReLU()(bn_spa2)
    blockconv_spa2 = Conv3D(num_filters_spa, (3, 3, 1), padding='same', strides=(1))(relu_spa2)
    print('blockconv_spa12', blockconv_spa2.shape)
    blockbn_spa2 = BatchNormalization()(blockconv_spa2)
    blockrelu_spa2 = PReLU()(blockbn_spa2)
    conv_spa4 = Conv3D(num_filters_spa, (3, 3, 1), padding='same', strides=(1))(blockrelu_spa2)
    print('conv_spa4 shape:', conv_spa4.shape)
    # Skip connection 2.
    add_spa2 = add([relu_spa2, conv_spa4])
    bn_spa4 = BatchNormalization()(add_spa2)
    relu_spa4 = PReLU()(bn_spa4)
    blockconv_spa3 = Conv3D(num_filters_spa, (3, 3, 1), padding='same', strides=(1))(relu_spa4)
    blockbn_spa3 = BatchNormalization()(blockconv_spa3)
    blockrelu_spa3 = PReLU()(blockbn_spa3)
    conv_spa41 = Conv3D(num_filters_spa, (3, 3, 1), padding='same', strides=(1))(blockrelu_spa3)
    # Skip connection 3.
    add_spa3 = add([relu_spa4, conv_spa41])
    bn_spa41 = BatchNormalization()(add_spa3)
    relu_spa41 = PReLU()(bn_spa41)
    # Sum of the three skip-block outputs.
    add_all_spa = add([relu_spa2, relu_spa4, relu_spa41])
    # Four unpadded 5x5 convolutions in a row.
    conv_spa6 = Conv3D(num_filters_spa, (5, 5, 1), padding='valid', strides=(1, 1, 1))(add_all_spa)
    bn_spa6 = BatchNormalization()(conv_spa6)
    relu_spa6 = PReLU()(bn_spa6)
    conv_spa7 = Conv3D(num_filters_spa, (5, 5, 1), padding='valid', strides=(1, 1, 1))(relu_spa6)
    bn_spa7 = BatchNormalization()(conv_spa7)
    relu_spa7 = PReLU()(bn_spa7)
    conv_spa8 = Conv3D(num_filters_spa, (5, 5, 1), padding='valid', strides=(1, 1, 1))(relu_spa7)
    bn_spa8 = BatchNormalization()(conv_spa8)
    relu_spa8 = PReLU()(bn_spa8)
    conv_spa81 = Conv3D(num_filters_spa, (5, 5, 1), padding='valid', strides=(1, 1, 1))(relu_spa8)
    bn_spa81 = BatchNormalization()(conv_spa81)
    relu_spa81 = PReLU()(bn_spa81)
    conv_spa9 = Conv3D(8, (1, 1, 16), padding='valid', strides=(1, 1, 1))(relu_spa81)
    bn_spa9 = BatchNormalization()(conv_spa9)
    relu_spa9 = PReLU()(bn_spa9)

    # ---- Fusion of the two stacks ----
    feature_fusion = concatenate([relu_spe6, relu_spa9])
    # As above, the sizes of the last two axes are swapped via Reshape.
    reshape_all = Reshape(
        (feature_fusion._keras_shape[1], feature_fusion._keras_shape[2], feature_fusion._keras_shape[4], feature_fusion._keras_shape[3]))(feature_fusion)
    conv_all1 = Conv3D(16, (3), padding='same', strides=(1, 1, 1))(reshape_all)
    print('convall1 shape:', conv_all1.shape)
    bn_all1 = BatchNormalization()(conv_all1)
    relu_all1 = PReLU()(bn_all1)

    # ---- VIS_* block 2: same structure as block 1, applied to relu_all1 ----
    VIS_conv11 = Conv3D(16, (1, 1, 16), padding='valid', strides=(1, 1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(relu_all1)
    VIS_BN11 = BatchNormalization()(VIS_conv11)
    VIS_relu11 = Activation('relu')(VIS_BN11)
    VIS_SHAPE11 = Reshape(
        (VIS_relu11._keras_shape[1] * VIS_relu11._keras_shape[2], VIS_relu11._keras_shape[4]))(VIS_relu11)
    VIS_conv21 = Conv3D(16, (1, 1, 16), padding='valid', strides=(1, 1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(relu_all1)
    VIS_BN21 = BatchNormalization()(VIS_conv21)
    VIS_relu21 = Activation('relu')(VIS_BN21)
    VIS_SHAPE21 = Reshape(
        (VIS_relu21._keras_shape[1] * VIS_relu21._keras_shape[2], VIS_relu21._keras_shape[4]))(VIS_relu21)
    trans_VIS_SHAPE21 = Permute((2, 1))(VIS_SHAPE21)
    VIS_conv31 = Conv3D(16, (1, 1, 16), padding='valid', strides=(1, 1, 1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(relu_all1)
    VIS_BN31 = BatchNormalization()(VIS_conv31)
    VIS_relu31 = Activation('relu')(VIS_BN31)
    VIS_SHAPE31 = Reshape(
        (VIS_relu31._keras_shape[1] * VIS_relu31._keras_shape[2], VIS_relu31._keras_shape[4]))(VIS_relu31)
    VIS_mul11 = dot([VIS_SHAPE11, trans_VIS_SHAPE21], axes=(2, 1))
    VIS_sigmoid1 = Activation('sigmoid')(VIS_mul11)
    VIS_mul21 = dot([VIS_sigmoid1, VIS_SHAPE31], axes=(2, 1))
    # NOTE(review): hard-coded target shape, as in block 1 - confirm it
    # matches relu_all1 before changing the input sizes.
    VIS_SHAPEall1 = Reshape((7, 7, 16, 1))(VIS_mul21)
    VIS_conv41 = Conv3D(16, (16, 1, 1), padding='same', strides=(1), kernel_initializer='glorot_uniform', kernel_regularizer=l2(1e-4))(VIS_SHAPEall1)
    VIS_BN41 = BatchNormalization()(VIS_conv41)
    VIS_ADD1 = add([relu_all1, VIS_BN41])

    conv_all2 = Conv3D(16, (3), padding='valid', strides=(1, 1, 1))(VIS_ADD1)
    bn_all2 = BatchNormalization()(conv_all2)
    relu_all2 = PReLU()(bn_all2)

    # ---- Classifier head ----
    flatten = Flatten()(relu_all2)
    dense = Dense(units=512, activation="relu", kernel_initializer="he_normal")(flatten)
    drop = Dropout(0.6)(dense)
    dense_2 = Dense(units=256, activation="relu", kernel_initializer="he_normal")(drop)
    drop1 = Dropout(0.6)(dense_2)
    dense_3 = Dense(units=nb_classes, activation="softmax", kernel_initializer="he_normal")(drop1)

    # The local name `model` shadows this function's own name inside the body.
    model = Model(inputs=[input_1, input_2], outputs=dense_3)
    sgd = SGD(lr=0.0005, momentum=0.9)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.summary()
    return model
def initialize_model(self):
    """Build and compile the siamese attention model for question pairs.

    Both questions share one encoder (embedding, batch normalization,
    bidirectional LSTM). The encodings are aligned against each other with
    ``soft_attention_alignment``, compared via difference and product,
    re-encoded by a shared bidirectional LSTM, pooled (average and max) and
    classified by a dense head with a single sigmoid output.

    Sets ``self.model_name`` and ``self.model``; returns nothing.
    """
    self.model_name = 'siamese_attention_dnn'

    # Shared encoder: the same weights process both questions.
    encoder = Sequential(layers=[
        Embedding(self.word_embedding.vocabulary_size,
                  self.word_embedding.dimensions,
                  weights=[self.word_embedding.embedding_matrix],
                  trainable=True,
                  mask_zero=False),
        BatchNormalization(),
        Bidirectional(LSTM(256, return_sequences=True)),
    ])

    question1_input = Input(shape=(self.qqp_df.seq_len,), name='question1_input')
    question2_input = Input(shape=(self.qqp_df.seq_len,), name='question2_input')

    q1_encoded = encoder(question1_input)
    q2_encoded = encoder(question2_input)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose: each question against the aligned view of the other one.
    q1q2_diff = subtract([q1_encoded, q2_aligned])
    q1q2_prod = multiply([q1_encoded, q2_aligned])
    q1q2_submult = Concatenate()([q1q2_diff, q1q2_prod])

    q2q1_diff = subtract([q2_encoded, q1_aligned])
    q2q1_prod = multiply([q2_encoded, q1_aligned])
    q2q1_submult = Concatenate()([q2q1_diff, q2q1_prod])

    q1q2_combined = Concatenate()([q1_encoded, q2_aligned, q1q2_submult])
    q2q1_combined = Concatenate()([q2_encoded, q1_aligned, q2q1_submult])

    # Shared comparison encoder.
    compose = Bidirectional(LSTM(256, return_sequences=True))
    q1q2_compare = compose(q1q2_combined)
    q2q1_compare = compose(q2q1_combined)

    # Aggregate: average- and max-pool over the sequence axis.
    def pool(sequence):
        return Concatenate()([GlobalAvgPool1D()(sequence),
                              GlobalMaxPool1D()(sequence)])

    q1q2_pooled = pool(q1q2_compare)
    q2q1_pooled = pool(q2q1_compare)

    # Classifier
    hidden = Concatenate()([q1q2_pooled, q2q1_pooled])
    hidden = BatchNormalization()(hidden)
    for units in (256, 128):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = Dropout(0.4)(hidden)
    out = Dense(1, activation='sigmoid')(hidden)

    self.model = Model(inputs=[question1_input, question2_input], outputs=out)
    self.model.compile(optimizer=Adam(lr=1e-3),
                       loss='binary_crossentropy',
                       metrics=['binary_crossentropy', 'binary_accuracy'])
def getModel(x_dim, meta_dim):
    """Build the three-sequence fusion model with scalar fusion weights.

    Inputs XC, XP and XT are encoded by one shared stack of three
    ``ConvLSTM2D`` layers into hct1, hP1 and hP2. Each of hP1 / hP2 gets a
    scalar score computed together with hct1; the scores are turned into
    weights by ``Lambda(softmax)`` and used to blend hP1 and hP2. The blend
    is fused with hct1 by ``Hadamard_fusion``, convolved to ``CHANNEL``
    filters and added to a dense projection of the metadata input.

    Args:
        x_dim: Shape of each of the three sequence inputs.
        meta_dim: Length of the metadata vector.

    Returns:
        An uncompiled ``Model`` taking ``[XC, XP, XT, Xmeta]``.
    """
    # Input xc, xp, xt --> hct1, hP1, hP2
    XC = Input(shape=x_dim)
    XP = Input(shape=x_dim)
    XT = Input(shape=x_dim)

    shared_model = Sequential()
    shared_model.add(
        ConvLSTM2D(filters=32, kernel_size=(3, 3), padding='same',
                   return_sequences=True, input_shape=x_dim))
    # Only the last recurrent layer collapses the sequence axis.
    for keep_sequence in (True, False):
        shared_model.add(
            ConvLSTM2D(filters=32, kernel_size=(3, 3), padding='same',
                       return_sequences=keep_sequence))

    hct1 = shared_model(XC)
    hP1 = shared_model(XP)
    hP2 = shared_model(XT)

    # Weighting based fusion
    def score(periodic):
        # One scalar per sample from hct1 joined with the given encoding.
        joined = Concatenate()([hct1, periodic])
        mixed = Conv2D(filters=32, kernel_size=(1, 1), padding='same')(joined)
        return Dense(1)(Flatten()(mixed))

    ej1 = score(hP1)  # daily
    ej2 = score(hP2)  # weekly

    # The score listed first differs between the two calls.
    aj1 = Lambda(softmax)([ej1, ej2])
    aj2 = Lambda(softmax)([ej2, ej1])

    hPallt = Add()([multiply([aj1, hP1]), multiply([aj2, hP2])])
    hft = Hadamard_fusion()([hct1, hPallt])

    # transform shape
    hft_reshap = Conv2D(filters=CHANNEL, kernel_size=(HEIGHT, WIDTH),
                        activation='relu', padding='same')(hft)

    # metadata fusion
    Xmeta = Input(shape=(meta_dim, ))
    meta_hidden = Dense(units=10, activation='relu')(Xmeta)
    meta_flat = Dense(units=WIDTH * HEIGHT * CHANNEL, activation='relu')(meta_hidden)
    hmeta = Reshape((HEIGHT, WIDTH, CHANNEL))(meta_flat)

    fused = Add()([hft_reshap, hmeta])
    X_hat = Activation('relu')(fused)

    return Model(inputs=[XC, XP, XT, Xmeta], outputs=X_hat)
# Trains an Hourglass network `g` on a fixed noise input `z` so that its
# output, multiplied by a mask, matches `miss`. Relies on names defined
# outside this fragment (method, input_channel, w, h, c, lr, mse, num_iter,
# sigma, mask, miss, save_path, make_noise, add_noise, postprocess).
z = make_noise(method, input_channel, (w, h))
g = Hourglass((w, h), input_channel, c, num_up=[128, 128, 128, 128, 128], num_down=[128, 128, 128, 128, 128], num_skip=[0, 0, 0, 4, 128], k_up=[3, 3, 3, 3, 3], k_down=[3, 3, 3, 3, 3], k_skip=[0, 0, 0, 1, 1], upsample_mode='bilinear')

# NOTE(review): `input` shadows the Python builtin of the same name from
# here on.
input = g.input
mask_input = Input((w, h, c))
x = g.output
# The loss only sees the network output where the mask lets it through.
output = multiply([x, mask_input])

# Training wrapper around `g`; `g` itself is used for prediction below.
model = Model(inputs=[input, mask_input], outputs=output, name='g_trainer')
model.compile(optimizer=Adam(lr=lr), loss=mse)
model.summary()

losses = []
for i in range(num_iter + 1):
    # The noise input is perturbed on every step.
    loss = model.train_on_batch([add_noise(z, sigma), mask], miss)
    losses.append(loss)
    # NOTE(review): the source was collapsed onto one line; the snapshot
    # lines are assumed to belong to this `if` (every 100th iteration).
    if i % 100 == 0:
        print('iter %d loss %f' % (i, loss))
        # Snapshot from the unperturbed noise and the unmasked network.
        y = g.predict_on_batch(z)
        postprocess(y[0]).save(save_path + '%d.png' % i)
def getModel(x_dim, meta_dim):
    """Build the three-sequence fusion model with element-wise fusion weights.

    Inputs XC, XP and XT are encoded by one shared stack of three
    ``ConvLSTM2D`` layers into hct, hP1 and hP2. hct is concatenated with
    each of hP1 / hP2 and convolved; the two results are stacked on a new
    trailing axis, weighted by a softmax Dense layer over that axis and
    summed. The sum is fused with hct by ``Hadamard_fusion``, convolved to
    ``CHANNEL`` filters and added to a dense projection of the metadata.

    Args:
        x_dim: Shape of each of the three sequence inputs.
        meta_dim: Length of the metadata vector.

    Returns:
        An uncompiled ``Model`` taking ``[XC, XP, XT, Xmeta]``.
    """
    # Input xc, xp, xt --> hct, hP1, hP2
    XC = Input(shape=x_dim)
    XP = Input(shape=x_dim)
    XT = Input(shape=x_dim)

    shared_model = Sequential()
    shared_model.add(
        ConvLSTM2D(filters=32, kernel_size=(3, 3), padding='same',
                   return_sequences=True, input_shape=x_dim))
    # Only the last recurrent layer collapses the sequence axis.
    for keep_sequence in (True, False):
        shared_model.add(
            ConvLSTM2D(filters=32, kernel_size=(3, 3), padding='same',
                       return_sequences=keep_sequence))

    hct = shared_model(XC)
    hP1 = shared_model(XP)
    hP2 = shared_model(XT)

    # Weighting based fusion
    def mix(periodic):
        joined = Concatenate()([hct, periodic])
        return Conv2D(filters=32, kernel_size=(1, 1), padding='same')(joined)

    conv1 = mix(hP1)  # daily
    conv2 = mix(hP2)  # weekly

    # Stack both candidates on a new trailing axis of size 2.
    x1 = Lambda(lambda x: x[:, :, :, :, np.newaxis])(conv1)
    x2 = Lambda(lambda x: x[:, :, :, :, np.newaxis])(conv2)
    stacked = Concatenate()([x1, x2])

    # Softmax over the stacked axis yields one weight per candidate.
    weights = Dense(2, activation='softmax')(stacked)
    weighted = multiply([stacked, weights])

    ax1 = Lambda(lambda x: x[:, :, :, :, 0])(weighted)
    ax2 = Lambda(lambda x: x[:, :, :, :, 1])(weighted)
    hPallt = add([ax1, ax2])

    # hadamard fusion
    hft = Hadamard_fusion()([hct, hPallt])

    # transform shape
    hft_reshap = Conv2D(filters=CHANNEL, kernel_size=(3, 3),
                        activation='relu', padding='same')(hft)

    # metadata fusion
    Xmeta = Input(shape=(meta_dim, ))
    meta_hidden = Dense(units=10, activation='relu')(Xmeta)
    meta_flat = Dense(units=WIDTH * HEIGHT * CHANNEL, activation='relu')(meta_hidden)
    hmeta = Reshape((HEIGHT, WIDTH, CHANNEL))(meta_flat)

    fused = Add()([hft_reshap, hmeta])
    X_hat = Activation('relu')(fused)

    return Model(inputs=[XC, XP, XT, Xmeta], outputs=X_hat)
else: slot_lstm_out = LSTM(args.emb_size, dropout=args.dropout, recurrent_dropout=args.dropout, return_sequences=True, name='slot LSTM', recurrent_regularizer=r_reg)(embedding) if args.batch_norm: slot_lstm_out = BatchNormalization()(slot_lstm_out) # [LSTM for intent] if args.attention: intent_lstm_out = LSTM(args.emb_size, dropout=args.dropout, recurrent_dropout=args.dropout, name='intent LSTM', return_sequences=True, recurrent_regularizer=r_reg)(slot_lstm_out) attn = TimeDistributed(Dense(1, activation=args.activation))(intent_lstm_out) attn = Flatten()(attn) attn = Activation('softmax')(attn) attn = RepeatVector(args.emb_size)(attn) attn = Permute([2, 1])(attn) intent_lstm_out = multiply([intent_lstm_out, attn]) intent_lstm_out = AveragePooling1D(max_seq_len)(intent_lstm_out) intent_lstm_out = Flatten()(intent_lstm_out) else: intent_lstm_out = LSTM(args.emb_size, dropout=args.dropout, recurrent_dropout=args.dropout, name='intent LSTM')(slot_lstm_out) # [transformation for slot] x = TimeDistributed(Dense(args.emb_size), name='slot transformation 1')(slot_lstm_out) x = Activation(args.activation)(x) #TODO deeper feed-forward layers # [output layer for slot] x = TimeDistributed(Dense(len(idx2label)), name='slot transformation 2')(x) slot_output = Activation('softmax', name='slot')(x)
x = LSTM(1024, return_sequences = True)(i) x = TimeDistributed(Reshape((1, X.shape[3], X.shape[4])))(x) model = Model(inputs=i, outputs=x, name='Decoder') return model print(X.shape) ## Start constructing the circuit i = Input(shape=X.shape[1:]) R = representation_rnn() C = consciousness_rnn() G = generator_rnn() D = decoder_rnn() h = R(i) # Get h from R c_A, c_B, c_A_soft, c_B_soft = C(h) # Get masks c_A and c_B from C b = multiply([h, c_B], name = 'b') # Get b through elementwise multiplication a_hat = G([c_A, c_B, b]) # Send c_A, c_B and b to G to get a_hat a_hat = Lambda(lambda x: x[:,:-1,:], output_shape=(X.shape[1]-1, latent_dim))(a_hat) # Slice dimensions to align vectors h_A = Lambda(lambda x: x[:,1:,:], output_shape=(X.shape[1]-1, latent_dim))(h) # Slice dimensions to align vectors c_A = Lambda(lambda x: x[:,:-1,:], output_shape=(X.shape[1]-1, latent_dim))(c_A) # Slice dimensions to align vectors h_A = multiply([h_A, c_A]) # Calculate h[A] to compare against a_hat a_hat = multiply([a_hat, c_A]) # Mask a_hat consciousness_error = subtract([a_hat, h_A]) consciousness_error = Regularize(L1L2(l1 = 0., l2 =1. * reg_lambda), name='Consciousness_Generator_Error')(consciousness_error) b_transformed = Dense(latent_dim, activation='linear')(b) # Create a layer that attempts to make b independent from h[A] b_transformed = Lambda(lambda x: x[:,:-1,:], output_shape=(X.shape[1]-1, latent_dim))(b_transformed) b_transformed = multiply([b_transformed, c_A])
# Tail of the convolutional branch: pool, flatten and two ReLU Dense layers.
# `exp_conv`, `exp_input` and the configuration names used below are defined
# outside this fragment.
exp_conv = AveragePooling2D(pool_size=1, strides=1)(exp_conv)
exp_conv = Flatten()(exp_conv)
exp_conv = Dense(units=256, activation='relu')(exp_conv)
exp_conv = Dense(units=64, activation='relu')(exp_conv)

# Second branch: an embedding model that uses the same exp data as input.
exp_emb = Embedding(input_dim=inputDim, output_dim=outputDim, embeddings_regularizer=reg2, name='EXP_input2')(exp_input)
exp_gap = GlobalAvgPool3D()(exp_emb)
exp_gap = Dense(units=64, activation='relu')(exp_gap)

# Combine the two 64-unit branches by sum and by product, then concatenate.
combined1 = add([exp_conv, exp_gap])
combined2 = multiply([exp_conv, exp_gap])
combined = concatenate([combined1, combined2])
combined = Dense(units=64, activation='relu')(combined)
combined = Dropout(rate=DROPOUT)(combined)
combined_output = Dense(units=numClasses, activation='softmax')(combined)

classifier = Model(inputs=exp_input, outputs=combined_output)

# Summarize layers. Model.summary() prints the table itself and returns
# None, so it must not be passed to print() (that printed a stray "None").
print("Model summary: \n")
classifier.summary()

# Compile the model.
classifier.compile(
    optimizer=adagrad,
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy', tf.keras.metrics.AUC()])
#%% print('Build model...') diff_input = Input(shape=input_shape) rawf_input = Input(shape=input_shape) d1 = Conv2D(nb_filters1, kernel_size, padding='same', activation='tanh')(diff_input) d2 = Conv2D(nb_filters1, kernel_size, activation='tanh')(d1) r1 = Conv2D(nb_filters1, kernel_size, padding='same', activation='tanh')(rawf_input) r2 = Conv2D(nb_filters1, kernel_size, activation='tanh')(r1) g1 = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(r2) g1 = Lambda(masknorm, output_shape=masknorm_shape)(g1) gated1 = multiply([d2, g1]) d3 = AveragePooling2D(pool_size)(gated1) d4 = Dropout(dropout_rate1)(d3) r3 = AveragePooling2D(pool_size)(r2) r4 = Dropout(dropout_rate1)(r3) d5 = Conv2D(nb_filters2, kernel_size, padding='same', activation='tanh')(d4) d6 = Conv2D(nb_filters2, kernel_size, activation='tanh')(d5) r5 = Conv2D(nb_filters2, kernel_size, padding='same', activation='tanh')(r4) r6 = Conv2D(nb_filters2, kernel_size, activation='tanh')(r5) g2 = Conv2D(1, (1, 1), padding='same', activation='sigmoid')(r6) g2 = Lambda(masknorm, output_shape=masknorm_shape)(g2)
def lstm_train(num_encoder_tokens, latent_dim, batch_size, epochs, encoder_input_data, decoder_input_data,
               decoder_target_data, temporal_horizon, max_length):
    """Build a TF1 encoder/decoder graph with a mixture-density loss and train it.

    The function creates placeholders for the encoder inputs, decoder inputs
    and targets, wires a grid of shared-LSTM encoder steps, an attention-based
    decoder and a second ("_sp") encoder/decoder path, then runs 30 Adam steps
    in a ``tf.Session`` on the arrays passed in, printing the loss at each step
    and saving a checkpoint to ``./all_model.ckpt``.

    Args:
        num_encoder_tokens: size of the last axis of the ``all_inputs``
            placeholder.
        latent_dim: unit count of every LSTM and of the ``mid_layer*`` Dense
            layers.
        batch_size: not referenced in the body.
        epochs: not referenced in the body (the loop count is hard-coded to 30).
        encoder_input_data: value fed to the ``all_inputs`` placeholder.
        decoder_input_data: value fed to the ``decoder_inputs`` placeholder.
        decoder_target_data: value fed to the ``all_outputs`` placeholder.
        temporal_horizon: outer loop count of the encoder grid.
        max_length: inner loop count of the encoder grid; the decoder runs
            ``max_length + 1`` steps.

    Returns:
        None. Results are only printed and checkpointed.

    NOTE(review): the source of this function had lost its indentation; the
    loop/branch nesting below is a reconstruction and should be confirmed
    against the original file. ``MIXTURE`` is read from module scope.
    """
    # Define an input sequence and process it.
    #all_inputs = Input(shape=(None, num_encoder_tokens))
    all_inputs = tf.placeholder(tf.float32, (None,None, num_encoder_tokens), name='all_inputs')
    # Split the second-to-last axis into one slice per (time, position) cell.
    encoder_split = Lambda(lambda x: tf.split(x, num_or_size_splits= max_length * temporal_horizon, axis=-2))(all_inputs)
    # state_list collects the second value returned by each LSTM call and
    # output_list the third (named state_h / state_c at the call sites).
    state_list = []
    output_list = []
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the
    # return states in the training model, but we will use them in inference.
    mid_layer1 = Dense(latent_dim, activation='relu')
    mid_layer2 = Dense(latent_dim, activation='relu')
    mid_layer3 = Dense(latent_dim, activation='relu')
    # Zero-initialised, frozen projection used only to derive the boundary state.
    mid_layer4 = Dense(latent_dim, kernel_initializer='zeros', trainable=False)
    encoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    Zero_state = mid_layer4(encoder_split[0])
    _, Zero_state, Zero_output = encoder_lstm(Zero_state)
    # state_list gets this extra leading entry, hence the "+ 1" offsets used
    # when indexing state_list (but not output_list) below.
    state_list.append(Zero_state)
    for i in range(temporal_horizon):
        for j in range(max_length):
            # Neighbour along the outer (i) axis: same j, previous i.
            if i == 0:
                last_time_state = Zero_state
                last_time_output = Zero_output
            else:
                last_time_state = state_list[(i - 1)*max_length + j + 1]
                last_time_output = output_list[(i - 1)*max_length + j]
            # Neighbour along the inner (j) axis: same i, previous j.
            if j == 0:
                last_spat_state = Zero_state
                last_spat_output = Zero_output
            else:
                last_spat_state = state_list[(i)*max_length + j-1 + 1]
                last_spat_output = output_list[(i)*max_length + j-1]
            # Merge both neighbours into the initial state of this cell.
            all_state = mid_layer2(concatenate([last_time_state, last_spat_state], axis=-1))
            all_output = mid_layer3(concatenate([last_time_output, last_spat_output], axis=-1))
            # NOTE(review): both branches make the same call -- the condition
            # currently has no effect.
            if i == 0 and j == 0:
                encoder_outputs, state_h, state_c = \
                    encoder_lstm(mid_layer1(encoder_split[i*max_length + j]), initial_state=[all_state,all_output])
            else:
                encoder_outputs, state_h, state_c = \
                    encoder_lstm(mid_layer1(encoder_split[i * max_length + j]),
                                 initial_state=[all_state, all_output])
            state_list.append(state_h)
            output_list.append(state_c)
    decoder_inputs = tf.placeholder(tf.float32, (None, None,1), name='decoder_inputs')
    decoder_split = Lambda(lambda x: tf.split(x, num_or_size_splits=max_length + 1, axis=-2))(decoder_inputs)
    attention_probs = Dense(temporal_horizon**2, activation='softmax', name='attention_vec')
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_lstm_sp = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_dense = Dense(1, activation='sigmoid')
    mdn_decoder_dense = Dense(MIXTURE*3, activation='sigmoid')
    decoder_output_list = []
    mkn_output_list = []
    de_state_h = None
    de_state_c = None
    for i in range(max_length + 1):
        temp_list = []
        add_list = []
        # First decoder step starts from the last encoder cell; later steps
        # continue from the previous decoder step.
        if i == 0:
            decoder_outputs, de_state_h, de_state_c = decoder_lstm(
                decoder_split[i], initial_state=[state_list[-1], output_list[-1]])
        else:
            decoder_outputs, de_state_h, de_state_c = decoder_lstm(
                decoder_split[i], initial_state=[de_state_h, de_state_c])
        # Gather three encoder states per m around index i, repeating an
        # entry where a neighbouring index is not used (i == 0, 1, max_length).
        for m in range(temporal_horizon):
            if i == 0:
                temp_list.append(state_list[m*max_length + i + 1])
                temp_list.append(state_list[m*max_length + i + 1])
                temp_list.append(state_list[m*max_length + i + 1])
            if i == 1:
                temp_list.append(state_list[m * max_length + i])
                temp_list.append(state_list[m * max_length + i])
                temp_list.append(state_list[m * max_length + i + 1])
            if i!=0 and i!=1 and i!=max_length:
                temp_list.append(state_list[m*max_length + i-1])
                temp_list.append(state_list[m*max_length + i])
                temp_list.append(state_list[m*max_length + i + 1])
            if i == max_length:
                temp_list.append(state_list[m*max_length + i-1])
                temp_list.append(state_list[m*max_length + i])
                temp_list.append(state_list[m*max_length + i])
        attention_input = concatenate(temp_list, axis=-1)
        # NOTE(review): `look_attetion` is never read afterwards; presumably a
        # debugging hook for step 4.
        if i == 4:
            look_attetion = attention_probs(attention_input)
        attention_split = Lambda(lambda x: tf.split(x, num_or_size_splits=temporal_horizon**2, axis=-1))(attention_probs(attention_input))
        # for m in range(temporal_horizon**2):
        #     add_list.append(multiply([attention_split[m], temp_list[m]]))
        # NOTE(review): temp_list holds 3 * temporal_horizon entries while this
        # loop indexes temporal_horizon**2 of them -- the two only agree when
        # temporal_horizon == 3; confirm that is the only supported value.
        for m in range(temporal_horizon**2):
            add_list.append(multiply([attention_split[m], temp_list[m]]))
        # Attention-weighted sum of the gathered states, joined with the
        # decoder state, feeds both the point output and the mixture head.
        add_layer = concatenate([add(add_list), de_state_h], axis=-1)
        decoder_finaloutput = decoder_dense(add_layer)
        decoder_output_list.append(decoder_finaloutput)
        mkn_output_list.append(mdn_decoder_dense(add_layer))
    # Second ("_sp") path: a separate LSTM chained over the first slice of
    # each outer step, followed by a single decoder step.
    encoder_lstm_sp = LSTM(latent_dim, return_sequences=True, return_state=True)
    mid_layer_sp = Dense(latent_dim, activation='relu')
    decoder_dense_sp = Dense(1, activation='sigmoid')
    mkn_dense_sp = Dense(MIXTURE*3, activation='sigmoid')
    for i in range(temporal_horizon):
        if i == 0:
            _, state_h, state_c = encoder_lstm_sp(mid_layer_sp(encoder_split[i*max_length]))
        else:
            _, state_h, state_c = encoder_lstm_sp(mid_layer_sp(encoder_split[i * max_length]), initial_state=[state_h,state_c])
    decoder_outputs, de_state_h, de_state_c = decoder_lstm_sp(decoder_split[0], initial_state=[state_h, state_c])
    decoder_output_list.append(decoder_dense_sp(de_state_h))
    mkn_output_list.append(mkn_dense_sp(de_state_h))
    weight_layer = Dense(MIXTURE, activation='softmax', name='attention')
    all_parameters_list = []
    # Split every mixture head output into three equal parts along the last
    # axis: weights (re-normalised by a softmax Dense), means, and sigmas
    # (exponentiated).
    for i in range(len(mkn_output_list)):
        all_parameters = mkn_output_list[i]
        weight, mu_out, sigma = tf.split(all_parameters, 3, -1)
        weight_out = weight_layer(weight)
        sigma_out = tf.exp(sigma, name='sigma')
        all_parameters_list.append([weight_out, mu_out, sigma_out])
    # Named tensor that is not used again in this function.
    see_all = tf.concat(all_parameters_list, axis=-2, name='see_all')
    all_outputs = tf.placeholder(tf.float32, (None, None, 1), name='all_outputs')

    def mkn_loss(all_parameters_list, all_outputs):
        """Sum, over parameter sets, the clipped negative log of the weighted Gaussian density."""
        loss_final = 0
        for i in range(len(all_parameters_list)):
            all_parameters = all_parameters_list[i]
            weight_out = all_parameters[0]
            mu_out = all_parameters[1]
            sigma_out = all_parameters[2]
            factor = 1 / math.sqrt(2 * math.pi)
            # Lower bound used for sigma and for the density before the log.
            epsilon = 1e-5
            # NOTE(review): all_outputs[i] indexes the FIRST axis of the
            # placeholder -- confirm that axis matches the i-th parameter set.
            tmp = - tf.square((all_outputs[i] - mu_out)) / (2 * tf.square(tf.maximum(sigma_out, epsilon)))
            y_normal = factor * tf.exp(tmp) / tf.maximum(sigma_out, epsilon)
            loss = tf.reduce_sum(tf.multiply(y_normal, weight_out), keepdims=True)
            loss = -tf.log(tf.maximum(loss, epsilon))
            loss_final += tf.reduce_mean(loss)
        return loss_final

    loss = mkn_loss(all_parameters_list, all_outputs)
    train_step = tf.train.AdamOptimizer(learning_rate=0.02).minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Fixed 30 full-batch steps; `epochs` / `batch_size` are not consulted.
        for i in range(30):
            _, lossval = sess.run([train_step, loss], feed_dict={all_inputs: encoder_input_data,
                                                                 decoder_inputs: decoder_input_data,
                                                                 all_outputs: decoder_target_data})
            print(lossval)
        saver = tf.train.Saver()
        saver.save(sess, './all_model.ckpt')
def attention_block(inputs, time_steps):
    """Re-weight ``inputs`` with a softmax attention map learned over its first non-batch axis.

    The two non-batch axes are swapped, a ``Dense(time_steps)`` softmax is
    applied along the (now last) axis, the result is swapped back and
    multiplied element-wise with ``inputs``.

    Args:
        inputs: Keras tensor with two non-batch axes.
            # assumes layout (batch, time_steps, features) -- TODO confirm
        time_steps: number of units of the attention Dense layer; it has to
            be compatible with the first non-batch axis of ``inputs`` for the
            final multiply to succeed.

    Returns:
        The attention-weighted tensor. The swapped-back attention map is
        produced by a layer named ``"attention_prob"``.
    """
    swapped = Permute((2, 1))(inputs)
    scores = Dense(time_steps, activation="softmax")(swapped)
    attention = Permute((2, 1), name="attention_prob")(scores)
    return multiply([inputs, attention])
def build_model(self, input_shape):
    """Build the one-step attention LSTM cell as a Keras ``Model``.

    At every step the cell scores each encoder position against the previous
    cell state, turns the scores into a softmax over the input length, uses
    them to form a weighted sum of the encoder outputs and feeds that context
    vector through a standard LSTM update.

    Args:
        input_shape: ``(batch, input_length, input_dim)`` shape of the encoder
            output sequence the cell attends over.

    Returns:
        ``Model([x, h_tm1, c_tm1], [y, h, c])`` where ``h_tm1`` / ``c_tm1`` are
        the hidden and cell state of the previous time step.
    """
    input_dim = input_shape[-1]
    output_dim = self.output_dim
    input_length = input_shape[1]
    hidden_dim = self.hidden_dim

    x = Input(batch_shape=input_shape)
    h_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))
    c_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))

    W1 = Dense(hidden_dim * 4,
               kernel_initializer=self.kernel_initializer,
               kernel_regularizer=self.kernel_regularizer)
    W2 = Dense(output_dim,
               kernel_initializer=self.kernel_initializer,
               kernel_regularizer=self.kernel_regularizer)
    W3 = Dense(1,
               kernel_initializer=self.kernel_initializer,
               kernel_regularizer=self.kernel_regularizer)
    U = Dense(hidden_dim * 4,
              kernel_initializer=self.kernel_initializer,
              kernel_regularizer=self.kernel_regularizer)

    # Repeat the previous cell state once per encoder position so it can be
    # concatenated with every encoder output. K.repeat keeps the feature size
    # of c_tm1, so the declared shape is (input_length, hidden_dim); the
    # previous declaration used input_dim, which is only right when both
    # sizes coincide.
    C = Lambda(lambda t: K.repeat(t, input_length),
               output_shape=(input_length, hidden_dim))(c_tm1)
    _xC = concatenate([x, C])
    # Fold the position axis into the batch axis so W3 scores each
    # (encoder output, cell state) pair independently.
    _xC = Lambda(lambda t: K.reshape(t, (-1, input_dim + hidden_dim)),
                 output_shape=(input_dim + hidden_dim,))(_xC)

    # alpha is a softmax over the input length.
    alpha = W3(_xC)
    alpha = Lambda(lambda t: K.reshape(t, (-1, input_length)),
                   output_shape=(input_length,))(alpha)
    alpha = Activation('softmax')(alpha)

    # Context vector: attention-weighted sum of the encoder outputs.
    _x = Lambda(lambda t: K.batch_dot(t[0], t[1], axes=(1, 1)),
                output_shape=(input_dim,))([alpha, x])

    # Standard LSTM update driven by the context vector.
    z = add([W1(_x), U(h_tm1)])
    z0, z1, z2, z3 = get_slices(z, 4)

    i = Activation(self.recurrent_activation)(z0)
    # The forget gate has its own slice (z1); it previously reused z0 and was
    # therefore always identical to the input gate.
    f = Activation(self.recurrent_activation)(z1)

    c = add([multiply([f, c_tm1]),
             multiply([i, Activation(self.activation)(z2)])])
    o = Activation(self.recurrent_activation)(z3)
    h = multiply([o, Activation(self.activation)(c)])
    y = Activation(self.activation)(W2(h))

    return Model([x, h_tm1, c_tm1], [y, h, c])
def channel_attention(input_feature, ratio=8):
    """Scale each channel of ``input_feature`` by a learned attention weight.

    Global max- and average-pooled channel descriptors are passed through a
    shared two-layer bottleneck MLP, summed, squashed with a sigmoid and
    multiplied back onto the input.

    Args:
        input_feature: 4-D Keras feature map; the channel axis is chosen from
            ``K.image_data_format()`` and its size must be statically known.
        ratio: reduction factor of the bottleneck layer
            (``int(channel / ratio)`` hidden units).

    Returns:
        Tensor with the same shape as ``input_feature``.
    """
    channels_first = K.image_data_format() == "channels_first"
    channel_axis = 1 if channels_first else 3
    channel = int(input_feature.shape[channel_axis])

    # Per-channel descriptors, reshaped to (1, 1, channel) for the shared MLP.
    maxpool_channel = GlobalMaxPooling2D()(input_feature)
    maxpool_channel = Reshape((1, 1, channel))(maxpool_channel)
    avgpool_channel = GlobalAveragePooling2D()(input_feature)
    avgpool_channel = Reshape((1, 1, channel))(avgpool_channel)

    # Shared bottleneck MLP (the same two layers serve both pooling paths).
    # NOTE(review): the second layer applies a ReLU before the sigmoid, which
    # keeps every attention weight >= 0.5 -- confirm this is intended.
    Dense_One = Dense(units=int(channel / ratio),
                      activation='relu',
                      kernel_initializer='he_normal',
                      use_bias=True,
                      bias_initializer='zeros')
    Dense_Two = Dense(units=channel,
                      activation='relu',
                      kernel_initializer='he_normal',
                      use_bias=True,
                      bias_initializer='zeros')

    # max path
    mlp_2_max = Dense_Two(Dense_One(maxpool_channel))
    mlp_2_max = Reshape(target_shape=(1, 1, channel))(mlp_2_max)

    # avg path
    mlp_2_avg = Dense_Two(Dense_One(avgpool_channel))
    mlp_2_avg = Reshape(target_shape=(1, 1, channel))(mlp_2_avg)

    channel_attention_feature = Add()([mlp_2_max, mlp_2_avg])
    channel_attention_feature = Activation('sigmoid')(channel_attention_feature)

    # The attention map is laid out as (1, 1, channel). For channels-first
    # feature maps it has to be moved to (channel, 1, 1) before it can be
    # broadcast against the input; without this the multiply below pairs the
    # channel axis with a spatial axis.
    if channels_first:
        channel_attention_feature = Permute((3, 1, 2))(channel_attention_feature)

    return multiply([channel_attention_feature, input_feature])
def resnet99_avg_se(band, imx, ncla1, l=1):
    """Build a small residual CNN with squeeze-and-excitation style gating.

    Two parallel 3x3 'valid' convolutions are concatenated into a 64-channel
    stem, followed by one (``l == 1``) or two (``l == 2``) pre-activation
    residual blocks whose output is re-weighted per channel by a
    GAP -> Dense(4) -> Dense(64) -> sigmoid gate. A global average pooling and
    a softmax layer named ``'output1'`` produce the prediction.

    Args:
        band: number of input channels.
        imx: spatial size of the (square) input patch.
        ncla1: number of output classes.
        l: number of residual blocks; only the value 2 adds the second block.

    Returns:
        The uncompiled Keras ``Model``.
    """
    def _init():
        # Every layer gets its own initializer instance.
        return RandomNormal(mean=0.0, stddev=0.01)

    def _batch_norm():
        return BatchNormalization(axis=-1, momentum=0.9, epsilon=0.001, center=True, scale=True,
                                  beta_initializer='zeros', gamma_initializer='ones',
                                  moving_mean_initializer='zeros',
                                  moving_variance_initializer='ones')

    def _gated_residual(tensor, bn, conv_a, conv_b, fc_a, fc_b):
        """Return ``tensor`` plus its channel-gated BN-ReLU-conv-ReLU-conv branch."""
        branch = bn(tensor)
        branch = Activation('relu')(branch)
        branch = conv_a(branch)
        branch = Activation('relu')(branch)
        branch = conv_b(branch)
        gate = GlobalAveragePooling2D()(branch)
        gate = fc_a(gate)
        gate = fc_b(gate)
        gate = Activation('sigmoid')(gate)
        branch = multiply([branch, gate])
        return Add()([tensor, branch])

    input1 = Input(shape=(imx, imx, band))

    # define network (all layers are created up front, in a fixed order, so
    # automatically generated layer names stay stable)
    conv0x = Conv2D(32, kernel_size=(3, 3), padding='valid', kernel_initializer=_init())
    conv0 = Conv2D(32, kernel_size=(3, 3), padding='valid', kernel_initializer=_init())

    bn11 = _batch_norm()
    conv11 = Conv2D(64, kernel_size=(3, 3), padding='same', kernel_initializer=_init())
    conv12 = Conv2D(64, kernel_size=(3, 3), padding='same', kernel_initializer=_init())
    fc11 = Dense(4, activation=None, kernel_initializer=_init())
    fc12 = Dense(64, activation=None, kernel_initializer=_init())

    bn21 = _batch_norm()
    conv21 = Conv2D(64, kernel_size=(3, 3), padding='same', kernel_initializer=_init())
    conv22 = Conv2D(64, kernel_size=(3, 3), padding='same', kernel_initializer=_init())
    fc21 = Dense(4, activation=None, kernel_initializer=_init())
    fc22 = Dense(64, activation=None, kernel_initializer=_init())

    fc1 = Dense(ncla1, activation='softmax', name='output1', kernel_initializer=_init())

    # Stem: two parallel convolutions concatenated to 64 channels.
    x1 = conv0(input1)
    x1x = conv0x(input1)
    x1 = concatenate([x1, x1x], axis=-1)

    x1 = _gated_residual(x1, bn11, conv11, conv12, fc11, fc12)

    if l == 2:
        # The second block uses its own gate layers (fc21 / fc22). It used to
        # call fc11 / fc12 again, which silently shared the first block's gate
        # weights and left fc21 / fc22 unused.
        x1 = _gated_residual(x1, bn21, conv21, conv22, fc21, fc22)

    x1 = GlobalAveragePooling2D()(x1)
    pre1 = fc1(x1)

    model1 = Model(inputs=input1, outputs=pre1)
    return model1
def _augment_model(self, model, score, reweighting):
    """Wrap ``model`` so that its per-sample loss, score and weights are graph outputs.

    Two extra inputs (targets and predicted scores) are appended to the
    wrapped model's inputs. The new model outputs the per-sample weighted
    loss, and two backend functions are compiled: one that also applies the
    optimizer updates (training) and one that only applies state/metric
    updates (evaluation).

    Side effects: sets ``self.model``, ``self.optimizer``,
    ``self.model.optimizer``, ``self._train_on_batch`` and
    ``self._evaluate_on_batch``.
    """
    # --- information taken from the wrapped model ----------------------------
    loss = model.loss
    # Fresh optimizer of the same class, rebuilt from the original's config.
    optimizer = model.optimizer.__class__(**model.optimizer.get_config())
    target_shape = K.int_shape(model.output)[1:]
    if isinstance(loss, str) and loss.startswith("sparse"):
        # Sparse losses take a single label in the last axis.
        target_shape = target_shape[:-1] + (1, )

    # Only a single loss is supported.
    assert not isinstance(loss, list)

    # --- extra inputs: the targets and the predicted scores ------------------
    y_true = Input(shape=target_shape)
    pred_score = Input(shape=(reweighting.weight_size, ))

    # --- per-sample loss, score and sample weights ---------------------------
    loss_tensor = LossLayer(loss)([y_true, model.output])
    score_tensor = _get_scoring_layer(
        score, y_true, model.output, loss, self.layer, model
    )
    sample_weights = reweighting.weight_layer()([score_tensor, pred_score])

    # --- weighted loss (plus the wrapped model's extra losses) ---------------
    weighted_loss_model = multiply([loss_tensor, sample_weights])
    weighted_loss = weighted_loss_model
    for extra_loss in model.losses:
        weighted_loss += extra_loss
    weighted_loss_mean = K.mean(weighted_loss)

    # --- metric layers -------------------------------------------------------
    metric_tensors = [
        MetricLayer(metric)([y_true, model.output])
        for metric in (model.metrics or [])
    ]

    # Model used for plotting and for access to trainable_weights etc.
    new_model = Model(
        inputs=_tolist(model.input) + [y_true, pred_score],
        outputs=[weighted_loss_model]
    )

    # --- separate on_batch backend functions for training and scoring --------
    updates = optimizer.get_updates(
        weighted_loss_mean,
        new_model.trainable_weights
    )
    metrics_updates = getattr(model, "metrics_updates", [])
    learning_phase = (
        [K.learning_phase()] if weighted_loss_model._uses_learning_phase else []
    )

    inputs = _tolist(model.input) + [y_true, pred_score] + learning_phase
    outputs = [
        weighted_loss_mean,
        loss_tensor,
        weighted_loss,
        score_tensor
    ] + metric_tensors

    self.model = new_model
    self.optimizer = optimizer
    self.model.optimizer = optimizer
    self._train_on_batch = K.function(
        inputs=inputs,
        outputs=outputs,
        updates=updates + model.updates + metrics_updates
    )
    self._evaluate_on_batch = K.function(
        inputs=inputs,
        outputs=outputs,
        updates=model.state_updates + metrics_updates
    )
def attention_block_3d(inputs, TIME_STEPS=12):
    """Multiply ``inputs`` by a softmax attention map computed along its first non-batch axis.

    Args:
        inputs: Keras tensor with two non-batch axes.
            # assumes layout (batch, TIME_STEPS, features) -- TODO confirm
        TIME_STEPS: unit count of the attention Dense layer (default 12).

    Returns:
        Output of the multiply layer named ``'att_mul'``; the attention map
        itself comes from the layer named ``'attention_vec'``.
    """
    # Swap the two non-batch axes so Dense/softmax act along the time axis.
    transposed = Permute((2, 1))(inputs)
    weights = Dense(TIME_STEPS, activation='softmax')(transposed)
    # Swap back so the weights line up with `inputs` again.
    weights = Permute((2, 1), name='attention_vec')(weights)
    return multiply([inputs, weights], name='att_mul')
# Stack the temporal features of every cue listed in `using_cues`.
x = models.ConcatFeature(using_cues, face_temporal, head_temporal, upperbody_temporal,
                         body_temporal, frame_temporal)
feat_count = len(using_cues)

# multi-cue modeling
if args.spatial_merge in ('none', 'weighted'):
    x = Permute((2, 1))(x)
    x = Flatten()(x)
elif args.spatial_merge == 'mcam':
    # One softmax weight per cue, averaged over axis 1 and repeated 2048
    # times so it can be multiplied with the permuted feature tensor.
    a = Dense(feat_count, activation='softmax')(x)
    a = Lambda(lambda x: keras.backend.mean(x, axis=1), name='dim_reduction')(a)
    a = RepeatVector(2048)(a)
    a = Permute((2, 1), name='attention_vec')(a)
    x = Permute((2, 1))(x)
    x = multiply([x, a])
    x = Flatten()(x)

# classification
# NOTE(review): the extent of this `if` body was reconstructed from a
# whitespace-collapsed source -- confirm which layers belong to the MLP head.
if args.mlp:
    x = Dropout(0.5)(x)
    x = Dense(2048, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
output = Dense(class_num, activation='softmax')(x)

# define, compile, and fit the model
input = models.InputLayerList(using_cues, face_input, head_input, upperbody_input,
                              body_input, frame_input)
# `outputs=` is the Keras 2 keyword matching `inputs=`; the legacy `output=`
# spelling is rejected by current Keras releases.
model = Model(inputs=input, outputs=output)
x = ResNet50(weights='imagenet', include_top=False)(x) x = Dropout(0.25)(x) x = Flatten()(x) feature_map = Dense(256, activation='relu')(x) # split and define detection head detect = Dense(64, activation='relu')(feature_map) detect = Dense(2, activation='softmax', name='detect_output')(detect) # split and define regression head regress = Dense(64, activation='relu')(feature_map) regress = Dense(5, name='regress_output')(regress) # merge and define final loss detected = Dense(1, activation='relu')(detect) final = multiply([detected, regress], name='final_output') final_model = Model(inputs=[main_input], outputs=[detect, regress, final]) else: final_model = load_model(MODEL_NAME) final_model.compile(optimizer='adam', loss={ 'detect_output': 'binary_crossentropy', 'regress_output': 'mse', 'final_output': 'mse' }, loss_weights={ 'final_output': 1, 'detect_output': 1, 'regress_output': 1 },
def _build_graph(self):
    """Assemble the attention encoder/decoder network and return it as a Keras ``Model``.

    A bidirectional LSTM encodes the input sentence. An LSTM decoder,
    initialised with the concatenated forward/backward encoder states, is
    combined with a context vector computed from the encoder outputs. The
    final layer is a 3-way softmax applied to every (timestep, depth) cell.

    Returns:
        ``Model([encoder_inputs, decoder_inputs], p_vocab)``.
    """
    embedding_matrix = self._load_embed()
    flags = self.FLAGS

    # ---- inputs and frozen, pre-trained embeddings --------------------------
    encoder_inputs = Input(shape=(flags.sentence_len,), name='Encoder_input')
    enc_e = Embedding(flags.vocab_len, flags.embedding_size,
                      weights=[embedding_matrix], trainable=False,
                      name='EncoderEmbedding')(encoder_inputs)

    decoder_inputs = Input(shape=(flags.timesteps,), name='Decoder_input')
    dec_e = Embedding(flags.vocab_len, flags.embedding_size,
                      weights=[embedding_matrix], trainable=False,
                      name='DecoderEmbedding')(decoder_inputs)

    # ---- encoder ------------------------------------------------------------
    bi_lstm = Bidirectional(
        LSTM(flags.units, return_sequences=True, return_state=True))
    encoder_outputs, f_h, f_c, b_h, b_c = bi_lstm(enc_e)
    encoder_outputs = Dense(flags.units)(encoder_outputs)

    # Forward and backward final states are concatenated, which is why the
    # decoder LSTM below has 2 * units cells.
    encoder_states = [Concatenate()([f_h, b_h]), Concatenate()([f_c, b_c])]

    # ---- decoder ------------------------------------------------------------
    decoder_outputs = LSTM(2*flags.units, return_sequences=True,
                           name='Decoder')(dec_e, initial_state=encoder_states)
    decoder_outputs = Dense(flags.units)(decoder_outputs)

    # Shapes, as annotated by the original author:
    #   encoder_outputs : (batch_size, sentence_len, units)
    #   dec_e           : (batch_size, timesteps, embed_dim)
    #   decoder_outputs : (batch_size, timesteps, units)

    # ---- attention ----------------------------------------------------------
    # Project the encoder outputs onto the decoder's time axis.
    enc_on_time = Permute((2, 1))(encoder_outputs)
    enc_on_time = Dense(flags.timesteps)(enc_on_time)
    enc_on_time = Permute((2, 1))(enc_on_time)    # (batch_size, timesteps, units)

    energy = Activation('tanh')(add([enc_on_time, decoder_outputs]))

    # Project back onto the sentence axis and normalise.
    energy = Permute((2, 1))(energy)
    energy = Dense(flags.sentence_len)(energy)
    energy = Permute((2, 1))(energy)              # (batch_size, sentence_len, units)
    attention = Activation('softmax')(energy)
    weighted = multiply([attention, encoder_outputs])

    def context(m):
        # Sum over the sentence axis, then repeat once per decoder timestep.
        context_vector = K.sum(m, axis=1)         # (batch_size, units)
        context_vector = RepeatVector(self.FLAGS.timesteps)(context_vector)
        return context_vector                     # (batch_size, timesteps, units)

    def expand_dimension(x):
        return K.expand_dims(x, axis=-1)

    context_layer = Lambda(context)
    expand_layer = Lambda(expand_dimension)

    context_vector = context_layer(weighted)      # (batch_size, timesteps, units)

    # ---- merge context with the decoder outputs -----------------------------
    if flags.multi_concat:
        merged = multiply([context_vector, decoder_outputs])
    else:
        merged = Concatenate()([context_vector, decoder_outputs])

    # (batch_size, timesteps, max_depth)
    merged = Dense(flags.max_depth)(merged)
    # (batch_size, timesteps, max_depth, 1)
    merged = expand_layer(merged)
    # (batch_size, timesteps, max_depth, 3)
    p_vocab = Dense(3, activation='softmax', name='Dense')(merged)

    return Model([encoder_inputs, decoder_inputs], p_vocab)
def _cost_module_fc_block(tensor, units, name, adjustable):
    """Apply Dense -> activation -> (Alpha)Dropout, configured from ``adjustable``."""
    dense_layer = layers.Dense(units, name=name,
                               trainable=adjustable.trainable_cost_module)(tensor)
    activation = layers.Activation(adjustable.activation_function)(dense_layer)
    # SELU activations are paired with AlphaDropout, everything else with
    # plain Dropout.
    if adjustable.activation_function == 'selu':
        return layers.AlphaDropout(adjustable.dropout_rate)(activation)
    return layers.Dropout(adjustable.dropout_rate)(activation)


def create_cost_module(inputs, adjustable):
    """Implements the cost module of the siamese network.

    :param inputs:      list containing the feature tensor from each siamese head
    :param adjustable:  configuration object; the attributes read here are
                        ``cost_module_type``, ``neural_distance``,
                        ``neural_distance_layers``, ``trainable_cost_module``,
                        ``activation_function``, ``dropout_rate`` and
                        ``weights_name``
    :return:            for ``'neural_network'`` and ``'euclidean_fc'`` a softmax
                        tensor over ``pc.NUM_CLASSES``; for ``'euclidean'`` and
                        ``'cosine'`` the distance tensor; ``None`` for any other
                        ``cost_module_type``
    :raises ValueError: if ``cost_module_type`` is ``'neural_network'`` and
                        ``neural_distance`` is not one of the supported merges
    """
    def subtract(x):
        output = x[0] - x[1]
        return output

    def divide(x):
        output = x[0] / x[1]
        return output

    def absolute(x):
        output = abs(x[0] - x[1])
        return output

    if adjustable.cost_module_type == 'neural_network':
        # Merge the two feature tensors with the configured operation.
        if adjustable.neural_distance == 'concatenate':
            features = layers.concatenate(inputs)
        elif adjustable.neural_distance == 'add':
            features = layers.add(inputs)
        elif adjustable.neural_distance == 'multiply':
            features = layers.multiply(inputs)
        elif adjustable.neural_distance == 'subtract':
            features = layers.Lambda(subtract)(inputs)
        elif adjustable.neural_distance == 'divide':
            features = layers.Lambda(divide)(inputs)
        elif adjustable.neural_distance == 'absolute':
            features = layers.Lambda(absolute)(inputs)
        else:
            # Previously `features` was set to None here and the failure only
            # surfaced later, when the first Dense layer was called on it.
            raise ValueError('Unsupported neural_distance: {!r}'.format(adjustable.neural_distance))

        hidden = _cost_module_fc_block(features, adjustable.neural_distance_layers[0],
                                       'dense_1', adjustable)
        hidden = _cost_module_fc_block(hidden, adjustable.neural_distance_layers[1],
                                       'dense_2', adjustable)

        output_layer = layers.Dense(pc.NUM_CLASSES, name='ouput')(hidden)
        softmax = layers.Activation('softmax')(output_layer)

        if adjustable.weights_name is not None:
            # NOTE(review): `softmax` is the output tensor of a layer, not a
            # model; `load_weights` is a Model method, so this call looks like
            # it will fail whenever `weights_name` is set. Loading most likely
            # has to happen on the Model the caller builds from this tensor --
            # confirm and move it there.
            softmax.load_weights(os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS, adjustable.weights_name),
                                 by_name=True)
        return softmax

    elif adjustable.cost_module_type == 'euclidean':
        distance = layers.Lambda(euclidean_distance)(inputs)
        return distance

    elif adjustable.cost_module_type == 'euclidean_fc':
        distance = layers.Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)(inputs)
        dense_layer = layers.Dense(1, name='dense_1')(distance)
        activation = layers.Activation(adjustable.activation_function)(dense_layer)
        output_layer = layers.Dense(pc.NUM_CLASSES, name='ouput')(activation)
        softmax = layers.Activation('softmax')(output_layer)
        return softmax

    elif adjustable.cost_module_type == 'cosine':
        distance = layers.Lambda(cosine_distance, output_shape=cos_dist_output_shape)(inputs)
        return distance

    # Unknown cost_module_type: kept as the historical "return nothing".
    return None
def _construct_q_network(self):
    """Construct the online and target deep Q-learning networks.

    This extends the network architecture found in the DeepMind paper.
    The dueling-DQN approach is implemented (see the `policy` layer).

    BatchNormalization and Dropout are generally helpful and were tested.
    Empirically they did not perform well (drove Q to very high/low values).
    I do not know why.

    https://www.nature.com/articles/nature14236 ... paper on DQN
    https://arxiv.org/pdf/1511.06581.pdf ... dueling DQN

    Notes:
        Using Batch Normalization requires the same batch size on training
        and test. This cannot be easily implemented in an RL scenario with
        PER.

    Side effects:
        Sets ``self.model`` (compiled with the clipped masked error) and
        ``self.target_model`` (a structurally identical copy, synchronised
        through ``self.target_update()``), and prints the model summary.
    """
    # Mask that allows updating of only the action that was observed
    mask_input = Input((self.action_size, ), name='mask')

    # Preprocess data on input, allows storing frames as uint8
    frames_input = Input(self.img_size + (self.num_frames, ), name='frames')

    # Scale by 142 instead of 255, because for BreakOut the max val is 142
    x = Lambda(lambda x: x / 142.0)(frames_input)

    x = Convolution2D(filters=32,
                      kernel_size=(8, 8),
                      strides=(4, 4),
                      kernel_regularizer=l2(0.1),
                      kernel_initializer='he_normal')(x)
    x = Activation('relu')(x)
    x = Convolution2D(filters=64,
                      kernel_size=(4, 4),
                      strides=(2, 2),
                      kernel_regularizer=l2(0.1),
                      kernel_initializer='he_normal')(x)
    x = Activation('relu')(x)
    x = Convolution2D(filters=64,
                      kernel_size=(3, 3),
                      strides=(1, 1),
                      kernel_regularizer=l2(0.01))(x)
    x = Activation('relu')(x)
    flatten = Flatten()(x)

    # Dueling DQN -- decompose output to Advantage and Value parts
    # V(s): how good it is to be in any given state.
    # A(a): how much better taking a certain action would be compared to the others
    fc1 = Dense(units=512,
                activation=None,
                kernel_regularizer=l2(0.1),
                kernel_initializer='he_normal')(flatten)
    advantage = Dense(self.action_size,
                      activation=None,
                      kernel_regularizer=l2(0.1),
                      kernel_initializer='he_normal')(fc1)
    fc2 = Dense(units=512,
                activation=None,
                kernel_regularizer=l2(0.01))(flatten)
    value = Dense(1, kernel_regularizer=l2(0.01))(fc2)

    # dueling_type == 'avg'
    # Q(s,a;theta) = V(s;theta) + (A(s,a;theta)-Avg_a(A(s,a;theta)))
    # The average has to be taken over the action axis of each sample.
    # Without `axis` the mean ran over the whole batch, so a state's Q-values
    # depended on which other states shared its batch.
    policy = Lambda(lambda x: x[0] - K.mean(x[0], axis=1, keepdims=True) + x[1],
                    output_shape=(self.action_size, ))([advantage, value])

    filtered_policy = multiply([policy, mask_input])

    self.model = Model(inputs=[frames_input, mask_input],
                       outputs=[filtered_policy])

    # Create an identical copy of the model; rebuilding it from the config
    # makes sure the two do not point to the same object.
    config = self.model.get_config()
    self.target_model = Model.from_config(config)
    self.target_update()  # Assure weights are identical.

    losses = [clipped_masked_error(mask_input)]
    metrics = ["mae", mean_q]

    # NOTE(review): epsilon=0.00 removes the numerical-stability term of
    # RMSprop -- confirm this is deliberate.
    optimizer = RMSprop(lr=self.learn_rate,
                        epsilon=0.00,
                        rho=0.99,
                        decay=1e-6,
                        clipnorm=1.)

    self.model.compile(loss=losses, optimizer=optimizer, metrics=metrics)
    # Loss, optimizer and metrics are just dummies as this model is never trained
    self.target_model.compile(loss='MSE', optimizer=Adam(), metrics=[])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    self.model.summary()
    print("Successfully constructed networks.")
# Embedding layer for the aspect words. It is separate from the sentence
# embedding layer (`sentence_embedding`, defined earlier in the script).
aspect_embedding_layer = Embedding(input_dim=MAX_NUM_ASPECT_WORDS,
                                   output_dim=EMBEDDING_DIM,
                                   mask_zero=MASK_ZEROS,  # this needs to be True
                                   trainable=True)

sentence_ip = Input(shape=(MAX_SENTENCE_LENGTH,), dtype='int32')
aspect_ip = Input(shape=(MAX_SENTENCE_LENGTH,), dtype='int32')

# Look up both inputs; note that these are two different embeddings.
sentence_vectors = sentence_embedding(sentence_ip)
aspect_vectors = aspect_embedding_layer(aspect_ip)

# Attention vector for the aspect embeddings (softmax, no bias).
aspect_attention = Dense(units=EMBEDDING_DIM,
                         activation='softmax',
                         use_bias=False,
                         name='aspect_attention')(aspect_vectors)

# Dampen the aspect embeddings according to the attention with an
# element-wise multiplication ...
aspect_embedding = multiply([aspect_vectors, aspect_attention])
# ... and augment the sentence embedding with the attended aspect embedding.
sentence_embedding = add([sentence_vectors, aspect_embedding])

# From here on any layer other than CNNs can be used.
#x = MaskedGlobalAveragePooling1D()(sentence_embedding)
#x = MaskableFlatten()(sentence_embedding)
x = LSTM(units=100)(sentence_embedding)
x = Dense(units=NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=[sentence_ip, aspect_ip], outputs=x)
model.summary()
# Attention branch: dropout on the batch-normalised features, then a stack of
# 1x1 ReLU convolutions narrowing down to one sigmoid mask channel.
attn_layer = Dropout(rate=0.5)(bn_features)
for attn_filters in (64, 16, 8):
    attn_layer = Conv2D(filters=attn_filters, kernel_size=(1, 1),
                        padding='same', activation='relu')(attn_layer)
attn_layer = Conv2D(filters=1,
                    kernel_size=(1, 1),
                    padding='valid',
                    activation='sigmoid')(attn_layer)

# Fan the single mask channel out to all `pt_depth` channels with a frozen
# 1x1 convolution whose kernel is all ones.
up_c2_w = np.ones((1, 1, 1, pt_depth))
up_c2 = Conv2D(filters=pt_depth, kernel_size=(1, 1), padding='same',
               activation='linear', use_bias=False, weights=[up_c2_w])
up_c2.trainable = False
attn_layer = up_c2(attn_layer)

mask_features = multiply([attn_layer, bn_features])
gap_features = GlobalAveragePooling2D()(mask_features)
gap_mask = GlobalAveragePooling2D()(attn_layer)

# Rescale by the average mask value to account for the regions the attention
# model masked out.
gap = Lambda(lambda x: x[0] / x[1], name='RescaleGAP')([gap_features, gap_mask])
gap_dr = Dropout(rate=0.25)(gap)
dense_steps = Dense(units=128, activation='relu')(gap_dr)
dr_steps = Dropout(rate=0.25)(dense_steps)
out_layer = Dense(units=t_y.shape[-1], activation='softmax')(dr_steps)

retina_model = Model(inputs=[in_lay], outputs=[out_layer])

from keras.metrics import top_k_categorical_accuracy


def top_2_accuracy(in_gt, in_pred):
    """Top-k categorical accuracy with k fixed to 2."""
    return top_k_categorical_accuracy(in_gt, in_pred, k=2)
def cell_net(input_dim, args, useMulGpu=False):
    """Build and compile the attention-based multiple-instance network.

    Two conv/max-pool stages and two 512-unit dense layers embed every
    instance; a ``Mil_Attention`` layer produces per-instance weights that are
    multiplied with the embeddings before the final ``Last_Sigmoid`` layer.

    Args:
        input_dim: shape of a single instance, passed to ``Input(shape=...)``.
        args: configuration object. Reads ``init_lr``, ``useGated`` and, when
            present, ``weight_decay``.
        useMulGpu: when truthy, the model is wrapped with
            ``multi_gpu_model(model, gpus=2)`` before compilation.

    Returns:
        The compiled model (the multi-GPU wrapper if ``useMulGpu`` is set).
    """
    lr = args.init_lr
    # The L2 strength used to be bound to the learning rate (`args.init_lr`),
    # so the two could not be set independently. Use the dedicated setting
    # when the configuration provides one and fall back to the historical
    # value otherwise.
    weight_decay = getattr(args, 'weight_decay', args.init_lr)

    data_input = Input(shape=input_dim, dtype='float32', name='input')

    conv1 = Conv2D(36, kernel_size=(4, 4), kernel_regularizer=l2(weight_decay),
                   activation='relu')(data_input)
    print('Conv1 .shape')
    print(conv1.shape)
    conv1 = MaxPooling2D((2, 2))(conv1)

    conv2 = Conv2D(48, kernel_size=(3, 3), kernel_regularizer=l2(weight_decay),
                   activation='relu')(conv1)
    conv2 = MaxPooling2D((2, 2))(conv2)

    x = Flatten()(conv2)

    fc1 = Dense(512, activation='relu', kernel_regularizer=l2(weight_decay), name='fc1')(x)
    fc1 = Dropout(0.5)(fc1)
    fc2 = Dense(512, activation='relu', kernel_regularizer=l2(weight_decay), name='fc2')(fc1)
    fc2 = Dropout(0.5)(fc2)

    # Per-instance attention weights, applied to the instance embeddings.
    alpha = Mil_Attention(L_dim=128, output_dim=1, kernel_regularizer=l2(weight_decay),
                          name='alpha', use_gated=args.useGated)(fc2)
    x_mul = multiply([alpha, fc2])

    out = Last_Sigmoid(output_dim=1, name='FC1_sigmoid')(x_mul)

    model = Model(inputs=[data_input], outputs=[out])
    model.summary()

    # Single- and multi-GPU paths only differ in the object being compiled.
    parallel_model = multi_gpu_model(model, gpus=2) if useMulGpu else model
    parallel_model.compile(optimizer=Adam(lr=lr, beta_1=0.9, beta_2=0.999),
                           loss=bag_loss, metrics=[bag_accuracy])
    return parallel_model
# Size of the latent vectors and of the per-item bias terms.
dim_embedddings = 30
bias = 1

# books
book_input = Input(shape=[1], name='Book')
book_embedding = Embedding(n_books + 1, dim_embedddings, name="Book-Embedding")(book_input)
# The book bias is looked up with book ids, so its table must be sized by the
# number of books. It used to be sized with n_users + 1, which is too small
# whenever there are more books than users.
book_bias = Embedding(n_books + 1, bias, name="Book-Bias")(book_input)

# users
user_input = Input(shape=[1], name='User')
user_embedding = Embedding(n_users + 1, dim_embedddings, name="User-Embedding")(user_input)
user_bias = Embedding(n_users + 1, bias, name="User-Bias")(user_input)

# Element-wise product of the two latent vectors, joined with both biases.
matrix_product = multiply([book_embedding, user_embedding])
matrix_product = Dropout(0.2)(matrix_product)

input_terms = concatenate([matrix_product, user_bias, book_bias])
input_terms = Flatten()(input_terms)

## add dense layers
dense_1 = Dense(50, activation="relu", name="Dense1")(input_terms)
dense_1 = Dropout(0.2)(dense_1)
dense_2 = Dense(20, activation="relu", name="Dense2")(dense_1)
dense_2 = Dropout(0.2)(dense_2)

result = Dense(1, activation='relu', name='Activation')(dense_2)

## define model with 2 inputs and 1 output
model_mf = Model(inputs=[book_input, user_input], outputs=result)
def create_model(num_class, num_bit, image_rows, image_cols, DIM_ORDERING, WEIGHT_DECAY, USE_BN, DROPOUT):
    """Build the two-stream convolutional classifier.

    Two parallel convolution streams ("channel 1" / "channel 2") process the
    same input, are cross-connected by concatenation (layers 4-5) and by
    element-wise multiplication (layers 7-9), and end in a softmax over
    ``num_class`` outputs.

    :param num_class: size of the softmax output; also used as the channel
        count of the input tensor.
    :param num_bit: accepted for interface compatibility; not used here.
    :param image_rows: input height.
    :param image_cols: input width.
    :param DIM_ORDERING: ``'th'`` (channels first) or ``'tf'`` (channels last).
    :param WEIGHT_DECAY: forwarded to every ``conv2D_bn`` call.
    :param USE_BN: forwarded to every ``conv2D_bn`` call as ``batch_norm``.
    :param DROPOUT: dropout rate applied after each 2048-unit dense layer.
    :return: an uncompiled Keras ``Model``.
    :raises Exception: if ``DIM_ORDERING`` is neither ``'th'`` nor ``'tf'``.
    """
    # Define image input layer
    # NOTE(review): the channel axis is sized with num_class although the
    # original comments describe it as "3 - Number of RGB Colours"; confirm
    # that this is intended.
    if DIM_ORDERING == 'th':
        INP_SHAPE = (num_class, image_rows, image_cols)
        CONCAT_AXIS = 1
        dim_org = 'channels_first'
    elif DIM_ORDERING == 'tf':
        INP_SHAPE = (image_rows, image_cols, num_class)
        CONCAT_AXIS = 3
        dim_org = 'channels_last'
    else:
        raise Exception('Invalid dim ordering: ' + str(DIM_ORDERING))
    img_input = Input(shape=INP_SHAPE)

    def _conv(tensor, nb_filter, nb_row, nb_col):
        # Previously only the very first convolution received the data
        # format, weight decay and batch-norm switch; every later call fell
        # back to conv2D_bn's defaults.  Route all of them through here so
        # the caller's settings apply to the whole network.
        return conv2D_bn(tensor, nb_filter, nb_row, nb_col,
                         subsample=(1, 1), border_mode='same',
                         dim_ordering=dim_org, weight_decay=WEIGHT_DECAY,
                         batch_norm=USE_BN)

    def _pool_and_pad(tensor, size):
        # Max-pool with equal stride and window, then pad one pixel per side.
        tensor = MaxPooling2D(strides=(size, size), pool_size=(size, size),
                              data_format=dim_org)(tensor)
        return ZeroPadding2D(padding=(1, 1), data_format=dim_org)(tensor)

    def _concat_pad_conv(left, right):
        # Concatenate both streams on the channel axis, pad, then convolve.
        merged = concatenate([left, right], axis=CONCAT_AXIS)
        merged = ZeroPadding2D(padding=(1, 1), data_format=dim_org)(merged)
        return _conv(merged, 192, 13, 13)

    # Channel 1 - Conv Net Layer 1
    x = _conv(img_input, 3, 11, 11)
    x = _pool_and_pad(x, 4)

    # Channel 2 - Conv Net Layer 1
    y = _conv(img_input, 3, 11, 11)
    y = _pool_and_pad(y, 4)

    # Channel 1 - Conv Net Layer 2
    x = _conv(x, 48, 55, 55)
    x = _pool_and_pad(x, 2)

    # Channel 2 - Conv Net Layer 2
    y = _conv(y, 48, 55, 55)
    y = _pool_and_pad(y, 2)

    # Channel 1 - Conv Net Layer 3
    x = _conv(x, 128, 27, 27)
    x = _pool_and_pad(x, 2)

    # Channel 2 - Conv Net Layer 3
    y = _conv(y, 128, 27, 27)
    y = _pool_and_pad(y, 2)

    # Conv Net Layer 4 (channel 1, then channel 2): both see concat(x, y)
    x1 = _concat_pad_conv(x, y)
    y1 = _concat_pad_conv(x, y)

    # Conv Net Layer 5 (channel 1, then channel 2): both see concat(x1, y1)
    x2 = _concat_pad_conv(x1, y1)
    y2 = _concat_pad_conv(x1, y1)

    # Channel 1 - Conv Net Layer 6
    x3 = _conv(x2, 128, 27, 27)
    x3 = _pool_and_pad(x3, 2)

    # Channel 2 - Conv Net Layer 6
    y3 = _conv(y2, 128, 27, 27)
    y3 = _pool_and_pad(y3, 2)

    # Channel 1 - Conv Net Layer 7
    x4 = multiply([x3, y3])
    x4 = Flatten()(x4)
    x4 = Dense(2048, activation='relu')(x4)
    x4 = Dropout(DROPOUT)(x4)

    # Channel 2 - Conv Net Layer 7
    y4 = multiply([x3, y3])
    y4 = Flatten()(y4)
    y4 = Dense(2048, activation='relu')(y4)
    y4 = Dropout(DROPOUT)(y4)

    # Channel 1 - Conv Net Layer 8
    x5 = multiply([x4, y4])
    x5 = Dense(2048, activation='relu')(x5)
    x5 = Dropout(DROPOUT)(x5)

    # Channel 2 - Conv Net Layer 8
    y5 = multiply([x4, y4])
    y5 = Dense(2048, activation='relu')(y5)
    y5 = Dropout(DROPOUT)(y5)

    # Final Channel - Conv Net 9
    xy = multiply([x5, y5])
    xy = Dense(num_class, activation='softmax')(xy)

    model = Model(inputs=[img_input], outputs=[xy])
    return model
WV_DIM, mask_zero=False, weights=[wv_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=True)
# NOTE(review): the line above is the tail of a call whose opening is not
# visible in this chunk; presumably it constructs the `wv_layer` used below
# (an embedding initialised from wv_matrix) -- confirm against the full file.

# Two integer inputs of length MAX_SEQUENCE_LENGTH, both passed through the
# same wv_layer instance.
sent1_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences1 = wv_layer(sent1_input)
sent2_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences2 = wv_layer(sent2_input)

# Each sentence is encoded by its own LSTM(300) instance: two separate
# layers are constructed here, so the encoders do not share weights.
lstmed_sequences1 = LSTM(300)(embedded_sequences1)
lstmed_sequences2 = LSTM(300)(embedded_sequences2)

# Pair features: element-wise product and absolute difference of the two
# encodings, concatenated.
x_mult_y = multiply([lstmed_sequences1, lstmed_sequences2])
x_minus_y = subtract([lstmed_sequences1, lstmed_sequences2])
abs_x_minus_y = Lambda(lambda x: abs(x))(x_minus_y)
concatenation = concatenate([abs_x_minus_y, x_mult_y])
fcnn_input = Reshape((600, ))(concatenation)

# Softmax output with one unit per entry of scores[0].
fcnn_layer_one = Dense(len(scores[0]), input_shape=(600, ), activation='softmax')(fcnn_input)
model = Model(inputs=[sent1_input, sent2_input], outputs=[fcnn_layer_one])
print(model.summary())

# Path built from `path`, which is defined outside this chunk; the name
# suggests a weights checkpoint -- its use is not visible here.
filepath = path + 'lstm_weights.last.hdf5'
def mask_net_3d(
        ishape,  # type: Tuple[Optional[int], Optional[int], Optional[int], int]
        fg_filt_wid,  # type: Tuple[int, int, int]
        bg_filt_wid,  # type: Tuple[int, int, int]
        trainable=False):
    # type: (...) -> keras.models.Model
    """
    Turn a mask into a distance-map-like volume.

    The mask and its inverse are each smoothed with a uniform (box) filter;
    the smoothed mask is kept inside the mask and the negated smoothed
    inverse is kept outside it, so the outside is represented as [-1, 0)
    and the inside as [0, 1].

    :param ishape: shape of the input mask, without the batch axis
    :param fg_filt_wid: half-width per axis of the foreground box filter
        (the filter side is ``2 * w + 1``)
    :param bg_filt_wid: half-width per axis of the background box filter
    :param trainable: Should the network be trained with the rest of the model
    :return: Keras model mapping the raw mask to the combined map

    >>> inet = mask_net_3d((5, 9, 10, 1), (3, 3, 3), (2, 9, 9))
    >>> inet.summary() #doctest: +NORMALIZE_WHITESPACE
    ____________________________________________________________________________________________________
    Layer (type)                     Output Shape          Param #     Connected to
    ====================================================================================================
    RawMask (InputLayer)             (None, 5, 9, 10, 1)   0
    ____________________________________________________________________________________________________
    ExpandingImage_3_9_9 (ZeroPaddin (None, 11, 27, 28, 1) 0           RawMask[0][0]
    ____________________________________________________________________________________________________
    InvertedMask (Lambda)            (None, 11, 27, 28, 1) 0           ExpandingImage_3_9_9[0][0]
    ____________________________________________________________________________________________________
    BlurMask_7_7_7 (Conv3D)          (None, 11, 27, 28, 1) 343         ExpandingImage_3_9_9[0][0]
    ____________________________________________________________________________________________________
    BlurInvMask_5_19_19 (Conv3D)     (None, 11, 27, 28, 1) 1805        InvertedMask[0][0]
    ____________________________________________________________________________________________________
    MaskPositive (Multiply)          (None, 11, 27, 28, 1) 0           BlurMask_7_7_7[0][0]
                                                                       ExpandingImage_3_9_9[0][0]
    ____________________________________________________________________________________________________
    MaskNegative (Multiply)          (None, 11, 27, 28, 1) 0           BlurInvMask_5_19_19[0][0]
                                                                       InvertedMask[0][0]
    ____________________________________________________________________________________________________
    CombiningImage (Add)             (None, 11, 27, 28, 1) 0           MaskPositive[0][0]
                                                                       MaskNegative[0][0]
    ____________________________________________________________________________________________________
    CroppingEdges_3_9_9 (Cropping3D) (None, 5, 9, 10, 1)   0           CombiningImage[0][0]
    ====================================================================================================
    Total params: 2,148
    Trainable params: 0
    Non-trainable params: 2,148
    ____________________________________________________________________________________________________
    >>> inet2 = mask_net_3d((None, None, None, 1), (1, 1, 1), (2, 2, 2))
    >>> (100*inet2.predict(np.ones((1, 3, 3, 3, 1))).ravel()).astype(int)
    array([ 29,  44,  29,  44,  66,  44,  29,  44,  29,  44,  66,  44,  66,
           100,  66,  44,  66,  44,  29,  44,  29,  44,  66,  44,  29,  44,  29])
    """

    def _box_kernel(half_widths):
        # Normalised all-ones kernel of side 2*w+1 per axis, with two
        # trailing singleton axes appended.
        box = np.ones(tuple(half_widths[axis] * 2 + 1 for axis in range(3)))
        box = box / box.sum()
        return box[..., np.newaxis, np.newaxis]

    def _fixed_conv(layer_name, kernel):
        # Bias-free linear Conv3D whose initial weights are the given kernel.
        return Conv3D(kernel.shape[-1],
                      kernel_size=kernel.shape[:3],
                      padding='same',
                      name=layer_name,
                      activation='linear',
                      weights=[kernel],
                      use_bias=False)

    # Pad by the larger of the two half-widths on every axis; the same
    # amount is cropped again at the end.
    pad_widths = [max(fg, bg) for fg, bg in zip(fg_filt_wid, bg_filt_wid)]

    raw_mask = Input(shape=ishape, name='RawMask')
    padded_mask = ZeroPadding3D(
        padding=pad_widths,
        name='ExpandingImage_{}_{}_{}'.format(*pad_widths))(raw_mask)
    inverted_mask = Lambda(lambda x: 1.0 - x, name='InvertedMask')(padded_mask)

    fg_kernel = _box_kernel(fg_filt_wid)
    bg_kernel = _box_kernel(bg_filt_wid)

    # format() consumes only the first three (spatial) entries of the shape.
    blurred_fg = _fixed_conv('BlurMask_{}_{}_{}'.format(*fg_kernel.shape),
                             fg_kernel)(padded_mask)
    blurred_bg = _fixed_conv('BlurInvMask_{}_{}_{}'.format(*bg_kernel.shape),
                             -1 * bg_kernel)(inverted_mask)

    # Keep the positive blur inside the mask and the negative blur outside.
    inside = multiply([blurred_fg, padded_mask], name='MaskPositive')
    outside = multiply([blurred_bg, inverted_mask], name='MaskNegative')
    combined = add([inside, outside], name='CombiningImage')
    combined = Cropping3D(
        cropping=pad_widths,
        name='CroppingEdges_{}_{}_{}'.format(*pad_widths))(combined)

    out_model = Model(inputs=[raw_mask], outputs=[combined])
    out_model.trainable = trainable
    for layer in out_model.layers:
        layer.trainable = trainable
    return out_model
def senet_layer(x, nb_channels, ratio):
    """Rescale the channels of ``x`` with a squeeze-and-excitation gate.

    ``x`` is globally average-pooled, passed through a ReLU bottleneck dense
    layer and a sigmoid dense layer with ``nb_channels`` units, and the
    result is multiplied back onto ``x``.

    :param x: input feature-map tensor.
    :param nb_channels: number of units of the sigmoid gate (expected to
        match the channel count of ``x``).
    :param ratio: reduction factor for the bottleneck layer.
    :return: ``x`` multiplied by the gate.
    """
    # Clamp to one unit so a ratio larger than nb_channels cannot produce a
    # zero-width Dense layer.
    bottleneck_units = max(1, int(nb_channels / ratio))

    xd = GlobalAveragePooling2D()(x)
    xd = Dense(bottleneck_units, activation='relu')(xd)
    xd = Dense(nb_channels, activation='sigmoid')(xd)
    return multiply([x, xd])
def build_model(self, input_shape):
    """Build the single-step cell model with an attention output.

    The step combines the flattened input sequence with the previous hidden
    state through LSTM-style gates, then scores every input position against
    the new hidden state and returns a softmax over those scores.

    :param input_shape: full batch shape of the input; ``input_shape[1]`` is
        used as the sequence length and ``input_shape[-1]`` as the feature
        size.
    :return: ``Model([x, h_tm1, c_tm1], [alpha, h, c])`` where ``h_tm1`` and
        ``c_tm1`` are the previous hidden and cell states.
    """
    input_dim = input_shape[-1]
    output_dim = self.output_dim
    input_length = input_shape[1]
    hidden_dim = self.hidden_dim
    # Single-argument print: valid and identical in output on Python 2 and
    # Python 3 (the former print statement was a syntax error on Python 3).
    print("the input shape is  {} hidden shape  {}".format(input_shape, hidden_dim))

    x = Input(batch_shape=input_shape)

    # Slicing/transposing the input did not work, so the whole sequence is
    # flattened instead.
    # NOTE(review): the declared output_shape includes input_shape[0] and uses
    # hidden_dim rather than input_dim; left unchanged because
    # self.custom_flatten is not visible here -- confirm against its output.
    x_tm1 = Lambda(self.custom_flatten,
                   output_shape=(input_shape[0], input_length * hidden_dim))(x)

    h_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))
    c_tm1 = Input(batch_shape=(input_shape[0], hidden_dim))

    W1 = Dense(hidden_dim * 4,
               kernel_initializer=self.kernel_initializer,
               kernel_regularizer=self.kernel_regularizer,
               use_bias=False,
               input_shape=(hidden_dim * input_length,),
               name="W1")
    W2 = Dense(output_dim,
               kernel_initializer=self.kernel_initializer,
               kernel_regularizer=self.kernel_regularizer)
    W3 = Dense(1,
               kernel_initializer=self.kernel_initializer,
               kernel_regularizer=self.kernel_regularizer,
               use_bias=False,
               name="W3")
    U = Dense(hidden_dim * 4,
              kernel_initializer=self.kernel_initializer,
              kernel_regularizer=self.kernel_regularizer,
              use_bias=False,
              name="U")

    # Gate pre-activations, split into four slices by get_slices_custom.
    z = add([W1(x_tm1), U(h_tm1)])
    z0, z1, z2, z3 = get_slices_custom(z, 4, 4 * hidden_dim)

    i = Activation(self.recurrent_activation)(z0)
    f = Activation(self.recurrent_activation)(z1)

    temp1 = multiply([f, c_tm1])
    temp2 = multiply([i, Activation(self.activation)(z2)])
    c = add([temp1, temp2])

    o = Activation(self.recurrent_activation)(z3)
    h = multiply([o, Activation(self.activation)(c)])

    # Treating h as d_i (wrt Pointer Network nomenclature
    # https://arxiv.org/pdf/1506.03134.pdf).
    # h has hidden_dim features, so repeating it input_length times yields
    # (input_length, hidden_dim); the declared shape previously used
    # input_dim, which is only correct when the two sizes coincide.
    H = Lambda(lambda t: K.repeat(t, input_length),
               output_shape=(input_length, hidden_dim))(h)
    _xH = concatenate([x, H])
    # Collapse batch and time so W3 scores one (input, hidden) pair per row.
    _xH = Lambda(lambda t: K.reshape(t, (-1, input_dim + hidden_dim)),
                 output_shape=(input_dim + hidden_dim,))(_xH)

    alpha = W3(_xH)
    # Back to one row of input_length scores per sample.
    alpha = Lambda(lambda t: K.reshape(t, (-1, input_length)),
                   output_shape=(input_length,))(alpha)
    alpha = W2(alpha)
    alpha = Activation('softmax')(alpha)

    return Model([x, h_tm1, c_tm1], [alpha, h, c])
def __init__(self, inputs=None, outputs=None, N=None, M=None, unroll=False, hdim=300, word2vec_dim=300, dropout_rate=0.2, **kwargs):
    """Build the FastQA graph, or rebuild the model from existing tensors.

    When both ``inputs`` and ``outputs`` are given they are forwarded
    unchanged to the parent constructor and nothing else is built.

    Otherwise the graph is created from two inputs, ``P`` with shape
    ``(N, word2vec_dim)`` and ``Q`` with shape ``(M, word2vec_dim)``, and the
    model outputs two softmax vectors: ``answer_start`` and ``answer_end``.

    :param inputs: existing input tensors (config-loading path).
    :param outputs: existing output tensors (config-loading path).
    :param N: number of time steps of the passage input ``P``; also used in
        ``range(N)`` and ``RepeatVector(N)``, so it must be an integer when
        the graph is built here.
    :param M: number of time steps of the question input ``Q``.
    :param unroll: forwarded to the LSTM encoder.
    :param hdim: stored in the local ``H``, which is not used afterwards.
    :param word2vec_dim: feature size of both inputs and unit count of the
        LSTM and dense layers.
    :param dropout_rate: dropout of the LSTM encoder.
    :param kwargs: forwarded to the parent constructor.
    """
    # Load model from config
    if inputs is not None and outputs is not None:
        super(FastQA, self).__init__(inputs=inputs, outputs=outputs, **kwargs)
        return

    '''Dimensions'''
    B = None  # not used below
    H = hdim  # not used below
    W = word2vec_dim

    '''Inputs'''
    P = Input(shape=(N, W), name='P')
    Q = Input(shape=(M, W), name='Q')

    '''Word in question binary'''
    def wiq_feature(P, Q):
        '''
        Binary feature mentioned in the paper.

        For each word in passage returns if that word is present in question.
        '''
        # NOTE(review): question_sim below requires the count of matching
        # question positions to equal M, i.e. every question position must
        # match the passage word -- confirm this is the intended test.
        slice = []
        for i in range(N):
            # True where passage word i equals a question word in all W features.
            word_sim = K.tf.equal(W, K.tf.reduce_sum(
                K.tf.cast(K.tf.equal(K.tf.expand_dims(P[:, i, :], 1), Q), K.tf.int32), axis=2))
            question_sim = K.tf.equal(M, K.tf.reduce_sum(K.tf.cast(word_sim, K.tf.int32), axis=1))
            slice.append(K.tf.cast(question_sim, K.tf.float32))

        wiqout = K.tf.expand_dims(K.tf.stack(slice, axis=1), 2)
        return wiqout

    '''Word in question soft alignment'''
    def wiq_feature_soft(P,Q):
        # Placeholder: not implemented.
        pass

    # Both features are computed, but the lines that would concatenate them
    # onto the inputs are commented out below, so they do not reach the encoder.
    wiq_p = Lambda(lambda arg: wiq_feature(arg[0], arg[1]))([P, Q])
    wiq_q = Lambda(lambda q: K.tf.ones([K.tf.shape(Q)[0], M, 1], dtype=K.tf.float32))(Q)

    passage_input = P
    question_input = Q
    # passage_input = Lambda(lambda arg: concatenate([arg[0], arg[1]], axis=2))([P, wiq_p])
    # question_input = Lambda(lambda arg: concatenate([arg[0], arg[1]], axis=2))([Q, wiq_q])

    '''Encoding'''
    # One bidirectional LSTM instance encodes both passage and question.
    encoder = Bidirectional(LSTM(units=W,
                                 return_sequences=True,
                                 dropout=dropout_rate,
                                 unroll=unroll))

    # NOTE(review): Keras layers normally take `weights` as a list of arrays,
    # and the array built here has shape (W, 2W) -- confirm whether this
    # initial value is applied at all for the Dense layers below.
    passage_encoding = passage_input
    passage_encoding = encoder(passage_encoding)
    passage_encoding = TimeDistributed(
        Dense(W,
              use_bias=False,
              trainable=True,
              weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(passage_encoding)

    question_encoding = question_input
    question_encoding = encoder(question_encoding)
    question_encoding = TimeDistributed(
        Dense(W,
              use_bias=False,
              trainable=True,
              weights=np.concatenate((np.eye(W), np.eye(W)), axis=1)))(question_encoding)

    '''Attention over question'''
    # compute the importance of each step
    question_attention_vector = TimeDistributed(Dense(1))(question_encoding)
    question_attention_vector = Lambda(lambda q: keras.activations.softmax(q, axis=1))(question_attention_vector)

    # apply the attention
    question_attention_vector = Lambda(lambda q: q[0] * q[1])([question_encoding, question_attention_vector])
    question_attention_vector = Lambda(lambda q: K.sum(q, axis=1))(question_attention_vector)
    # Repeat the pooled question vector N times so it lines up with the passage.
    question_attention_vector = RepeatVector(N)(question_attention_vector)

    '''Answer span prediction'''

    # Answer start prediction
    answer_start = Lambda(lambda arg:
                          concatenate([arg[0], arg[1], arg[2]]))([
        passage_encoding,
        question_attention_vector,
        multiply([passage_encoding, question_attention_vector])])

    answer_start = TimeDistributed(Dense(W, activation='relu'))(answer_start)
    answer_start = TimeDistributed(Dense(1))(answer_start)
    answer_start = Flatten()(answer_start)
    answer_start = Activation('softmax')(answer_start)

    # Answer end prediction depends on the start prediction
    def s_answer_feature(x):
        # Index of the maximum along axis 1.
        maxind = K.argmax(
            x,
            axis=1,
        )
        return maxind

    x = Lambda(lambda x: K.tf.cast(s_answer_feature(x), dtype=K.tf.int32))(answer_start)
    # Gather, per sample, the passage encoding at the argmax start position.
    start_feature = Lambda(lambda arg: K.tf.gather_nd(arg[0], K.tf.stack(
        [K.tf.range(K.tf.shape(arg[1])[0]), K.tf.cast(arg[1], K.tf.int32)], axis=1)))([passage_encoding, x])
    start_feature = RepeatVector(N)(start_feature)

    # Answer end prediction
    answer_end = Lambda(lambda arg: concatenate([
        arg[0],
        arg[1],
        arg[2],
        multiply([arg[0], arg[1]]),
        multiply([arg[0], arg[2]])
    ]))([passage_encoding, question_attention_vector, start_feature])

    answer_end = TimeDistributed(Dense(W, activation='relu'))(answer_end)
    answer_end = TimeDistributed(Dense(1))(answer_end)
    answer_end = Flatten()(answer_end)
    answer_end = Activation('softmax')(answer_end)

    input_placeholders = [P, Q]
    inputs = input_placeholders
    outputs = [answer_start, answer_end]

    super(FastQA, self).__init__(inputs=inputs,
                                 outputs=outputs,
                                 **kwargs)
def mb_conv_block(inputs, block_args, activation, drop_rate=None, prefix='', freeze_bn=False):
    """Build one bottleneck block with optional squeeze-and-excitation.

    Stages, in order: optional 1x1 expansion conv (skipped when
    ``expand_ratio == 1``), depthwise conv, optional squeeze-and-excitation,
    1x1 projection conv, and an optional residual add with dropout.

    :param inputs: input tensor.
    :param block_args: object providing ``input_filters``, ``output_filters``,
        ``expand_ratio``, ``kernel_size``, ``strides``, ``se_ratio`` and
        ``id_skip``.
    :param activation: activation used after the expansion and depthwise
        stages and inside the SE reduce conv.
    :param drop_rate: dropout rate applied before the residual add; skipped
        when falsy or not positive.
    :param prefix: string prepended to every layer name.
    :param freeze_bn: accepted for interface compatibility; not used here.
    :return: output tensor of the block.
    """
    has_se = (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1)
    # Follow the configured data format.  The axis used to be hard-coded to 3
    # although the SE branch below already supports channels_first, where the
    # channel axis is 1.
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    Dropout = get_dropout()

    # Expansion phase
    filters = block_args.input_filters * block_args.expand_ratio
    if block_args.expand_ratio != 1:
        x = layers.Conv2D(filters, 1,
                          padding='same',
                          use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=prefix + 'expand_conv')(inputs)
        x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'expand_bn')(x)
        x = layers.Activation(activation, name=prefix + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise Convolution
    x = layers.DepthwiseConv2D(block_args.kernel_size,
                               strides=block_args.strides,
                               padding='same',
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=prefix + 'dwconv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'bn')(x)
    x = layers.Activation(activation, name=prefix + 'activation')(x)

    # Squeeze and Excitation phase
    if has_se:
        # The bottleneck width is derived from the block's *input* filters
        # and never drops below one.
        num_reduced_filters = max(1, int(block_args.input_filters * block_args.se_ratio))
        se_tensor = layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)

        if backend.image_data_format() == 'channels_last':
            target_shape = (1, 1, filters)
        else:
            target_shape = (filters, 1, 1)
        se_tensor = layers.Reshape(target_shape, name=prefix + 'se_reshape')(se_tensor)
        se_tensor = layers.Conv2D(num_reduced_filters, 1,
                                  activation=activation,
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=CONV_KERNEL_INITIALIZER,
                                  name=prefix + 'se_reduce')(se_tensor)
        se_tensor = layers.Conv2D(filters, 1,
                                  activation='sigmoid',
                                  padding='same',
                                  use_bias=True,
                                  kernel_initializer=CONV_KERNEL_INITIALIZER,
                                  name=prefix + 'se_expand')(se_tensor)
        if backend.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            pattern = ([True, True, True, False]
                       if backend.image_data_format() == 'channels_last'
                       else [True, False, True, True])
            se_tensor = layers.Lambda(
                lambda t: backend.pattern_broadcast(t, pattern),
                name=prefix + 'se_broadcast')(se_tensor)
        x = layers.multiply([x, se_tensor], name=prefix + 'se_excite')

    # Output phase
    x = layers.Conv2D(block_args.output_filters, 1,
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=prefix + 'project_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=prefix + 'project_bn')(x)

    # The residual connection is only possible when the block keeps both the
    # spatial size (all strides 1) and the channel count.
    can_skip = (block_args.id_skip
                and all(s == 1 for s in block_args.strides)
                and block_args.input_filters == block_args.output_filters)
    if can_skip:
        if drop_rate and (drop_rate > 0):
            x = Dropout(drop_rate,
                        noise_shape=(None, 1, 1, 1),
                        name=prefix + 'drop')(x)
        x = layers.add([x, inputs], name=prefix + 'add')

    return x
def IntentConvNet(tokens_input=None, pos_input=None, static_embedding_layer=None,
                  non_static_embedding_layer=None, num_classes=10):
    """Build the intent classifier with a POS-derived attention vector.

    An attention vector is computed from ``pos_input`` and multiplied onto
    the embedded tokens.  Each embedding channel is then passed through one
    Conv1D/MaxPooling1D pair per entry of ``FILTER_SIZES``, flattened and
    projected to ``SENTENCE_DIM``; the channels are concatenated and fed to a
    softmax over ``num_classes``.

    :param tokens_input: token input tensor; used only as a model input.
    :param pos_input: 3-D tensor; ``shape[1]`` is taken as the number of time
        steps and ``shape[2]`` as the feature size.
    :param static_embedding_layer: embedded-token tensor of the static
        channel (its ``shape[2]`` is the embedding size).
    :param non_static_embedding_layer: optional embedded-token tensor of the
        non-static channel; the channel is skipped when ``None``.
    :param num_classes: number of output classes.
    :return: the uncompiled Keras ``Model``.
    """
    use_non_static = non_static_embedding_layer is not None

    # Allocate space for the per-filter-size outputs of each channel
    static_channels = FILTER_SIZES[:]
    non_static_channels = FILTER_SIZES[:]

    time_steps = int(pos_input.shape[1])
    input_dim = int(pos_input.shape[2])
    embedding_input_dim = int(static_embedding_layer.shape[2])

    # Attention over time steps derived from the POS input.
    pos_attn = Permute((2, 1))(pos_input)
    pos_attn = Reshape((input_dim, time_steps))(pos_attn)
    pos_attn = Dense(time_steps, activation='softmax')(pos_attn)  # single layer perceptron
    pos_attn = Dropout(0.5)(pos_attn)
    pos_attn = Lambda(lambda t: K.mean(t, axis=1), name='dim_reduction')(pos_attn)
    # Repeat the per-time-step weights across the embedding axis so they can
    # be multiplied with the embedded tokens.
    pos_attn = RepeatVector(embedding_input_dim)(pos_attn)
    pos_attn = Permute((2, 1), name='pos_attention_vec')(pos_attn)

    static_attn_input = multiply([static_embedding_layer, pos_attn])
    if use_non_static:
        # The non-static channel must attend over its own embeddings; it was
        # previously built from static_embedding_layer, so the non-static
        # embeddings never reached the network.
        # Assumes both embeddings share the same shape -- TODO confirm.
        non_static_attn_input = multiply([non_static_embedding_layer, pos_attn])

    for i, filter_size in enumerate(FILTER_SIZES):
        static_channels[i] = Conv1D(MAX_SEQUENCE_LENGTH, filter_size,
                                    activation='relu',
                                    padding='valid')(static_attn_input)
        # Pool over the whole convolved sequence (max over time).
        static_channels[i] = MaxPooling1D(
            MAX_SEQUENCE_LENGTH - filter_size + 1)(static_channels[i])

        if use_non_static:
            non_static_channels[i] = Conv1D(MAX_SEQUENCE_LENGTH, filter_size,
                                            activation='relu',
                                            padding='valid')(non_static_attn_input)
            non_static_channels[i] = MaxPooling1D(
                MAX_SEQUENCE_LENGTH - filter_size + 1)(non_static_channels[i])

    static_conv = concatenate(static_channels)
    static_conv = Flatten()(static_conv)
    static_conv = Dropout(0.3)(static_conv)
    output_static = Dense(SENTENCE_DIM, activation='relu',
                          name='static_output')(static_conv)

    if use_non_static:
        non_static_conv = concatenate(non_static_channels)
        non_static_conv = Flatten()(non_static_conv)
        non_static_conv = Dropout(0.3)(non_static_conv)
        output_non_static = Dense(SENTENCE_DIM, activation='relu',
                                  name='non_static_output')(non_static_conv)
        x = concatenate([output_static, output_non_static])
    else:
        x = output_static

    main_output = Dense(num_classes, activation='softmax', name='main_output')(x)

    model = Model(inputs=[tokens_input, pos_input], outputs=[main_output])
    model.summary()

    return model