def __init__(self,
             attention_mask=None,
             num_attention_heads=1,
             size_per_head=512,
             query_act=None,
             key_act=None,
             value_act=None,
             attention_probs_dropout_prob=0.0,
             initializer_range=0.02,
             do_return_2d_tensor=False,
             from_seq_length=None,
             to_seq_length=None,
             W_regularizer=None,
             b_regularizer=None,
             W_constraint=None,
             b_constraint=None,
             bias=True,
             **kwargs):
    self.init = initializers.truncated_normal(stddev=initializer_range)
    self.attention_mask = attention_mask
    self.num_attention_heads = num_attention_heads
    self.size_per_head = size_per_head
    self.query_act = query_act
    self.key_act = key_act
    self.value_act = value_act
    self.attention_probs_dropout_prob = attention_probs_dropout_prob
    self.initializer_range = initializer_range
    self.do_return_2d_tensor = do_return_2d_tensor
    self.from_seq_length = from_seq_length
    self.to_seq_length = to_seq_length
    self.output_dim = num_attention_heads * size_per_head
    self.W_regularizer = regularizers.get(W_regularizer)
    self.b_regularizer = regularizers.get(b_regularizer)
    self.W_constraint = constraints.get(W_constraint)
    self.b_constraint = constraints.get(b_constraint)
    self.bias = bias
    super(Attention, self).__init__(**kwargs)
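# Illustrative usage sketch (not from the original source): the layer is
# called on a [from_tensor, to_tensor] pair, exactly as Transformer() below
# does with [layer_input, layer_input]. Assumes the full Attention class
# (build/call) is defined alongside this __init__; shapes are hypothetical.
def _attention_usage_demo():
    from keras.layers import Input
    from keras.models import Model

    seq = Input(shape=(128, 768))            # (batch, seq_length, hidden)
    out = Attention(num_attention_heads=12,
                    size_per_head=64,        # 12 * 64 = 768 = output_dim
                    attention_probs_dropout_prob=0.1,
                    from_seq_length=128,
                    to_seq_length=128)([seq, seq])  # self-attention: from == to
    return Model(inputs=seq, outputs=out)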
def encoder_model(size_image, size_age_label, size_name_label,
                  size_gender_label, num_input_channels, size_kernel, size_z,
                  num_encoder_channels):
    # Map the (1, 1, C) age/name/gender label maps up to (size_image, size_image, C).
    kernel_initializer = initializers.truncated_normal(stddev=0.02)
    bias_initializer = initializers.constant(value=0.0)

    # Input images.
    input_images = Input(shape=(size_image, size_image, num_input_channels))

    # Age labels: use duplicate_conv to tile them to the image size, then concatenate.
    input_ages_conv = Input(shape=(1, 1, size_age_label))  # (1, 1, 10*tile_ratio)
    input_ages_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_age_label),
        arguments={'times': size_image})(input_ages_conv)  # (128, 128, 10*tile_ratio)
    input_names_conv = Input(shape=(1, 1, size_name_label))
    input_names_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_name_label),
        arguments={'times': size_image})(input_names_conv)
    input_genders_conv = Input(shape=(1, 1, size_gender_label))
    input_genders_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_gender_label),
        arguments={'times': size_image})(input_genders_conv)

    current = Concatenate(axis=-1)([
        input_images, input_ages_conv_repeat, input_names_conv_repeat,
        input_genders_conv_repeat
    ])

    # E_conv layers (each stride-2 conv halves the spatial size) + lrelu.
    num_layers = len(num_encoder_channels)
    for i in range(num_layers):
        name = 'E_conv' + str(i)
        current = Conv2D(filters=num_encoder_channels[i],
                         kernel_size=(size_kernel, size_kernel),
                         strides=(2, 2),
                         padding='same',
                         kernel_initializer=kernel_initializer,
                         bias_initializer=bias_initializer,
                         name=name)(current)
        size_image = int(size_image / 2)
        current = Lambda(lrelu,
                         output_shape=(size_image, size_image,
                                       int(current.shape[3])))(current)
        # current = Lambda(tf.contrib.layers.batch_norm,
        #                  output_shape=(size_image, size_image, int(current.shape[3])),
        #                  arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)

    # Reshape, then a fully connected layer mapping to the latent vector z.
    current = Flatten()(current)
    kernel_initializer = initializers.random_normal(stddev=0.02)
    name = 'E_fc'
    current = Dense(units=size_z,
                    activation='tanh',
                    kernel_initializer=kernel_initializer,
                    bias_initializer=bias_initializer,
                    name=name)(current)

    # Output.
    return Model(inputs=[
        input_images, input_ages_conv, input_names_conv, input_genders_conv
    ], outputs=current)
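# duplicate_conv and lrelu are helpers this file calls but does not define
# here. A hedged reconstruction, inferred from the Lambda call sites
# (arguments={'times': ...} and the declared output_shape), not copied from
# the original source:
from keras import backend as K


def duplicate_conv(x, times):
    # Tile a (batch, 1, 1, C) label map to (batch, times, times, C) so it
    # can be concatenated with an image tensor along the channel axis.
    return K.tile(x, [1, times, times, 1])


def lrelu(x, leak=0.2):
    # Leaky ReLU, the usual DCGAN non-linearity; the 0.2 slope is an
    # assumption, not taken from this file.
    return K.maximum(x, leak * x)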
def discriminator_model():
    kernel_initializer = initializers.truncated_normal(stddev=0.02)
    bias_initializer = initializers.constant(value=0.0)
    inputs_img = Input(shape=(64, 64, 3))

    current = Conv2D(filters=64,
                     kernel_size=(5, 5),
                     padding='same',
                     strides=(2, 2),
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)(inputs_img)
    current = Lambda(lrelu, output_shape=(32, 32, int(current.shape[3])))(current)

    current = Conv2D(filters=64 * 2,
                     kernel_size=(5, 5),
                     padding='same',
                     strides=(2, 2),
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)(current)
    # current = Lambda(tf.layers.batch_normalization,
    #                  output_shape=(16, 16, int(current.shape[3])),
    #                  arguments={'momentum': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(tf.contrib.layers.batch_norm,
                     output_shape=(16, 16, int(current.shape[3])),
                     arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(lrelu, output_shape=(16, 16, int(current.shape[3])))(current)

    current = Conv2D(filters=64 * 4,
                     kernel_size=(5, 5),
                     padding='same',
                     strides=(2, 2),
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)(current)
    # current = Lambda(tf.layers.batch_normalization,
    #                  output_shape=(8, 8, int(current.shape[3])),
    #                  arguments={'momentum': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(tf.contrib.layers.batch_norm,
                     output_shape=(8, 8, int(current.shape[3])),
                     arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(lrelu, output_shape=(8, 8, int(current.shape[3])))(current)

    current = Conv2D(filters=64 * 8,
                     kernel_size=(5, 5),
                     padding='same',
                     strides=(2, 2),
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)(current)
    # current = Lambda(tf.layers.batch_normalization,
    #                  output_shape=(4, 4, int(current.shape[3])),
    #                  arguments={'momentum': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(tf.contrib.layers.batch_norm,
                     output_shape=(4, 4, int(current.shape[3])),
                     arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(lrelu, output_shape=(4, 4, int(current.shape[3])))(current)

    kernel_initializer = initializers.random_normal(stddev=0.02)
    current = Reshape(target_shape=(4 * 4 * 512, ))(current)
    current = Dense(1,
                    kernel_initializer=kernel_initializer,
                    bias_initializer=bias_initializer)(current)
    current = Activation('sigmoid')(current)
    return Model(inputs=inputs_img, outputs=current)
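# Hypothetical training setup for the 64x64 discriminator above; the Adam
# settings are the common DCGAN values, not taken from this file.
def _discriminator_64_demo():
    from keras.optimizers import Adam

    d = discriminator_model()
    d.compile(optimizer=Adam(lr=2e-4, beta_1=0.5),
              loss='binary_crossentropy')
    d.summary()  # 64x64x3 image in, single sigmoid probability out
    return d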
def discriminator_model():
    kernel_initializer = initializers.truncated_normal(stddev=0.02)
    bias_initializer = initializers.constant(value=0.0)
    inputs_img = Input(shape=(28, 28, 1))
    inputs_y = Input(shape=(10, ))
    input_y_conv = Input(shape=(1, 1, 10))

    # current = Reshape((28, 28, 1))(inputs_img)
    # `concatenate` here is the tiling helper (same call signature as
    # duplicate_conv): it repeats the (1, 1, 10) label map across space.
    inputs_y_repeat = Lambda(concatenate,
                             output_shape=(28, 28, 10),
                             arguments={'times': 28})(input_y_conv)
    current = Concatenate(axis=-1)([inputs_img, inputs_y_repeat])
    current = Conv2D(filters=1 + 10,
                     kernel_size=(5, 5),
                     padding='same',
                     strides=(2, 2),
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)(current)
    current = Lambda(lrelu, output_shape=(14, 14, int(current.shape[3])))(current)

    inputs_y_repeat = Lambda(concatenate,
                             output_shape=(14, 14, 10),
                             arguments={'times': 14})(input_y_conv)
    current = Concatenate(axis=-1)([current, inputs_y_repeat])
    current = Conv2D(filters=64 + 10,
                     kernel_size=(5, 5),
                     padding='same',
                     strides=(2, 2),
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer)(current)
    # current = Lambda(tf.layers.batch_normalization,
    #                  output_shape=(7, 7, int(current.shape[3])),
    #                  arguments={'momentum': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(tf.contrib.layers.batch_norm,
                     output_shape=(7, 7, int(current.shape[3])),
                     arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(lrelu, output_shape=(7, 7, int(current.shape[3])))(current)

    kernel_initializer = initializers.random_normal(stddev=0.02)
    current = Reshape(target_shape=(7 * 7 * 74, ))(current)
    current = Concatenate(axis=-1)([current, inputs_y])
    current = Dense(1024,
                    kernel_initializer=kernel_initializer,
                    bias_initializer=bias_initializer)(current)
    # current = Lambda(tf.layers.batch_normalization,
    #                  output_shape=(int(current.shape[1]),),
    #                  arguments={'momentum': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(tf.contrib.layers.batch_norm,
                     output_shape=(int(current.shape[1]), ),
                     arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    current = Lambda(lrelu, output_shape=(int(current.shape[1]), ))(current)
    current = Concatenate(axis=-1)([current, inputs_y])
    current = Dense(1,
                    kernel_initializer=kernel_initializer,
                    bias_initializer=bias_initializer)(current)
    current = Activation('sigmoid')(current)

    # Alternative plain CNN classifier, kept for reference:
    # conv1 = Conv2D(filters=64, kernel_size=(5, 5), padding="same")(x)
    # conv1 = LeakyReLU(alpha=0.2)(conv1)
    # Convolution2D is an alias of Conv2D:
    # conv1 = Convolution2D(filters=64, kernel_size=(5, 5), padding="same", activation="relu")(x)
    # max1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    # conv2 = Conv2D(filters=128, kernel_size=(5, 5), padding="same")(max1)
    # conv2 = LeakyReLU(alpha=0.2)(conv2)
    # conv2 = Convolution2D(filters=128, kernel_size=(5, 5), padding="same")(max1)
    # max2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    # flat = Flatten()(max2)
    # dense1 = Dense(units=1024, activation="relu")(flat)
    # # dense1 = LeakyReLU(alpha=0.2)(dense1)
    # dense2 = Dense(units=1, activation="sigmoid")(dense1)

    return Model(inputs=[inputs_img, inputs_y, input_y_conv], outputs=current)
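# Hypothetical call sketch for the conditional MNIST discriminator above:
# the class label is fed twice, flat for the Dense layers and as a
# (1, 1, 10) map for spatial tiling. Random arrays stand in for real data.
def _conditional_discriminator_demo():
    import numpy as np

    d = discriminator_model()
    imgs = np.random.rand(8, 28, 28, 1).astype('float32')
    labels = np.eye(10, dtype='float32')[np.random.randint(0, 10, size=8)]
    probs = d.predict([imgs, labels, labels.reshape(8, 1, 1, 10)])
    print(probs.shape)  # (8, 1)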
def Transformer(input_tensor,
                attention_mask=None,
                hidden_size=768,
                num_hidden_layers=12,
                num_attention_heads=12,
                intermediate_size=3072,
                intermediate_act_fn=None,
                hidden_dropout_prob=0.1,
                attention_probs_dropout_prob=0.1,
                initializer_range=0.02,
                do_return_all_layers=False):
    if hidden_size % num_attention_heads != 0:
        raise ValueError(
            "The hidden size (%d) is not a multiple of the number of attention "
            "heads (%d)" % (hidden_size, num_attention_heads))

    attention_head_size = int(hidden_size / num_attention_heads)
    input_shape = K.int_shape(input_tensor)
    # batch_size = input_shape[0]
    seq_length = input_shape[1]
    input_width = input_shape[2]

    # The Transformer adds residual connections in every layer, so the input
    # width needs to match the hidden size.
    if input_width != hidden_size:
        raise ValueError("The width of the input tensor (%d) != hidden size (%d)"
                         % (input_width, hidden_size))

    # We keep the representation as a 2D tensor to avoid re-shaping it back
    # and forth from a 3D tensor to a 2D tensor. Re-shapes are normally free
    # on the GPU/CPU but may not be free on the TPU, so we want to minimize
    # them to help the optimizer.
    prev_output = reshape_to_matrix(input_tensor)

    all_layer_outputs = []
    for layer_idx in range(num_hidden_layers):
        layer_input = prev_output

        attention_heads = []
        attention_head = Attention(
            attention_mask=attention_mask,
            num_attention_heads=num_attention_heads,
            size_per_head=attention_head_size,
            query_act='gelu',
            key_act='gelu',
            value_act='gelu',
            attention_probs_dropout_prob=attention_probs_dropout_prob,
            initializer_range=initializer_range,
            do_return_2d_tensor=True,
            from_seq_length=seq_length,
            to_seq_length=seq_length)([layer_input, layer_input])
        attention_heads.append(attention_head)

        if len(attention_heads) == 1:
            attention_output = attention_heads[0]
        else:
            # In the case where we have other sequences, we just concatenate
            # them to the self-attention head before the projection.
            attention_output = Concatenate(axis=-1)(attention_heads)

        # Run a linear projection of `hidden_size` then add a residual
        # with `layer_input`.
        attention_output = Dense(
            hidden_size,
            kernel_initializer=initializers.truncated_normal(
                stddev=initializer_range))(attention_output)
        attention_output = Dropout(hidden_dropout_prob)(attention_output)
        attention_output = Add()([attention_output, layer_input])
        attention_output = LayerNormalization()(attention_output)

        # The activation is only applied to the "intermediate" hidden layer.
        intermediate_output = Dense(
            intermediate_size,
            activation=intermediate_act_fn,
            kernel_initializer=initializers.truncated_normal(
                stddev=initializer_range))(attention_output)

        # Down-project back to `hidden_size` then add the residual.
        layer_output = Dense(
            hidden_size,
            kernel_initializer=initializers.truncated_normal(
                stddev=initializer_range))(intermediate_output)
        layer_output = Dropout(hidden_dropout_prob)(layer_output)
        layer_output = Add()([layer_output, attention_output])
        layer_output = LayerNormalization()(layer_output)
        prev_output = layer_output
        all_layer_outputs.append(layer_output)

    if do_return_all_layers:
        final_outputs = []
        for layer_output in all_layer_outputs:
            final_output = reshape_from_matrix(layer_output, input_shape)
            final_outputs.append(final_output)
        return final_outputs
    else:
        final_output = reshape_from_matrix(prev_output, input_shape)
        return final_output
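# reshape_to_matrix and reshape_from_matrix are assumed helpers (the names
# follow the BERT reference implementation). A hedged sketch consistent with
# how Transformer() uses them; not copied from this file.
def reshape_to_matrix(input_tensor):
    # (batch, seq_length, width) -> (batch * seq_length, width), so every
    # Dense inside the Transformer loop operates on a 2-D tensor.
    width = int(input_tensor.shape[-1])
    return Lambda(lambda t: tf.reshape(t, [-1, width]))(input_tensor)


def reshape_from_matrix(output_tensor, orig_shape):
    # Inverse of reshape_to_matrix, using the shape from K.int_shape:
    # (batch * seq_length, width) -> (batch, seq_length, width).
    seq_length, width = orig_shape[1], orig_shape[2]
    return Lambda(lambda t: tf.reshape(t, [-1, seq_length, width]))(output_tensor)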
def discriminator_img_model(size_image, size_kernel, size_age_label,
                            size_name_label, size_gender_label,
                            num_input_channels, num_Dimg_channels,
                            num_Dimg_fc_channels):
    kernel_initializer = initializers.truncated_normal(stddev=0.02)
    bias_initializer = initializers.constant(value=0.0)

    # Dimg model: real/fake discriminator conditioned on the age, name and
    # gender labels.
    input_images = Input(shape=(size_image, size_image, num_input_channels))
    input_ages_conv = Input(shape=(1, 1, size_age_label))  # (1, 1, 10*tile_ratio)
    input_ages_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_age_label),
        arguments={'times': size_image})(input_ages_conv)  # (128, 128, 10*tile_ratio)
    input_names_conv = Input(shape=(1, 1, size_name_label))
    input_names_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_name_label),
        arguments={'times': size_image})(input_names_conv)
    input_genders_conv = Input(shape=(1, 1, size_gender_label))
    input_genders_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_gender_label),
        arguments={'times': size_image})(input_genders_conv)

    # Concatenate the image and the tiled labels channel-wise.
    current = Concatenate(axis=-1)([
        input_images, input_ages_conv_repeat, input_names_conv_repeat,
        input_genders_conv_repeat
    ])
    num_layers = len(num_Dimg_channels)

    # name = 'D_img_conv0'
    # current = Conv2D(filters=num_Dimg_channels[0],
    #                  kernel_size=(size_kernel, size_kernel),
    #                  strides=(2, 2),
    #                  padding='same',
    #                  kernel_initializer=kernel_initializer,
    #                  bias_initializer=bias_initializer,
    #                  name=name)(current)
    # size_image = int(size_image / 2)
    # current = Lambda(tf.contrib.layers.batch_norm,
    #                  output_shape=(size_image, size_image, int(current.shape[3])),
    #                  arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
    # current = Lambda(lrelu, output_shape=(size_image, size_image, int(current.shape[3])))(current)

    # Conv layers with stride 2; each halves the spatial size.
    for i in range(num_layers):
        name = 'D_img_conv' + str(i)
        current = Conv2D(filters=num_Dimg_channels[i],
                         kernel_size=(size_kernel, size_kernel),
                         strides=(2, 2),
                         padding='same',
                         kernel_initializer=kernel_initializer,
                         bias_initializer=bias_initializer,
                         name=name)(current)
        size_image = int(size_image / 2)
        # current = Lambda(tf.contrib.layers.batch_norm,
        #                  output_shape=(size_image, size_image, int(current.shape[3])),
        #                  arguments={'decay': 0.9, 'epsilon': 1e-5, 'scale': True})(current)
        current = Lambda(lrelu,
                         output_shape=(size_image, size_image,
                                       int(current.shape[3])))(current)

    # current = Flatten()(current)
    current = Reshape(target_shape=(size_image * size_image *
                                    int(current.shape[3]), ))(current)

    # Fully connected layers.
    kernel_initializer = initializers.random_normal(stddev=0.02)
    name = 'D_img_fc1'
    current = Dense(units=num_Dimg_fc_channels,
                    kernel_initializer=kernel_initializer,
                    bias_initializer=bias_initializer,
                    name=name)(current)
    current = Lambda(lrelu, output_shape=(num_Dimg_fc_channels, ))(current)
    name = 'D_img_fc2'
    current = Dense(units=1,
                    activation='sigmoid',
                    kernel_initializer=kernel_initializer,
                    bias_initializer=bias_initializer,
                    name=name)(current)
    # output = Activation('sigmoid')(current)

    # Output.
    return Model(inputs=[
        input_images, input_ages_conv, input_names_conv, input_genders_conv
    ], outputs=current)
def discriminator_img_model(size_image, size_kernel, size_age_label,
                            size_name_label, size_gender_label,
                            num_input_channels, num_Dimg_channels,
                            num_Dimg_fc_channels, GANs):
    kernel_initializer = initializers.truncated_normal(stddev=0.02)
    bias_initializer = initializers.constant(value=0.0)

    # Dimg model: discriminator conditioned on the age, name and gender labels.
    input_images = Input(shape=(size_image, size_image, num_input_channels))
    input_ages_conv = Input(shape=(1, 1, size_age_label))  # (1, 1, 10*tile_ratio)
    input_names_conv = Input(shape=(1, 1, size_name_label))
    input_genders_conv = Input(shape=(1, 1, size_gender_label))
    input_ages_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_age_label),
        arguments={'times': size_image})(input_ages_conv)  # (128, 128, 10*tile_ratio)
    input_names_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_name_label),
        arguments={'times': size_image})(input_names_conv)
    input_genders_conv_repeat = Lambda(
        duplicate_conv,
        output_shape=(size_image, size_image, size_gender_label),
        arguments={'times': size_image})(input_genders_conv)

    # Concatenate the image and the tiled labels channel-wise.
    current = Concatenate(axis=-1)([
        input_images, input_ages_conv_repeat, input_names_conv_repeat,
        input_genders_conv_repeat
    ])
    num_layers = len(num_Dimg_channels)
    size_current = size_image

    # Each block is two convs: a stride-1 conv, then a stride-2 conv that
    # halves the spatial size. Batch norm is skipped on the first block.
    for i in range(num_layers):
        # if i == 0:
        #     strides = 1
        # else:
        #     strides = 2
        #     size_image = int(size_image / 2)
        #
        # name = 'D_img_conv' + str(i)
        # current = Conv2D(filters=num_Dimg_channels[i],
        #                  kernel_size=(size_kernel, size_kernel),
        #                  strides=(strides, strides),
        #                  padding='same',
        #                  kernel_initializer=kernel_initializer,
        #                  bias_initializer=bias_initializer,
        #                  name=name)(current)
        name = 'D_img_conv' + str(i) + '_1'
        current = Conv2D(filters=num_Dimg_channels[i],
                         kernel_size=(size_kernel, size_kernel),
                         strides=(1, 1),
                         padding='same',
                         kernel_initializer=kernel_initializer,
                         bias_initializer=bias_initializer,
                         name=name)(current)
        if i != 0:
            current = Lambda(tf.contrib.layers.batch_norm,
                             output_shape=(size_current, size_current,
                                           int(current.shape[3])),
                             arguments={'decay': 0.9, 'epsilon': 1e-5,
                                        'scale': True})(current)
        current = Lambda(lrelu,
                         output_shape=(size_current, size_current,
                                       int(current.shape[3])))(current)

        size_current = int(size_current / 2)
        name = 'D_img_conv' + str(i) + '_2'
        current = Conv2D(filters=num_Dimg_channels[i],
                         kernel_size=(size_kernel, size_kernel),
                         strides=(2, 2),
                         padding='same',
                         kernel_initializer=kernel_initializer,
                         bias_initializer=bias_initializer,
                         name=name)(current)
        if i != 0:
            current = Lambda(tf.contrib.layers.batch_norm,
                             output_shape=(size_current, size_current,
                                           int(current.shape[3])),
                             arguments={'decay': 0.9, 'epsilon': 1e-5,
                                        'scale': True})(current)
        current = Lambda(lrelu,
                         output_shape=(size_current, size_current,
                                       int(current.shape[3])))(current)

        # if i < num_layers - 1:
        #     input_ages_conv_repeat = Lambda(duplicate_conv,
        #         output_shape=(size_current, size_current, size_age_label),
        #         arguments={'times': size_current})(input_ages_conv)  # (128, 128, 10*tile_ratio)
        #     input_names_conv_repeat = Lambda(duplicate_conv,
        #         output_shape=(size_current, size_current, size_name_label),
        #         arguments={'times': size_current})(input_names_conv)
        #     input_genders_conv_repeat = Lambda(duplicate_conv,
        #         output_shape=(size_current, size_current, size_gender_label),
        #         arguments={'times': size_current})(input_genders_conv)
        #     current = Concatenate(axis=-1)([current, input_ages_conv_repeat,
        #                                     input_names_conv_repeat,
        #                                     input_genders_conv_repeat])

    # PatchGAN discriminator head, kept for reference:
    # name = 'D_img_conv_final'
    # if GANs == 'LSGAN':
    #     current = Conv2D(filters=1,
    #                      kernel_size=(size_kernel, size_kernel),
    #                      strides=(1, 1),
    #                      padding='same',
    #                      kernel_initializer=kernel_initializer,
    #                      bias_initializer=bias_initializer,
    #                      name=name)(current)
    # elif GANs == 'cGAN':
    #     current = Conv2D(filters=1,
    #                      kernel_size=(size_kernel, size_kernel),
    #                      strides=(1, 1),
    #                      padding='same',
    #                      kernel_initializer=kernel_initializer,
    #                      bias_initializer=bias_initializer,
    #                      activation='sigmoid',
    #                      name=name)(current)

    # Fully connected head: LSGAN uses a linear output, cGAN a sigmoid.
    name = 'D_img_conv_final_1'
    current = Flatten()(current)
    current = Dense(units=num_Dimg_fc_channels, name=name)(current)
    current = Lambda(lrelu, output_shape=(num_Dimg_fc_channels, ))(current)
    name = 'D_img_conv_final_2'
    if GANs == 'LSGAN':
        current = Dense(units=1, name=name)(current)
    elif GANs == 'cGAN':
        current = Dense(units=1, activation='sigmoid', name=name)(current)

    # Output.
    return Model(inputs=[
        input_images, input_ages_conv, input_names_conv, input_genders_conv
    ], outputs=current)
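# The GANs flag above only switches the final activation, which pairs with
# the training loss: LSGAN trains the linear output with MSE, cGAN trains
# the sigmoid output with binary cross-entropy. A hedged sketch; every
# argument value below is an assumption, not taken from this file.
def _discriminator_img_demo():
    d = discriminator_img_model(size_image=128, size_kernel=5,
                                size_age_label=10, size_name_label=100,
                                size_gender_label=2, num_input_channels=3,
                                num_Dimg_channels=[64, 128, 256, 512],
                                num_Dimg_fc_channels=1024, GANs='LSGAN')
    d.compile(optimizer='adam', loss='mse')  # 'binary_crossentropy' for GANs='cGAN'
    return d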