def hieroRecoModel_offline(input_shape):
    """Build the embedding network and the triplet-loss model that trains it.

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    features -- Keras Model mapping one image to a 128-unit embedding that is
                L2-normalised along axis 1
    loss_model -- Keras Model taking [anchor, positive, negative] images,
                  returning their three embeddings, with the mean of
                  ``triplet_loss`` attached through ``add_loss`` and compiled
                  with the Adam optimizer
    """
    image_in = Input(input_shape)

    # Zero-padding followed by the first convolutional block.
    net = ZeroPadding2D((3, 3))(image_in)
    net = Conv2D(64, (3, 3), strides=(2, 2), name='conv1')(net)
    net = BatchNormalization(axis=1, name='bn1')(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((3, 3), strides=2)(net)

    # Second convolutional block (no batch normalisation here).
    net = Conv2D(64, (3, 3))(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((3, 3), strides=2)(net)

    # Projection to the embedding space, then L2 normalisation.
    net = Flatten()(net)
    net = Dense(128, name='dense_layer')(net)
    net = Lambda(lambda t: K.l2_normalize(t, axis=1))(net)

    features = Model(image_in, net, name="features")

    # Siamese wrapper: the same `features` model embeds the three inputs
    # (anchor, positive, negative -- in that order).
    triplet_inputs = [Input(shape=input_shape) for _ in range(3)]
    triplet_embeddings = [features(image) for image in triplet_inputs]

    # The loss is attached to the model itself, so compile() gets loss=None.
    loss_model = Model(inputs=triplet_inputs,
                       outputs=triplet_embeddings,
                       name='loss')
    loss_model.add_loss(K.mean(triplet_loss(triplet_embeddings)))
    loss_model.compile(loss=None, optimizer='adam')

    return features, loss_model
def create_multi_input_model_from(layer1, layer2):
    """Return a two-input model that averages `layer1` and `layer2` outputs.

    Each layer is applied to its own ``(data_dim,)`` input. Three extra losses
    are attached with ``add_loss``: the mean of the second branch's output and
    the constant 1, twice.
    """
    first_in = Input(shape=(data_dim,))
    second_in = Input(shape=(data_dim,))

    first_out, second_out = layer1(first_in), layer2(second_in)
    averaged = Average()([first_out, second_out])

    model = Model([first_in, second_in], averaged)
    for extra_loss in (K.mean(second_out), 1, 1):
        model.add_loss(extra_loss)
    return model
class kmn_many_sigma(object):
    """Kernel mixture network: a softmax-weighted mixture of fixed Gaussians.

    Kernel centres are sampled once from ``y_train`` and the kernel widths are
    fixed; the network only learns the mixture weights.
    """

    def __init__(self, x_train, y_train, center_sampling_method='k_means', n_centers=20,
                 sigmas=None, keep_edges=True, estimator=None, validation_set=None,
                 batch_size=32):
        """Store the data and sample the kernel centres.

        Args:
            x_train: 2-D array of inputs, ``(n_data, n_features)``.
            y_train: array of targets, reshaped here to ``(n_data, 1)``.
            center_sampling_method: forwarded to ``sample_center_points``.
            n_centers: number of kernel centres requested.
            sigmas: optional sequence of kernel widths; each width is repeated
                ``n_centers`` times. ``None`` means one unit width per centre.
            keep_edges: forwarded to ``sample_center_points``.
            estimator: accepted but not used by this class.
            validation_set: optional dict with keys ``'x'`` and ``'y'``.
            batch_size: stored on the instance (``fit`` does not read it).
        """
        self.center_sampling_method = center_sampling_method
        self.n_centers = n_centers
        self.batch_size = batch_size

        if sigmas is None:
            # n_sigma is read later (weights layer size, sampling), so it must
            # be defined on this branch as well.
            self.n_sigma = 1
            self.sigmas = np.ones(self.n_centers).astype('float32')
        else:
            self.n_sigma = len(sigmas)
            self.sigmas = np.repeat(sigmas, self.n_centers).astype('float32')

        self.keep_edges = keep_edges

        self.center_locs = sample_center_points(y_train,
                                                method=self.center_sampling_method,
                                                k=self.n_centers,
                                                keep_edges=self.keep_edges).astype('float32')
        # NOTE(review): center_locs is used as-is against `sigmas`, which has
        # n_centers * n_sigma entries; for n_sigma > 1 the two only broadcast
        # if the centres are tiled somewhere -- confirm.

        self.n_data, self.n_features = x_train.shape

        self.train = [
            x_train.reshape(self.n_data, self.n_features).astype('float32'),
            y_train.reshape(self.n_data, 1).astype('float32'),
        ]

        self.validation_present = validation_set is not None
        self.validation = None
        if self.validation_present:
            x_val = validation_set['x']
            y_val = validation_set['y']
            self.n_data_val, _ = x_val.shape
            self.validation = [
                x_val.reshape(self.n_data_val, self.n_features).astype('float32'),
                y_val.reshape(self.n_data_val, 1).astype('float32'),
            ]

        self.oneDivSqrtTwoPI = 1.0 / np.sqrt(2.0 * np.pi)  # normalisation factor for gaussian.

    def gaussian_distribution(self, y, mu, sigma):
        """Gaussian density of `y` for mean `mu` and width `sigma` (backend ops)."""
        result = (y - mu) / sigma
        result = - 0.5 * (result * result)
        return (K.exp(result) / sigma) * self.oneDivSqrtTwoPI

    def mdn_loss_function(self, args):
        """Mean negative log-likelihood of `y` under the weighted mixture.

        Args:
            args: pair ``(y, weights)`` as passed by the Lambda layer.
        """
        y, weights = args
        result = self.gaussian_distribution(y, self.center_locs, self.sigmas) * weights
        result = K.sum(result, axis=1)
        result = - K.log(result)
        return K.mean(result)

    def estimator_many_sigma(self, depth, n_filters, n_center, n_sigma):
        """Build ``self.model`` (training) and ``self.output_weights`` (inference).

        Args:
            depth: number of residual blocks stacked after the first convolution.
            n_filters: base filter count; block ``i`` uses ``n_filters * (i + 1)``.
            n_center, n_sigma: accepted for the signature only; the layer size
                is taken from ``self.n_centers`` and ``self.n_sigma``.
        """
        # Inputs
        # NOTE(review): self.n_lambda is not assigned anywhere in this class;
        # it has to be set on the instance before this method is called.
        input_x = Input(shape=(self.n_lambda, 4), name='stokes_input')
        y_true = Input(shape=(1,), name='y_true')
        mu_input = Input(shape=(1,), name='mu_input')

        # Neural network
        x = Conv1D(n_filters, 3, activation='relu', padding='same',
                   kernel_initializer='he_normal', name='conv_1')(input_x)

        for i in range(depth):
            x = residual(x, n_filters * (i + 1), 'relu', strides=2)

        intermediate = Flatten(name='flat')(x)
        # NOTE(review): mu_input feeds the graph here but is not listed in the
        # Model inputs below -- confirm whether it should be a model input.
        intermediate_conv = concatenate([intermediate, mu_input], name='FC')

        # Output weights
        weights = Dense(self.n_centers * self.n_sigma, activation='softmax',
                        name='weights')(intermediate_conv)

        # Definition of the loss function
        loss = Lambda(self.mdn_loss_function, output_shape=(1,), name='loss')([y_true, weights])

        self.model = Model(inputs=[input_x, y_true], outputs=[loss])
        self.model.add_loss(loss)

        # Compile with the loss weight set to None, so it will be omitted
        self.model.compile(loss=[None], loss_weights=[None], optimizer=Adam(lr=0.01))
        self.model.summary()

        # Now generate a second network that ends up in the weights for later evaluation
        layer_name = 'weights'
        self.output_weights = Model(inputs=self.model.input,
                                    outputs=self.model.get_layer(layer_name).output)

    def fit(self, depth=None, n_filters=None):
        """Build the estimator and train it, logging to ``training.csv``.

        Args:
            depth: passed to ``estimator_many_sigma``.
            n_filters: passed to ``estimator_many_sigma``.

        Raises:
            TypeError: if `depth` or `n_filters` is missing. The estimator
                needs both, and the class has no value to substitute.
        """
        if depth is None or n_filters is None:
            raise TypeError("fit() requires `depth` and `n_filters` to build the estimator")

        self.estimator_many_sigma(depth, n_filters, self.n_centers, self.n_sigma)

        cb = CSVLogger("training.csv")
        fit_kwargs = dict(x=self.train, epochs=300, batch_size=3750, callbacks=[cb])
        # Only hand validation data to Keras when a validation set was given.
        if self.validation_present:
            fit_kwargs['validation_data'] = (self.validation, None)
        self.model.fit(**fit_kwargs)

    def predict_density(self, x_test):
        """Evaluate the predicted mixture density on a fixed grid.

        The grid is 300 points on [-10, 10]. The mixture weights come from
        ``self.output_weights``, fed the same way as in ``sample_density``.

        Args:
            x_test: network input. The kernel/weight product broadcasts for a
                single sample (weights of shape ``(1, K)``).

        Returns:
            Tuple ``(y, density)``: the grid and the density at each grid point.
        """
        y = np.linspace(-10, 10, 300)
        weights = self.output_weights.predict([x_test, x_test])

        # Same formula as gaussian_distribution, in NumPy: shape (300, K).
        z = (y[:, None] - self.center_locs) / self.sigmas
        kernels = np.exp(-0.5 * z * z) / self.sigmas * self.oneDivSqrtTwoPI

        result = np.sum(kernels * weights, axis=1)
        return y, result

    def sample_density(self, x_test):
        """Draw one sample per row of `x_test` from its predicted mixture."""
        # The weights model shares the training model's two inputs; the second
        # one (y_true) does not affect the weights, so x_test is fed twice.
        test = [x_test, x_test]
        weights = self.output_weights.predict(test)
        print(weights.shape)

        locs = self.center_locs
        sigma = self.sigmas
        n = len(x_test)
        out = np.zeros(n)
        for i in range(n):
            # Pick a mixture component, then sample from its Gaussian.
            ind = np.random.choice(self.n_centers * self.n_sigma, p=weights[i, :])
            out[i] = np.random.normal(loc=locs[ind], scale=sigma[ind])
        return out

    def plot_loss(self):
        """Plot the loss curves recorded in ``training.csv``."""
        # `.values` replaces DataFrame.as_matrix(), which pandas has removed.
        out = pd.read_csv('training.csv').values
        f, ax = pl.subplots()
        ax.plot(out[:, 1], label='Training set')
        if self.validation_present:
            ax.plot(out[:, 2], label='Validation set')
        ax.set_xlabel('Iteration')
        ax.set_ylabel('Loss')
        ax.legend()
# 整合模型(训练判别器) x_in = Input(shape=(img_dim, img_dim, 3)) z_in = Input(shape=(z_dim, )) g_model.trainable = False x_fake = g_model(z_in) x_real_score = d_model(x_in) x_fake_score = d_model(x_fake) d_train_model = Model([x_in, z_in], [x_real_score, x_fake_score]) d_loss = K.mean(x_fake_score - x_real_score) d_train_model.add_loss(d_loss) d_train_model.compile(optimizer=Adam(2e-4, 0.5)) # 整合模型(训练生成器) g_model.trainable = True d_model.trainable = False x_fake_score = d_model(g_model(z_in)) g_train_model = Model(z_in, x_fake_score) g_train_model.add_loss(K.mean(- x_fake_score)) g_train_model.compile(optimizer=Adam(2e-4, 0.5)) # 检查模型结构 d_train_model.summary()
# 训练模型 train_model = Model( bert.model.inputs + [subject_labels, subject_ids, object_labels], [subject_preds, object_preds]) mask = bert.model.get_layer('Sequence-Mask').output_mask subject_loss = K.binary_crossentropy(subject_labels, subject_preds) subject_loss = K.mean(subject_loss, 2) subject_loss = K.sum(subject_loss * mask) / K.sum(mask) object_loss = K.binary_crossentropy(object_labels, object_preds) object_loss = K.sum(K.mean(object_loss, 3), 2) object_loss = K.sum(object_loss * mask) / K.sum(mask) train_model.add_loss(subject_loss + object_loss) train_model.compile(optimizer=Adam(1e-5)) def extract_spoes(text): """抽取输入text所包含的三元组 """ tokens = tokenizer.tokenize(text, max_length=maxlen) token_ids, segment_ids = tokenizer.encode(text, max_length=maxlen) # 抽取subject subject_preds = subject_model.predict([[token_ids], [segment_ids]]) start = np.where(subject_preds[0, :, 0] > 0.6)[0] end = np.where(subject_preds[0, :, 1] > 0.5)[0] subjects = [] for i in start: j = end[end >= i]
def E2EModel(bert_config_path, bert_checkpoint_path, LR, num_rels):
    """Build the subject tagger, the object tagger and their joint trainer.

    Args:
        bert_config_path: BERT config file handed to the checkpoint loader.
        bert_checkpoint_path: BERT checkpoint handed to the checkpoint loader.
        LR: learning rate for the Adam optimizer of the joint model.
        num_rels: number of relations (width of the object head/tail outputs).

    Returns:
        Tuple ``(subject_model, object_model, hbt_model)``. All three share
        the same layers; only ``hbt_model`` carries the loss and is compiled.
    """
    encoder = load_trained_model_from_checkpoint(bert_config_path,
                                                 bert_checkpoint_path,
                                                 seq_len=None)
    for bert_layer in encoder.layers:
        bert_layer.trainable = True

    # Placeholders, created in the order the joint model lists them.
    token_ids = Input(shape=(None, ))
    segment_ids = Input(shape=(None, ))
    gold_sub_heads_in = Input(shape=(None, ))
    gold_sub_tails_in = Input(shape=(None, ))
    sub_head_idx = Input(shape=(1, ))
    sub_tail_idx = Input(shape=(1, ))
    gold_obj_heads_in = Input(shape=(None, num_rels))
    gold_obj_tails_in = Input(shape=(None, num_rels))

    # 1.0 where the token id is greater than 0, else 0.0 (trailing axis added).
    mask = Lambda(
        lambda ids: K.cast(K.greater(K.expand_dims(ids, 2), 0), 'float32'))(token_ids)

    # Subject tagger: per-token head/tail probabilities.
    encoded = encoder([token_ids, segment_ids])
    pred_sub_heads = Dense(1, activation='sigmoid')(encoded)
    pred_sub_tails = Dense(1, activation='sigmoid')(encoded)
    subject_model = Model([token_ids, segment_ids],
                          [pred_sub_heads, pred_sub_tails])

    # Object tagger: add the averaged subject head/tail feature to every
    # token feature, then predict per-relation head/tail probabilities.
    head_feature = Lambda(seq_gather)([encoded, sub_head_idx])
    tail_feature = Lambda(seq_gather)([encoded, sub_tail_idx])
    subject_feature = Average()([head_feature, tail_feature])
    conditioned = Add()([encoded, subject_feature])
    pred_obj_heads = Dense(num_rels, activation='sigmoid')(conditioned)
    pred_obj_tails = Dense(num_rels, activation='sigmoid')(conditioned)
    object_model = Model(
        [token_ids, segment_ids, sub_head_idx, sub_tail_idx],
        [pred_obj_heads, pred_obj_tails])

    # Joint training model.
    hbt_model = Model(
        [
            token_ids, segment_ids, gold_sub_heads_in, gold_sub_tails_in,
            sub_head_idx, sub_tail_idx, gold_obj_heads_in, gold_obj_tails_in
        ],
        [pred_sub_heads, pred_sub_tails, pred_obj_heads, pred_obj_tails])

    def masked_mean(per_token):
        """Sum of `per_token` over masked-in positions divided by the mask sum."""
        return K.sum(per_token * mask) / K.sum(mask)

    gold_sub_heads = K.expand_dims(gold_sub_heads_in, 2)
    gold_sub_tails = K.expand_dims(gold_sub_tails_in, 2)

    sub_heads_loss = masked_mean(
        K.binary_crossentropy(gold_sub_heads, pred_sub_heads))
    sub_tails_loss = masked_mean(
        K.binary_crossentropy(gold_sub_tails, pred_sub_tails))
    # Object losses are summed over the relation axis before masking.
    obj_heads_loss = masked_mean(
        K.sum(K.binary_crossentropy(gold_obj_heads_in, pred_obj_heads),
              2, keepdims=True))
    obj_tails_loss = masked_mean(
        K.sum(K.binary_crossentropy(gold_obj_tails_in, pred_obj_tails),
              2, keepdims=True))

    total_loss = (sub_heads_loss + sub_tails_loss) + (obj_heads_loss + obj_tails_loss)

    hbt_model.add_loss(total_loss)
    hbt_model.compile(optimizer=Adam(LR))
    hbt_model.summary()

    return subject_model, object_model, hbt_model
# Encode the two inputs, then score every position twice: ps1 and ps2 each
# come from their own bias-free Dense(1) layer.
x = bert_model([x1, x2])
ps1 = Dense(1, use_bias=False)(x)
# Drop the trailing axis and subtract 1e10 wherever x_mask is 0, so that those
# positions are effectively excluded once the scores are used as logits.
ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)([ps1, x_mask])
ps2 = Dense(1, use_bias=False)(x)
ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)([ps2, x_mask])

# Inference model: only the two scored outputs.
model = Model([x1_in, x2_in], [ps1, ps2])

# Training model: additionally takes the two target inputs.
train_model = Model([x1_in, x2_in, s1_in, s2_in], [ps1, ps2])

loss1 = K.mean(K.categorical_crossentropy(s1_in, ps1, from_logits=True))
# Subtract 1e10 from ps2 wherever the cumulative sum of s1 along axis 1 is 0.
# NOTE(review): presumably s1 is a one-hot start indicator, which would make
# this rule out positions before the start -- confirm.
ps2 -= (1 - K.cumsum(s1, 1)) * 1e10
loss2 = K.mean(K.categorical_crossentropy(s2_in, ps2, from_logits=True))
loss = loss1 + loss2

# The loss is attached with add_loss, so compile() gets no loss argument.
train_model.add_loss(loss)
train_model.compile(optimizer=Adam(learning_rate))
train_model.summary()


def softmax(x):
    """Return the softmax of `x`, normalised over all of its elements."""
    # Subtracting the maximum first keeps np.exp from overflowing.
    x = x - np.max(x)
    x = np.exp(x)
    return x / np.sum(x)


def extract_entity(text_in, c_in):
    # Returns the string 'NaN' for a class that is not in `classes`.
    if c_in not in classes:
        return 'NaN'
    # Merge the event type and the text into a single string for the model
    # (as described in the blog post).
    # NOTE(review): the function body appears to be cut off after this line.
    text_in = u'___%s___%s' % (c_in, text_in)
def vaegan_complete_model(original_dim=(64, 64, 3),
                          batch_size=64,
                          latent_dim=128,
                          epochs=50,
                          mse_flag=True,
                          lr=0.0003):
    '''Build the VAE-GAN networks and the models used to train them.

    Args:
        original_dim: input image shape. The decoder starts from an 8x8x256
            tensor and upsamples by 2 three times, and the second
            discriminator stage expects an (8, 8, 256) input, so the layer
            sizes match the default 64x64x3 images.
        batch_size: not used in this function.
        latent_dim: size of the latent vector.
        epochs: not used in this function.
        mse_flag: not used in this function.
        lr: learning rate for the RMSprop optimizers (the encoder trainer
            uses half of it).

    Returns:
        Tuple ``(encoder, decoder, discriminator, model1_enc, model2_dec)``:
        the three networks, the encoder trainer and the decoder trainer.
    '''
    # VAE model = encoder + decoder
    # build encoder model: three stride-2 5x5 convolutions (64/128/256
    # filters), each followed by batch normalisation and LeakyReLU(0.2).
    input_shape = original_dim
    inputs = Input(shape=input_shape, name='encoder_input')
    x = Conv2D(64, (5, 5), strides=(2, 2), padding='same',
               name='enc_conv1')(inputs)
    x = BatchNormalization(name='enc_bn1')(x)
    x = LeakyReLU(alpha=0.2, name='enc_LReLU1')(x)
    x = Conv2D(128, (5, 5), strides=(2, 2), padding='same',
               name='enc_conv2')(x)
    x = BatchNormalization(name='enc_bn2')(x)
    x = LeakyReLU(alpha=0.2, name='enc_LReLU2')(x)
    x = Conv2D(256, (5, 5), strides=(2, 2), padding='same',
               name='enc_conv3')(x)
    x = BatchNormalization(name='enc_bn3')(x)
    x = LeakyReLU(alpha=0.2, name='enc_LReLU3')(x)
    x = Flatten()(x)

    # Latent mean and log-variance heads. Both pass through batch
    # normalisation and LeakyReLU before being used.
    x_mean = Dense(latent_dim, name='x_mean')(x)
    x_mean = BatchNormalization()(x_mean)
    z_mean = LeakyReLU(alpha=0.2, name='z_mean')(x_mean)
    x_log_var = Dense(latent_dim, name='x_log_var')(x)
    x_log_var = BatchNormalization()(x_log_var)
    z_log_var = LeakyReLU(alpha=0.2, name='z_log_var')(x_log_var)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    # (`sampling` is defined outside this function.)
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    # The encoder exposes the mean, the log-variance and the sampled z.
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    print('encoder')
    encoder.summary()

    # build decoder model: dense projection to 8x8x256, three stride-2
    # transposed convolutions, then a stride-1 one to 3 channels with tanh.
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(8 * 8 * 256)(latent_inputs)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Reshape((8, 8, 256))(x)
    x = Conv2DTranspose(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2DTranspose(32, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2DTranspose(3, (5, 5), strides=(1, 1), padding='same')(x)
    outputs = Activation('tanh')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    print('decoder')
    decoder.summary()

    # instantiate discriminator. It is built as two models so that the
    # intermediate feature map (l_layer) can be used on its own:
    #   discriminator_l: image -> l_layer features
    #   discriminator_2: l_layer features -> sigmoid score
    x_recon = Input(shape=input_shape)
    x = Conv2D(32, (5, 5), strides=(1, 1), padding='same')(x_recon)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2D(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2D(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    l_layer = Conv2D(256, (5, 5), strides=(2, 2), padding='same')(x)

    # Hard-coded shape of l_layer; the second stage gets its own Input.
    l_layer_shape = (8, 8, 256)
    input_disc2 = Input(shape=l_layer_shape)
    x = BatchNormalization()(input_disc2)
    x = LeakyReLU(alpha=0.2)(x)
    x = Flatten()(x)
    x = Dense(512)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Dense(1)(x)
    output_dis = Activation('sigmoid')(x)

    '''construct discriminator with l_layer output'''
    discriminator_l = Model(x_recon, l_layer, name='discriminator_l')
    print('discriminator_l')
    discriminator_l.summary()
    ''' construct discriminator second part'''
    discriminator_2 = Model(input_disc2, output_dis, name='discriminator_2')
    print('discriminator_2')
    discriminator_2.summary()
    ''' construct discriminator (discriminator trainable) '''
    # Full discriminator = the two stages chained; it is compiled here,
    # before any of the trainable flags below are changed.
    discriminator = Model(x_recon,
                          discriminator_2(discriminator_l(x_recon)),
                          name='discriminator')
    print('discriminator')
    discriminator.compile(loss='binary_crossentropy',
                          optimizer=RMSprop(lr=lr),
                          metrics=['accuracy'])
    print('discriminator')
    discriminator.summary()

    '''construct model 1 (encoder trainable) '''
    # The flags are set before model1_enc is built and compiled: only the
    # encoder is marked trainable.
    encoder.trainable = True
    decoder.trainable = False
    discriminator_l.trainable = False
    discriminator_2.trainable = False
    print('encoder_model_try')
    # l_layer features of the reconstruction and of the real input.
    disc_xtilde = discriminator_l(decoder(encoder(inputs)[2]))
    disc_x = discriminator_l(inputs)
    # out_recon is not read again in this function (it was only used by the
    # pixel-space MSE term, which has been removed from the loss).
    out_recon = decoder(encoder(inputs)[2])
    model1_enc = Model(
        inputs,
        [discriminator_2(disc_x),
         discriminator_2(disc_xtilde)],
        name='model_encoder1')
    model1_enc.summary()
    plot_model(model1_enc, to_file='model1_enc.png', show_shapes=True)
    ''' model1_enc = Model(inputs, discriminator_l(decoder(encoder(inputs)[2])), name='model1_encoder') print('model1 encoder trainable') plot_model(model1_enc, to_file='model1_enc.png', show_shapes=True) '''
    '''Define losses for encoder parameter update'''
    # Reconstruction term computed on discriminator features (nll_loss is
    # defined outside this function), plus the KL term built from the
    # encoder's mean / log-variance tensors.
    reconstruction_loss = nll_loss(disc_x, disc_xtilde)
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    model1_enc.add_loss(vae_loss)
    # The encoder trainer uses half the base learning rate.
    model1_enc.compile(optimizer=RMSprop(lr=lr * (0.5)))

    ''' construct model 2 (decoder trainable) '''
    # Flags are flipped before model2_dec is built and compiled: only the
    # decoder is marked trainable.
    encoder.trainable = False
    decoder.trainable = True
    discriminator_l.trainable = False
    discriminator_2.trainable = False
    # zp: a latent vector supplied directly as a second model input.
    zp = Input(shape=(latent_dim, ), name='zp')
    out_zp = discriminator_2(discriminator_l(decoder(zp)))
    # Note: this decoder trainer is given the Keras name 'model2_encoder'.
    model2_dec = Model(
        [inputs, zp],
        [discriminator_2(disc_x),
         discriminator_2(disc_xtilde), out_zp],
        name='model2_encoder')
    print('model2 decoder trainable')
    model2_dec.summary()
    plot_model(model2_dec, to_file='model2_dec.png', show_shapes=True)

    # Weight of the feature-reconstruction term in the decoder loss.
    gamma = 1e-6
    # Adversarial terms: cross-entropy against an all-ones target for the
    # discriminator scores of the reconstruction and of decoder(zp).
    # (An earlier variant used all-zeros targets and subtracted the term.)
    gan_fake_loss1 = binary_crossentropy(
        K.ones_like(discriminator_2(disc_xtilde)),
        discriminator_2(disc_xtilde))
    gan_fake_loss2 = binary_crossentropy(K.ones_like(out_zp), out_zp)
    gan_fake_loss = K.mean(gan_fake_loss1 + gan_fake_loss2)
    # reconstruction_loss is used here without the K.mean reduction that
    # vae_loss applies above.
    dec_loss = gamma * reconstruction_loss + gan_fake_loss
    model2_dec.add_loss(dec_loss)
    model2_dec.compile(optimizer=RMSprop(lr=lr))

    return encoder, decoder, discriminator, model1_enc, model2_dec
def wgangp_conditional(h=128,
                       w=128,
                       c=3,
                       latent_dim=2,
                       condition_dim=10,
                       epsilon_std=1.0,
                       dropout_rate=0.1,
                       GRADIENT_PENALTY_WEIGHT=10):
    """Build a conditional generator, a discriminator, a classifier and the
    three compiled models that train them.

    Args:
        h, w, c: height, width and channel count of the images.
        latent_dim: size of the noise part of the generator input.
        condition_dim: size of the condition part of the generator input; also
            passed to the classifier as ``as_classifier``.
        epsilon_std: not used in this function.
        dropout_rate: forwarded to the three network builders.
        GRADIENT_PENALTY_WEIGHT: forwarded to ``gradient_penalty_loss``.

    Returns:
        Tuple ``(generator_model, discriminator_model, classifier_model,
        generator, discriminator, classifier)``: the three trainers followed
        by the three underlying networks.
    """
    # One optimizer per trainer, all with the same settings.
    optimizer_g = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)
    optimizer_d = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)
    optimizer_c = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)

    # The generator is given the image size divided by 16.
    t_h, t_w = h // 16, w // 16

    generator = residual_decoder(t_h,
                                 t_w,
                                 c=c,
                                 latent_dim=latent_dim + condition_dim,
                                 dropout_rate=dropout_rate)

    # Both networks are built with return_hidden=True; below, their result is
    # unpacked as one output followed by three hidden tensors.
    discriminator = residual_discriminator(h=h,
                                           w=w,
                                           c=c,
                                           dropout_rate=dropout_rate,
                                           return_hidden=True)

    classifier = residual_discriminator(h=h,
                                        w=w,
                                        c=c,
                                        dropout_rate=dropout_rate,
                                        as_classifier=condition_dim,
                                        return_hidden=True)

    # Freeze the discriminator and the classifier before the generator
    # trainer is built and compiled.
    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False
    for layer in classifier.layers:
        layer.trainable = False
    classifier.trainable = False

    generator_input = Input(shape=(latent_dim + condition_dim, ))
    generator_layers = generator(generator_input)
    # Index 0 is the network's output; the hidden tensors are dropped here.
    discriminator_layers_for_generator = discriminator(generator_layers)[0]
    classifier_layers_for_generator = classifier(generator_layers)[0]

    generator_model = Model(inputs=[generator_input],
                            outputs=[
                                discriminator_layers_for_generator,
                                classifier_layers_for_generator
                            ])
    # The discriminator term (mean score on generated samples) is attached
    # with add_loss, so the first output gets loss None; the classifier
    # output is trained with categorical cross-entropy.
    generator_model.add_loss(K.mean(discriminator_layers_for_generator))
    generator_model.compile(optimizer=optimizer_g,
                            loss=[None, 'categorical_crossentropy'])

    # Now that the generator_model is compiled, we can make the discriminator layers trainable.
    for layer in discriminator.layers:
        layer.trainable = True
    for layer in generator.layers:
        layer.trainable = False
    discriminator.trainable = True
    generator.trainable = False

    # The discriminator_model is more complex. It takes both real image samples and random noise seeds as input.
    # The noise seed is run through the generator model to get generated images. Both real and generated images
    # are then run through the discriminator. Although we could concatenate the real and generated images into a
    # single tensor, we don't (see model compilation for why).
    real_samples = Input(shape=(h, w, c))
    generator_input_for_discriminator = Input(shape=(latent_dim +
                                                     condition_dim, ))
    generated_samples_for_discriminator = generator(
        generator_input_for_discriminator)
    discriminator_output_from_generator = discriminator(
        generated_samples_for_discriminator)[0]
    discriminator_output_from_real_samples, d0, d1, d2 = discriminator(
        real_samples)
    classifier_output_from_real_samples, c0, c1, c2 = classifier(real_samples)

    # Feature-matching term: 0.1 * mean squared difference between the
    # flattened hidden tensors of the discriminator and of the classifier on
    # real samples.
    ds = K.concatenate([K.flatten(d0), K.flatten(d1), K.flatten(d2)], axis=-1)
    cs = K.concatenate([K.flatten(c0), K.flatten(c1), K.flatten(c2)], axis=-1)
    c_loss = .1 * K.mean(K.square(ds - cs))

    # Samples combined from real and generated images by
    # RandomWeightedAverage; they are the points the gradient penalty is
    # evaluated on.
    averaged_samples = RandomWeightedAverage()(
        [real_samples, generated_samples_for_discriminator])
    averaged_samples_out = discriminator(averaged_samples)[0]

    discriminator_model = Model(
        [real_samples, generator_input_for_discriminator], [
            discriminator_output_from_real_samples,
            discriminator_output_from_generator, averaged_samples_out
        ])
    # Discriminator loss: mean score on real samples, minus mean score on
    # generated samples, plus the gradient penalty. All losses are attached
    # with add_loss, so compile() gets loss=None.
    discriminator_model.add_loss(
        K.mean(discriminator_output_from_real_samples) -
        K.mean(discriminator_output_from_generator) + gradient_penalty_loss(
            averaged_samples_out, averaged_samples, GRADIENT_PENALTY_WEIGHT))
    discriminator_model.add_loss(c_loss, inputs=[discriminator])
    discriminator_model.compile(optimizer=optimizer_d, loss=None)

    # Unfreeze the classifier before its own trainer is built and compiled.
    for layer in classifier.layers:
        layer.trainable = True
    classifier.trainable = True

    # Classifier trainer: categorical cross-entropy on real samples plus the
    # same feature-matching term.
    classifier_model = Model([real_samples],
                             [classifier_output_from_real_samples])
    classifier_model.add_loss(c_loss, inputs=[classifier])
    classifier_model.compile(optimizer=optimizer_c,
                             loss='categorical_crossentropy')

    return generator_model, discriminator_model, classifier_model, generator, discriminator, classifier
class AutoEncoder:
    """Variational encoder/decoder trained adversarially against a critic.

    Two Keras trainers share the same networks: ``critic_trainer`` updates
    the critic, and ``gen_trainer`` updates the encoder and decoder. Input
    height and width should be multiples of 8, because the networks
    down/up-sample by 2 three times.
    """

    def __init__(self, input_shape, latent_dim, learning_rate=0.0005):
        """Build the networks, both trainers and the prediction functions.

        Args:
            input_shape: image shape, three entries with channels last.
            latent_dim: size of the latent vector.
            learning_rate: learning rate of both RMSprop optimizers.
        """
        self.input_shape = input_shape  # (w,h,c)
        self.latent_dim = latent_dim  # n

        # Auto Encoder
        self.encoder = self._build_encoder()
        self.encoder_input = Input(shape=self.input_shape)
        self.encoder_mean_output, self.encoder_logvar_output = self.encoder(
            self.encoder_input)
        self.decoder = self._build_decoder()
        self.decoder_input = Input(shape=(self.latent_dim, ))
        # Deterministic reconstruction: decode the latent mean.
        self.decoder_output = self.decoder(self.encoder_mean_output)

        # Generator: decode a latent vector drawn by `sampling`.
        self.latent_output = Lambda(sampling)(
            [self.encoder_mean_output, self.encoder_logvar_output])
        self.gen_output = self.decoder(self.latent_output)

        # Critic
        self.critic = self._build_critic()

        # Disable the generator. The flags are set before the critic trainer
        # is built and compiled.
        self.critic.trainable = True
        self.encoder.trainable = False
        self.decoder.trainable = False

        # Critic trainer
        self.h1_real, self.h2_real, self.h3_real, self.critic_output_real = self.critic(
            self.encoder_input)
        self.h1_fake, self.h2_fake, self.h3_fake, self.critic_output_fake = self.critic(
            self.gen_output)
        self.critic_trainer = Model(
            self.encoder_input,
            [self.critic_output_real, self.critic_output_fake])
        critic_loss = self._critic_loss()
        self.critic_trainer.add_loss(K.mean(critic_loss))
        self.critic_trainer.compile(optimizer=RMSprop(lr=learning_rate))
        self.critic_trainer.summary()

        # Disable the critic and re-enable the generator
        self.critic.trainable = False
        self.encoder.trainable = True
        self.decoder.trainable = True

        # Generator trainer
        self.gen_trainer = Model(
            self.encoder_input,
            [self.critic_output_real, self.critic_output_fake])
        gen_loss = self._gen_loss()
        self.gen_trainer.add_loss(K.mean(gen_loss))
        self.gen_trainer.compile(optimizer=RMSprop(lr=learning_rate))
        self.gen_trainer.summary()

        # Reconstruction prediction
        self.rec_sample = K.function([self.encoder_input],
                                     [self.decoder_output])
        # Generate prediction
        self.gen_sample = K.function([self.decoder_input],
                                     [self.decoder(self.decoder_input)])
        # Compute discriminator score (by means of the distance): the
        # per-sample generator loss.
        self.compute_score = K.function([self.encoder_input], [gen_loss])

    def _build_encoder(self):
        """Return the encoder: image -> [latent mean, latent log-variance]."""
        encoder_input = Input(shape=self.input_shape)

        h = Conv2D(64, 5, strides=2, padding='same')(encoder_input)
        h = Activation('relu')(h)
        h = Conv2D(128, 5, strides=2, padding='same')(h)
        h = BatchNormalization(momentum=0.8)(h)
        h = Activation('relu')(h)
        h = Conv2D(256, 5, strides=2, padding='same')(h)
        h = BatchNormalization(momentum=0.8)(h)
        h = Activation('relu')(h)
        h = Flatten()(h)
        encoder_mean_output = Dense(self.latent_dim)(h)
        encoder_logvar_output = Dense(self.latent_dim)(h)

        return Model(encoder_input,
                     [encoder_mean_output, encoder_logvar_output])

    def _build_decoder(self):
        """Return the decoder: latent vector -> image (linear output)."""
        decoder_input = Input(shape=(self.latent_dim, ))

        # The Dense width is derived from the Reshape target so the two
        # always agree (same value as before for sizes divisible by 8).
        rows = self.input_shape[0] // 2**3
        cols = self.input_shape[1] // 2**3

        h = Dense(rows * cols * 256, activation='relu')(decoder_input)
        h = Reshape((rows, cols, 256))(h)
        h = Conv2DTranspose(256, 5, strides=2, padding='same')(h)
        h = BatchNormalization()(h)
        h = Activation('relu')(h)
        h = Conv2DTranspose(128, 5, strides=2, padding='same')(h)
        h = BatchNormalization()(h)
        h = Activation('relu')(h)
        h = Conv2DTranspose(64, 5, strides=2, padding='same')(h)
        h = Activation('relu')(h)
        decoder_output = Conv2D(self.input_shape[2], 5,
                                padding='same')(h)  # linear activation

        return Model(decoder_input, decoder_output)

    def _build_critic(self):
        """Return the critic: image -> [h1, h2, h3, score].

        The three hidden activations are exposed because the generator loss
        compares them between real and generated images.
        """
        critic_input = Input(shape=self.input_shape)

        h = Conv2D(64, 5, strides=2, padding='same')(critic_input)
        h1 = LeakyReLU(alpha=0.2)(h)
        h = Conv2D(128, 5, strides=2, padding='same')(h1)
        h = BatchNormalization()(h)
        h2 = LeakyReLU(alpha=0.2)(h)
        h = Conv2D(256, 5, strides=2, padding='same')(h2)
        h = BatchNormalization()(h)
        h3 = LeakyReLU(alpha=0.2)(h)
        h = Flatten()(h3)
        critic_output = Dense(1)(h)

        return Model(critic_input, [h1, h2, h3, critic_output])

    def _critic_loss(self):
        """Per-sample critic loss: squared error to 1 on real, to 0 on fake."""
        true_loss = K.mean(K.square(self.critic_output_real - 1.), axis=-1)
        false_loss = K.mean(K.square(self.critic_output_fake), axis=-1)
        return true_loss + false_loss

    def _gen_loss(self):
        """Per-sample generator loss.

        Sum of 0.01 * KL term, the squared difference between the critic
        scores of real and generated images, and the mean absolute difference
        of the three critic hidden activations.
        """
        kl_loss = -0.5 * K.sum(1 + self.encoder_logvar_output -
                               K.square(self.encoder_mean_output) -
                               K.exp(self.encoder_logvar_output),
                               axis=-1)
        gen_loss = K.mean(K.square(self.critic_output_real -
                                   self.critic_output_fake),
                          axis=-1)
        rec_loss = K.mean(K.abs(self.h1_real - self.h1_fake), axis=[1, 2, 3]) \
            + K.mean(K.abs(self.h2_real - self.h2_fake), axis=[1, 2, 3]) \
            + K.mean(K.abs(self.h3_real - self.h3_fake), axis=[1, 2, 3])
        return 0.01 * kl_loss + gen_loss + rec_loss

    @staticmethod
    def _make_datagen():
        """Return the image generator used everywhere (rescales to [0, 1])."""
        return ImageDataGenerator(rescale=1. / 255, fill_mode='constant')

    def _flow_from_directory(self, datagen, directory, batch_size):
        """Return a directory iterator matching this model's input shape."""
        color_mode = 'rgb' if self.input_shape[-1] > 1 else 'grayscale'
        return datagen.flow_from_directory(directory,
                                           target_size=self.input_shape[:2],
                                           interpolation='bilinear',
                                           color_mode=color_mode,
                                           class_mode='categorical',
                                           batch_size=batch_size)

    @staticmethod
    def _save_image_grid(images, filename, titles=None):
        """Save up to 25 images as a 5x5 grid to `filename`.

        Single-channel images are squeezed and drawn with the gray colormap.
        `titles`, when given, supplies one title per drawn image.
        """
        f = plt.figure()
        plt.clf()
        for i in range(min(images.shape[0], 25)):
            plt.subplot(5, 5, i + 1)
            if images.shape[-1] > 1:
                plt.imshow(images[i])
            else:
                plt.imshow(np.squeeze(images[i]), cmap='gray')
            if titles is not None:
                plt.title(titles[i])
            plt.axis('off')
        f.canvas.draw()
        plt.savefig(filename)
        plt.close()

    def _reconstruct_samples(self, data_gen, vis_id=0):
        """Save one batch from `data_gen` and its reconstruction as two grids."""
        x, _ = data_gen.next()
        # Multi-channel reconstructions are scaled to 0..255 integers for
        # display; single-channel ones are drawn as returned.
        if x.shape[-1] > 1:
            x_gen = (self.rec_sample([x])[0] * 255.).astype('int')
        else:
            x_gen = self.rec_sample([x])[0]

        self._save_image_grid(x, 'real_samples_e%i.eps' % vis_id)
        self._save_image_grid(x_gen, 'fake_samples_e%i.eps' % vis_id)

    def _generate_samples(self, vis_id=0):
        """Decode 25 standard-normal latent vectors and save them as a grid."""
        n = np.random.randn(25, self.latent_dim)
        x_gen = self.gen_sample([n])[0]
        self._save_image_grid(x_gen, 'generated_samples_e%i.eps' % vis_id)

    def train(self, train_dir, val_dir, epochs=10, batch_size=64):
        """Alternate critic and generator updates over `train_dir`.

        At the start of every epoch a validation batch and its reconstruction
        are saved as images; the three networks' weights are written to
        ``./encoder.h5``, ``./decoder.h5`` and ``./critic.h5``.
        """
        # Generators
        datagen = self._make_datagen()
        train_gen = self._flow_from_directory(datagen, train_dir, batch_size)
        val_gen = self._flow_from_directory(datagen, val_dir, batch_size)

        steps_per_epoch = (np.ceil(train_gen.n / batch_size)).astype('int')

        for i in range(epochs):
            print('Epoch %i/%i' % (i + 1, epochs))
            pbar = Progbar(steps_per_epoch)
            self._reconstruct_samples(val_gen, i)

            for j in range(steps_per_epoch):
                x, _ = train_gen.next()
                # Losses are attached to the models, hence y=None.
                critic_loss = self.critic_trainer.train_on_batch(x=x, y=None)
                gen_loss = self.gen_trainer.train_on_batch(x=x, y=None)
                pbar.update(j + 1, [('critic loss', critic_loss),
                                    ('generator loss', gen_loss)])

            # Save weights
            self.encoder.save_weights('./encoder.h5')
            self.decoder.save_weights('./decoder.h5')
            self.critic.save_weights('./critic.h5')

    def restore_weights(self):
        """Load the weights written by ``train`` from the working directory."""
        self.encoder.load_weights('./encoder.h5')
        self.decoder.load_weights('./decoder.h5')
        self.critic.load_weights('./critic.h5')

    def reconstruct_samples(self, dir, vis_id=0):
        """Save real/reconstructed grids for one batch of 25 images in `dir`."""
        gen = self._flow_from_directory(self._make_datagen(), dir, 25)
        self._reconstruct_samples(gen, vis_id)

    def generate_samples(self, vis_id=0):
        """Save a grid of images decoded from random latent vectors."""
        self._generate_samples(vis_id)

    def compute_distance(self, dir, vis_id=0):
        """Save a grid of images from `dir`, each titled with its score.

        The score is the per-sample generator loss returned by
        ``compute_score``.
        """
        gen = self._flow_from_directory(self._make_datagen(), dir, 25)
        x, _ = gen.next()
        dist = self.compute_score([x])[0]

        titles = ['d_%.3f' % dist[i] for i in range(min(x.shape[0], 25))]
        self._save_image_grid(x, 'distance_samples_e%i.eps' % vis_id, titles)
class CustomVae(VanillaVae):
    '''
    An altered version of VanillaVae that allows us to investigate:
    - whether we have posterior collapse
    - try using a different reconstruction loss metric
    - try using different values to weight the recon loss vs KL div, if time
    '''

    def __init__(self,
                 input_dim,
                 intermediate_dim,
                 latent_dim,
                 fn,
                 recon_type='mse',
                 beta=1.0):
        """Build encoder, decoder and end-to-end VAE, then compile the VAE.

        Args:
            input_dim: length of the flat input vector.
            intermediate_dim: width of the hidden Dense layer used in both
                the encoder and the decoder.
            latent_dim: size of the latent code.
            fn: handed to the ``SplitReconKL`` callback created in ``fit``.
            recon_type: stored only; nothing is done with it yet.
            beta: weight applied to the KL term of the loss.
        """
        # NOTE(review): VanillaVae.__init__ is not called here, while `fit`
        # reads `self.x_train` and the Lambda below uses `self.sampling` --
        # confirm both are provided by the base class / set by the caller.
        self.recon_loss = None
        self.kl_loss = None
        self.total_loss = None
        self.recon_type = recon_type  # nothing is done with this yet
        self.beta = beta
        self.z_mean = None
        self.z_log_var = None
        self.fn = fn

        # Encoder
        inputs = Input(shape=(input_dim, ))
        h = Dense(intermediate_dim, activation='relu')(inputs)
        z_mean = Dense(latent_dim)(h)
        z_log_var = Dense(latent_dim)(h)

        # Not sure if this works
        self.z_mean = z_mean
        self.z_log_var = z_log_var

        # Latent space
        args = [z_mean, z_log_var]
        z = Lambda(self.sampling, output_shape=(latent_dim, ))(args)
        # The exposed encoder returns the mean, not the sampled code.
        self.encoder = Model(inputs, z_mean)

        # Decoder
        decoder_inputs = Input(shape=(latent_dim, ))
        decoder_h = Dense(intermediate_dim, activation='relu')(decoder_inputs)
        outputs = Dense(input_dim, activation='sigmoid')(decoder_h)
        self.decoder = Model(decoder_inputs, outputs)

        # end-to-end vae
        vae_outputs = self.decoder(z)
        self.vae = Model(inputs, vae_outputs)

        # Setup and compile
        self.vae.add_loss(
            self.vae_loss(inputs, vae_outputs, input_dim, z_mean, z_log_var))
        self.vae.compile(
            optimizer='adam',
            # this doesn't actually work; metrics get ignored when using
            # add_loss, see keras issue 9459
            metrics=[])
        # Register the two loss components as extra metrics by hand.
        self.vae.metrics_tensors.append(
            CustomVae.calc_mse_alone(inputs, vae_outputs, input_dim))
        self.vae.metrics_names.append("mse")
        self.vae.metrics_tensors.append(self.calc_kl_alone(beta=self.beta))
        self.vae.metrics_names.append("kl")

    def calc_kl_alone(self, beta=1.0):
        """Return the batch mean of ``beta`` times the KL term.

        Uses the ``z_mean`` / ``z_log_var`` tensors stored on the instance.
        """
        kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(
            self.z_log_var)
        kl_loss = -0.5 * K.sum(kl_loss, axis=-1)
        kl_loss = K.mean(beta * kl_loss)
        return kl_loss

    @staticmethod
    def calc_mse_alone(input_x, output_x, original_dim):
        """Return the batch mean of ``mse(input_x, output_x) * original_dim``."""
        # original_dim = 128 * 128
        return K.mean(mse(input_x, output_x) * original_dim)

    def vae_loss(self, inputs, outputs, original_dim, z_mean, z_log_var):
        """
        VAE loss = mse_loss (reconstruction) + kl_loss

        Note - it may not make sense to use cross-ent loss here if
        the input images are not binarized!!

        beta is a weight that we put on the kl_loss component. Defaults to 1.

        The intermediate tensors are kept on ``self.recon_loss``,
        ``self.kl_loss`` and ``self.total_loss``; the total is returned.

        TODO: add xent to this
        """
        self.recon_loss = mse(inputs, outputs) * original_dim
        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        self.kl_loss = -0.5 * K.sum(kl_loss, axis=-1)
        self.total_loss = K.mean(self.recon_loss + self.beta * self.kl_loss)
        return self.total_loss

    def fit(self, val_split, epochs, batch_size, save_dir=None, fn=None):
        """
        Train the model and save the weights if a `save_dir` is set.

        Args:
            val_split: passed to Keras as ``validation_split``.
            epochs: number of training epochs.
            batch_size: training batch size.
            save_dir: directory for the checkpoint and final weights; when
                falsy nothing is written and no callbacks are attached.
            fn: file name inside ``save_dir``; required when ``save_dir``
                is set.

        Returns:
            The object returned by ``self.vae.fit``.

        Raises:
            ValueError: if ``save_dir`` is set but ``fn`` is not.
        """
        temp_path = final_path = None
        if save_dir:
            if fn is None:
                # Previously this surfaced as an opaque TypeError from
                # `"incomplete_" + None`.
                raise ValueError("`fn` is required when `save_dir` is set")
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            # os.path.join gives the same result as plain concatenation when
            # save_dir ends with a separator, and the intended path when not.
            temp_path = os.path.join(save_dir, "incomplete_" + fn)
            final_path = os.path.join(save_dir, fn)

        # Custom callback to keep track of KL and Recon loss split
        # during training
        split_recon_kl = SplitReconKL(fn=self.fn)

        # Setup checkpoint to save best model
        callbacks = [
            ModelCheckpoint(temp_path,
                            monitor='val_loss',
                            verbose=1,
                            save_best_only=True), split_recon_kl
        ] if save_dir else []

        start = time.time()
        history = self.vae.fit(self.x_train,
                               epochs=epochs,
                               batch_size=batch_size,
                               validation_split=val_split,
                               shuffle=True,
                               callbacks=callbacks,
                               verbose=1)
        print("Total train time: {0:.2f} sec".format(time.time() - start))

        print('Recon losses saved by callback:')
        print(split_recon_kl.recon_losses)
        print('Total losses saved by callback:')
        print(split_recon_kl.total_losses)

        if save_dir:
            # Rename to proper filename after all epochs successfully run.
            # The checkpoint file may be missing if the callback never saved
            # (e.g. no `val_loss` was produced), so guard the rename.
            if os.path.exists(temp_path):
                os.rename(temp_path, final_path)
            self.vae.save_weights(final_path)
            print("Saved final weights to {}".format(final_path))

        return history
def build_model(cfg, summary=False, word_embedding_matrix=None):
    """Build and compile the BERT/ALBERT based classifier described by ``cfg``.

    Args:
        cfg: configuration mapping. Keys read here: ``base_dir``, ``verbose``,
            ``cls_num``, ``bert_trainable``, ``maxlen`` (non-ALBERT branch),
            ``x_pad``, ``unit3``, ``num_class``, ``num_example``, ``lr``,
            ``min_lr``; optionally ``accum_step`` and ``use_embed_v2``; and
            ``trainable`` / ``unit1`` when an embedding matrix is given.
        summary: print ``train_model.summary()`` when true.
        word_embedding_matrix: optional 2-D array of pretrained embeddings;
            when given, an extra embedding + BiLSTM branch is concatenated to
            the encoder output.

    Returns:
        The compiled training model; its loss is attached with ``add_loss``.
    """

    def _get_model(base_dir, cfg_=None):
        """Load the pretrained encoder stored under ``base_dir``."""
        # Use one config consistently; fall back to the enclosing ``cfg`` so
        # the default argument is actually usable.
        cfg_ = cfg if cfg_ is None else cfg_
        if "albert" in cfg_["verbose"].lower():
            from bert4keras.bert import build_bert_model
            config_file = os.path.join(base_dir, 'albert_config.json')
            checkpoint_file = os.path.join(base_dir, 'model.ckpt-best')
            model = build_bert_model(config_path=config_file,
                                     checkpoint_path=checkpoint_file,
                                     model='albert',
                                     return_keras_model=True)
            if cfg_["cls_num"] > 1:
                # Concatenate the last `cls_num` outputs of the shared layer.
                output = Concatenate(axis=-1)([
                    model.get_layer(
                        "Encoder-1-FeedForward-Norm").get_output_at(-i)
                    for i in range(1, cfg_["cls_num"] + 1)
                ])
                model = Model(model.inputs[:2], outputs=output)
            # Applies whether or not the model was re-wrapped above.
            model.trainable = cfg_["bert_trainable"]
        else:
            config_file = os.path.join(base_dir, 'bert_config.json')
            checkpoint_file = os.path.join(base_dir, 'bert_model.ckpt')
            if not os.path.exists(config_file):
                # Fall back to the large RoBERTa checkpoint layout.
                config_file = os.path.join(base_dir, 'bert_config_large.json')
                checkpoint_file = os.path.join(base_dir,
                                               'roberta_l24_large_model')
            model = load_trained_model_from_checkpoint(
                config_file,
                checkpoint_file,
                training=False,
                trainable=cfg_["bert_trainable"],
                output_layer_num=cfg_["cls_num"],
                seq_len=cfg_['maxlen'])
            # model = Model(inputs=model.inputs[: 2], outputs=model.layers[-7].output)
        return model

    def _get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
        """Create the warm-up Adam optimizer, optionally with accumulation."""
        total_steps, warmup_steps = calc_train_steps(
            num_example=num_example,
            batch_size=B_SIZE,
            epochs=MAX_EPOCH,
            warmup_proportion=warmup_proportion,
        )
        opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)
        if cfg.get("accum_step", None) and cfg["accum_step"] > 1:
            print("[!] using accum_step = {}".format(cfg["accum_step"]))
            from accum_optimizer import AccumOptimizer
            opt = AccumOptimizer(opt, steps_per_update=cfg["accum_step"])
        return opt

    bert_model = _get_model(cfg["base_dir"], cfg)

    if word_embedding_matrix is not None:
        embed = Embedding(input_dim=word_embedding_matrix.shape[0],
                          output_dim=word_embedding_matrix.shape[1],
                          weights=[word_embedding_matrix],
                          trainable=cfg["trainable"],
                          name="char_embed")

    t1_in = Input(shape=(None, ))
    t2_in = Input(shape=(None, ))
    o1_in = Input(shape=(1, ))
    # o2_in is created but never wired into the model; kept so that layer
    # creation (and therefore auto-generated layer names) is unchanged.
    o2_in = Input(shape=(1, ))

    t1, t2, o1, o2 = t1_in, t2_in, o1_in, o2_in

    t = bert_model([t1, t2])
    # 1.0 where the token id differs from the padding id, 0.0 elsewhere.
    mask = Lambda(lambda x: K.cast(K.not_equal(x, cfg["x_pad"]), 'float32'))(
        t1)

    ## Char information
    if word_embedding_matrix is not None:
        word_embed = embed(t1)
        if cfg.get("use_embed_v2", False):
            # Append the second input as one extra feature channel.
            _t2 = Lambda(lambda x: K.expand_dims(x, axis=-1))(t2)
            word_embed = Concatenate(axis=-1)([word_embed, _t2])
        word_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
            [word_embed, mask])
        word_embed = Bidirectional(LSTM(cfg["unit1"], return_sequences=True),
                                   merge_mode="sum")(word_embed)
        word_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
            [word_embed, mask])
        t = Concatenate(axis=-1)([t, word_embed])

    t = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([t, mask])
    t = Bidirectional(LSTM(cfg["unit3"], return_sequences=True),
                      merge_mode="concat")(t)
    # t = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([t, mask])
    # t = Conv1D(cfg["conv_num"], kernel_size=3, padding="same")(t)
    # Keep only the first time step of the sequence output.
    t = Lambda(lambda x: x[:, 0, :], name="extract_layer")(t)

    if cfg.get("num_class", 1) == 2:
        # Binary case: a single sigmoid unit.
        po1_logit = Dense(1, name="po1_logit")(t)
        po1 = Activation('sigmoid', name="po1")(po1_logit)
        train_model = Model(inputs=[t1_in, t2_in, o1_in], outputs=[po1])
        o1_loss = K.binary_crossentropy(o1, po1)
        loss = K.mean(o1_loss)
    else:
        po1_logit = Dense(cfg["num_class"], name="po1_logit")(t)
        po1 = Activation('softmax', name="po1")(po1_logit)
        train_model = Model(inputs=[t1_in, t2_in, o1_in], outputs=[po1])
        loss = K.categorical_crossentropy(o1, po1, axis=-1)
        loss = K.mean(loss)

    # The label is a model input, so the loss is attached via add_loss and
    # compile() receives no loss argument.
    train_model.add_loss(loss)
    opt = _get_opt(num_example=cfg["num_example"],
                   lr=cfg["lr"],
                   min_lr=cfg['min_lr'])
    train_model.compile(optimizer=opt)
    if summary:
        train_model.summary()
    return train_model
def _build_model(self):
    """Assemble the Keras model for the configured training/inference mode.

    In training mode the model's outputs are the loss tensors of whichever
    parts (RPN, head) are trainable according to
    ``self.config.training_mode``. In inference mode the outputs are
    ``[normalized_rois, head_offsets, labels, rpn_offsets, objects]`` and
    three dummy zero losses are attached.

    Returns:
        tuple: ``(model, number_of_outputs)``.
    """
    rpn_trainable = self.config.training_mode in ['rpn_only', 'all']
    head_trainable = self.config.training_mode in ['head_only', 'all']

    # backbone network
    backbone_in, backbone_out = self._model_backbone_headless()

    # rpn
    normalized_rois, rpn_offsets, objects, objects_logit \
        = self._nn_rpn(backbone_out, rpn_trainable)

    # Losses are only computed in training mode.
    if self.config.training:
        # Training mode.
        # Inputs
        input_gt_rois = Input(shape=[None, 4],
                              name="input_gt_rois",
                              dtype='float32')
        input_gt_objects = Input(shape=[None],
                                 name="input_gt_objects",
                                 dtype='int32')
        inputs = [backbone_in, input_gt_rois, input_gt_objects]
        losses = []
        if rpn_trainable:
            # Loss computation
            # RPN losses
            rpn_offsets_loss = Lambda(lambda x: loss.rpn_offsets_loss(*x),
                                      name="rpn_offsets_loss")([
                                          input_gt_rois, input_gt_objects,
                                          rpn_offsets
                                      ])
            rpn_objects_loss = Lambda(
                lambda x: loss.rpn_objects_loss(*x),
                name="rpn_objects_loss")([input_gt_objects, objects])
            losses += [rpn_offsets_loss, rpn_objects_loss]

        if head_trainable:
            input_gt_boxes = Input(shape=[None, 4],
                                   name="input_gt_boxes",
                                   dtype='float32')
            input_gt_label_ids = Input(shape=[None],
                                       name="input_gt_label_ids",
                                       dtype='int32')
            inputs += [input_gt_boxes, input_gt_label_ids]
            # From the ground truth and the RoIs, narrow down the RoIs to be
            # evaluated and obtain the ground truth that matches them.
            normalized_sample_rois, normalized_sample_gt_offsets, \
                sample_gt_labels \
                = SubsamplingRoiLayer(self.config,
                                      name='subsampling_roi_and_gt')(
                    [normalized_rois, input_gt_boxes, input_gt_label_ids])
            # Specifying output_shape directly as shown below raised an
            # IndexError, so the step was turned into the custom layer above.
            # batch_size = K.shape(normalized_rois)[0]
            # sample_rois, sample_gt_offsets, sample_labels = \
            #     Lambda(lambda x: self._subsampling_roi_and_gt(*x),
            #            output_shape=[(batch_size, None, 4),
            #                          (batch_size, None, 4),
            #                          (batch_size, None)],
            #            name="subsampling_roi_and_gt")(
            #         [normalized_rois, input_gt_boxes,
            #          input_gt_label_ids])

            # head
            head_offsets, labels, labels_logit\
                = self._nn_head(backbone_out, normalized_sample_rois)

            # Loss computation
            # The head losses cannot be expressed by passing loss functions
            # to Model#compile, so they are defined as layers and added
            # through Model#add_loss.
            head_offsets_loss = Lambda(
                lambda x: loss.head_offsets_loss(*x),
                name="head_offsets_loss")([
                    normalized_sample_gt_offsets, sample_gt_labels,
                    head_offsets
                ])
            head_labels_loss = Lambda(
                lambda x: loss.head_labels_loss(*x),
                name="head_labels_loss")([sample_gt_labels, labels])
            # Losses
            losses += [head_offsets_loss, head_labels_loss]

        # Outputs = losses
        outputs = losses

    else:
        # Inference mode.
        # head
        # head_offsets are values normalized to the 0-1 range.
        head_offsets, labels, _ = self._nn_head(backbone_out,
                                                normalized_rois)

        # No loss is needed at inference time.
        # Dummy loss
        dummy_loss = Lambda(lambda x: K.constant(0),
                            name="dummy_loss")([backbone_in])
        losses = [dummy_loss, dummy_loss, dummy_loss]
        inputs = [backbone_in]
        # BBoxes are obtained by applying head_offsets to the coordinates
        # recovered by undoing the normalization of normalized_rois.
        outputs = [
            normalized_rois, head_offsets, labels, rpn_offsets, objects
        ]

    model = Model(inputs=inputs, outputs=outputs, name='faser_r_cnn')
    # Keras evaluates the sum of all added losses as the loss of the model.
    # Add the losses.
    for output in losses:
        # `keepdims` is the current spelling of the deprecated `keep_dims`.
        model.add_loss(tf.reduce_mean(output, keepdims=True))

    return model, len(outputs)
class CycleGAN():
    """CycleGAN wiring: two mappings (F: x->y, G: y->x) and two discriminators."""

    def __init__(self):
        """Parameters and structure.

        Q1: Why are outputs defined here?
        A1: With Keras a model can only be created once both its inputs and
            outputs are defined.

        Q2: Why are the input/output tensors not stored as private attributes?
        A2: As said above, they only serve to define the models and are not
            passed between members of the class, so for simplicity they stay
            local variables.
        """
        # Input images
        self.img_dim = 64
        img_x = Input(shape=(self.img_dim, self.img_dim, 3))
        img_y = Input(shape=(self.img_dim, self.img_dim, 3))

        # Cycle-consistent transformation functions (domain mapping structure
        # and generator). Each call rebinds the builder method's name on the
        # instance to the model it returns.
        self.F_x2y = self.F_x2y()
        self.G_y2x = self.G_y2x()

        # Domain mapping results
        im_fake_y = self.F_x2y(img_x)
        im_fake_x = self.G_y2x(img_y)

        # Results mapped back to the original domain
        reconstr_x = self.G_y2x(im_fake_y)
        reconstr_y = self.F_x2y(im_fake_x)

        # Style-translation outputs obtained through the F and G mappings
        translation_x2y = self.F_x2y(img_x)
        translation_y2x = self.G_y2x(img_y)

        # GAN discriminators D
        self.D_x = self.D_x()
        self.D_y = self.D_y()

        # Discriminator outputs.
        # Note: each discriminator takes a single input and gives a single
        # output.
        valid_x = self.D_x(im_fake_x)
        valid_y = self.D_y(im_fake_y)

        # Add the loss to each D and compile.
        # NOTE(review): both terms of loss1/loss2 score the *generated*
        # images (valid_x is D_x(im_fake_x)); a real-image term D_x(img_x)
        # and the sign of the objective look unintended -- confirm before
        # relying on these discriminator losses.
        loss1 = K.mean(K.log(valid_x)) + K.mean(K.log(1 - self.D_x(im_fake_x)))
        loss2 = K.mean(K.log(valid_y)) + K.mean(K.log(1 - self.D_y(im_fake_y)))
        self.D_x.add_loss(loss1)
        self.D_y.add_loss(loss2)
        self.D_x.compile(optimizer=Adam(2e-4, 0.5))
        self.D_y.compile(optimizer=Adam(2e-4, 0.5))

        # The whole model.
        # Inputs: two images.
        # Outputs: 1. discriminator outputs; 2. cycle-consistency outputs;
        #          3. desired outputs (i.e. the style-translated images).
        self.C_gan = Model([img_x, img_y], [
            valid_x, valid_y, reconstr_x, reconstr_y, translation_x2y,
            translation_y2x
        ])

        # Loss of the whole CycleGAN
        lamda = 0.1
        # Cycle consistency compares each image with its round trip:
        # G(F(x)) against x and F(G(y)) against y. The first term used to
        # compare F(x) with x, which penalises the translation itself.
        cyc_loss = K.mean(K.sum(K.abs(reconstr_x - img_x))) + K.mean(
            K.sum(K.abs(reconstr_y - img_y)))
        total_loss = loss1 + loss2 + lamda * cyc_loss

        # Add the loss and compile
        self.C_gan.add_loss(total_loss)
        self.C_gan.compile(optimizer=Adam(2e-4, 0.5))
# Latent input fed to the generator.
z_in = Input(shape=(z_dim, ))
# Freeze the generator while the discriminator-side training model is built.
g_model.trainable = False

x_fake = g_model(z_in)
# Encode the real and the generated sample with the same encoder.
x_real_encoded = e_model(x_in)
x_fake_encoded = e_model(x_fake)
# Score both signed differences of the two encodings.
x_real_fake = Subtract()([x_real_encoded, x_fake_encoded])
x_fake_real = Subtract()([x_fake_encoded, x_real_encoded])
x_real_fake_score = d_model(x_real_fake)
x_fake_real_score = d_model(x_fake_real)

d_train_model = Model([x_in, z_in], [x_real_fake_score, x_fake_real_score])

# Loss: mean of -log_sigmoid(score(real - fake)) - log_sigmoid(-score(fake - real)).
d_loss = K.mean(- log_sigmoid(x_real_fake_score) - log_sigmoid(- x_fake_real_score))
# The loss is attached with add_loss, so compile() gets no loss argument.
d_train_model.add_loss(d_loss)
d_train_model.compile(optimizer=Adam(2e-4, 0.5))

# Assemble the model (train the generator)
# Flip the flags: only the generator is trainable from here on.
g_model.trainable = True
d_model.trainable = False
e_model.trainable = False

# Rebuild the same graph under the new trainable flags.
x_fake = g_model(z_in)
x_real_encoded = e_model(x_in)
x_fake_encoded = e_model(x_fake)
x_real_fake = Subtract()([x_real_encoded, x_fake_encoded])
x_fake_real = Subtract()([x_fake_encoded, x_real_encoded])
x_real_fake_score = d_model(x_real_fake)
x_fake_real_score = d_model(x_fake_real)
def correlation(x, y):
    """Return the correlation of ``x`` and ``y`` computed along axis 1.

    Both tensors are mean-centred and L2-normalised along axis 1, then
    multiplied element-wise and summed along axis 1 (dimension kept).
    """
    x = x - K.mean(x, 1, keepdims=True)
    y = y - K.mean(y, 1, keepdims=True)
    x = K.l2_normalize(x, 1)
    y = K.l2_normalize(y, 1)
    return K.sum(x * y, 1, keepdims=True)


# Differences of the mean terms against the `z_fake_ng_mean` term.
t1_loss = z_real_mean - z_fake_ng_mean
t2_loss = z_fake_mean - z_fake_ng_mean
# Correlation between the input latent code and `z_fake`.
z_corr = correlation(z_in, z_fake)
# Penalty: 0.25 * t1_loss[:, 0]^2 divided by the mean squared difference of
# x_real and x_fake_ng over axes 1, 2 and 3.
qp_loss = 0.25 * t1_loss[:, 0]**2 / K.mean(
    (x_real - x_fake_ng)**2, axis=[1, 2, 3])

train_model.add_loss(K.mean(t1_loss + t2_loss - 1. * z_corr) + K.mean(qp_loss))
train_model.compile(optimizer=RMSprop(1e-4, 0.99))
#train_model.metrics_names.append('t_loss')
#train_model.metrics_tensors.append(K.mean(t1_loss))
train_model.add_metric(K.mean(t1_loss), 't_loss')
#train_model.metrics_names.append('z_corr')
#train_model.metrics_tensors.append(K.mean(z_corr))
# NOTE(review): this metric tracks z_corr but is registered as 'z_loss'
# (the commented-out version above used 'z_corr') -- confirm the name.
train_model.add_metric(K.mean(z_corr), 'z_loss')

# Inspect the model structure
train_model.summary()


class ExponentialMovingAverage:
    """Apply an exponential moving average to the model weights.
    Usage: use after model.compile and before the first training run;
# Cast the image batch to float32 and give it the expected batch shape.
x_train_batch = tf.cast(x_train_batch, tf.float32)
x_train_batch = tf.reshape(x_train_batch, shape=batch_shape)

# Labels: cast to int32, then one-hot encode over `classes` classes.
y_train_batch = tf.cast(y_train_batch, tf.int32)
y_train_batch = tf.one_hot(y_train_batch, classes)

x_batch_shape = x_train_batch.get_shape().as_list()
# y_batch_shape is not used in the lines visible here.
y_batch_shape = y_train_batch.get_shape().as_list()

# The model input wraps the data tensor directly, so fit() below is called
# without x/y arguments.
x_train_input = layers.Input(tensor=x_train_batch, batch_shape=x_batch_shape)
x_train_out = cnn_layers(x_train_input)
train_model = Model(inputs=x_train_input, outputs=x_train_out)

# The label tensor is not a model input, so the loss is built by hand and
# attached with add_loss.
cce = objectives.categorical_crossentropy(y_train_batch, x_train_out)
train_model.add_loss(cce)

# Do not pass the loss directly to model.compile()
# because it is not yet supported for Input Tensors.
train_model.compile(optimizer='rmsprop', loss=None, metrics=['accuracy'])
train_model.summary()

# Start the queue runners that feed the input tensors.
# NOTE(review): no coord.request_stop() / coord.join(threads) is visible in
# this excerpt -- confirm the runners are shut down after training.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess, coord)
train_model.fit(epochs=epochs, steps_per_epoch=steps_per_epoch)

train_model.save_weights('saved_wt.h5')
class GAN():
    """Fully connected GAN with pickle-based save/restore.

    The generator maps a ``z_dim`` vector to an image of shape ``img_shape``;
    the discriminator maps an image to a single sigmoid probability. A
    stacked model (generator followed by the frozen discriminator) is used to
    train the generator.
    """

    # Attributes written to / restored from the pickle file.
    _STATE_ATTRS = ('img_rows', 'img_cols', 'img_channels', 'img_shape',
                    'z_dim', 'iter_count', 'model_file', 'dataset_name',
                    'generator', 'discriminator')

    def __init__(self, dataset_name='mnist', load_model_name=''):
        """Build fresh models, or restore them from ``load_model_name``.

        Args:
            dataset_name: 'mnist' or 'cifar'; only used when building from
                scratch.
            load_model_name: path of a pickle written by ``save_gan_model``;
                the empty string means "build new models".
        """
        optimizer = Adam(0.0002, 0.5)

        if (load_model_name == ''):
            X_train = self.load_gan_data(dataset_name)
            # default parameters for mnist
            self.img_rows = X_train.shape[1]
            self.img_cols = X_train.shape[2]
            self.img_channels = X_train.shape[3]
            self.img_shape = (self.img_rows, self.img_cols, self.img_channels)
            self.z_dim = 32
            self.iter_count = 0
            self.dataset_name = dataset_name
            self.model_file = "./" + self.dataset_name + '_gan_model.pickle'

            # Build and compile the discriminator and discriminator loss
            self.discriminator = self.build_discriminator()
            # set discriminator loss
            self.discriminator.compile(loss='binary_crossentropy',
                                       optimizer=optimizer,
                                       metrics=['accuracy'])

            # Build the generator
            self.generator = self.build_generator()
        else:
            # load gan class and models (generator, discriminator and
            # stacked model)
            self.load_gan_model(load_model_name)

        # Create the stacked model
        # first, create the random vector z in the latent space
        z = Input(shape=(self.z_dim, ))
        # create generated (fake) image
        img = self.generator(z)

        # indicate that for the stacked model, the weights are not trained
        self.discriminator.trainable = False

        # The discriminator takes generated images as input and gives a
        # probability of whether it is a true or false image
        p_true = self.discriminator(img)

        # The combined model (stacked generator and discriminator)
        # In this model, we train the generator only
        self.stacked_gen_disc = Model(z, p_true)

        # loss: the generator minimises mean(log(1 - D(G(z))))
        generator_loss = K.mean(K.log(1 - p_true))
        self.stacked_gen_disc.add_loss(generator_loss)
        self.stacked_gen_disc.compile(optimizer=optimizer)

    def build_generator(self):
        """Return the generator model (``z_dim`` vector -> ``img_shape`` image)."""
        z_rand = Input(shape=(self.z_dim, ))

        y = Dense(256)(z_rand)
        y = LeakyReLU(alpha=0.2)(y)
        y = Dense(512)(y)
        y = LeakyReLU(alpha=0.2)(y)
        # Size the output from img_shape instead of hard-coding 784 and
        # (28, 28, 1): identical for mnist, and the generator now matches the
        # discriminator's input for other image sizes.
        y = Dense(int(np.prod(self.img_shape)))(y)
        y = Activation('tanh')(y)
        output_img = Reshape(target_shape=tuple(self.img_shape))(y)

        model_generator = Model(z_rand, output_img)
        model_generator.summary()

        return model_generator

    def build_discriminator(self):
        """Return the discriminator model (image -> sigmoid probability)."""
        input_img = Input(shape=self.img_shape)

        y = Flatten()(input_img)
        y = Dense(512)(y)
        y = LeakyReLU(alpha=0.2)(y)
        y = Dense(256)(y)
        y = LeakyReLU(alpha=0.2)(y)
        y = Dense(1)(y)
        p_true = Activation('sigmoid')(y)

        model_discriminator = Model(input_img, p_true)
        model_discriminator.summary()

        return model_discriminator

    def load_gan_data(self, dataset_name):
        """Load the training images of ``dataset_name`` rescaled by x/127.5 - 1.

        Args:
            dataset_name: 'mnist' or 'cifar'.

        Returns:
            Array of training images with a trailing channel axis.

        Raises:
            ValueError: for any other dataset name.
        """
        # Load the dataset
        if (dataset_name == 'mnist'):
            (X_train, _), (_, _) = mnist.load_data()
        elif (dataset_name == 'cifar'):
            from keras.datasets import cifar10
            (X_train, _), (_, _) = cifar10.load_data()
        else:
            # Used to print a message and then fail on the unbound X_train.
            raise ValueError(
                'Error, unknown database: %r' % (dataset_name, ))

        # Rescale -1 to 1
        X_train = X_train / 127.5 - 1.
        # add a channel dimension, if need be (for mnist data)
        if (X_train.ndim == 3):
            X_train = np.expand_dims(X_train, axis=3)
        return X_train

    def save_gan_model(self, model_file):
        """Pickle the restorable state of this GAN to ``model_file``.

        The stacked model is not saved (stored as an empty list); it is
        rebuilt by ``__init__`` after loading.
        """
        # Copy the state onto a bare instance rather than constructing a new
        # GAN, which would reload the dataset and rebuild every model.
        gan_temp = GAN.__new__(GAN)
        for attr in self._STATE_ATTRS:
            setattr(gan_temp, attr, getattr(self, attr))
        gan_temp.stacked_gen_disc = []
        with open(model_file, 'wb') as file_class:
            pickle.dump(gan_temp, file_class, -1)

    def load_gan_model(self, model_file):
        """Restore the state written by ``save_gan_model`` from ``model_file``."""
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # load model files that come from a trusted source.
        with open(model_file, "rb") as file_class:
            gan_temp = pickle.load(file_class)
        # copy parameters and models
        for attr in self._STATE_ATTRS:
            setattr(self, attr, getattr(gan_temp, attr))

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Alternate discriminator and generator updates.

        Args:
            epochs: iteration index to stop at; training resumes from
                ``self.iter_count``.
            batch_size: number of samples per update.
            sample_interval: every this many iterations a sample grid and the
                model are saved.
        """
        k = 1  # number of internal loops

        # load dataset
        X_train = self.load_gan_data(self.dataset_name)

        first_iter = self.iter_count

        for epoch in range(first_iter, epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------
            for _ in range(k):
                # Select a random batch of images
                idx = np.random.randint(0, X_train.shape[0], batch_size)
                imgs = X_train[idx]

                z_random = np.random.normal(0, 1, (batch_size, self.z_dim))

                # Generate a batch of new (fake) images
                gen_imgs = self.generator.predict(z_random)

                d_loss_real = self.discriminator.train_on_batch(
                    imgs, np.ones(batch_size))
                d_loss_fake = self.discriminator.train_on_batch(
                    gen_imgs, np.zeros(batch_size))
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            # Only fresh noise is needed: the stacked model runs the
            # generator itself, so no real batch or predict() call is made.
            z_random = np.random.normal(0, 1, (batch_size, self.z_dim))

            # Generator training : try to make generated images be
            # classified as true by the discriminator
            g_loss = self.stacked_gen_disc.train_on_batch(z_random, None)

            # increase epoch counter
            self.iter_count = self.iter_count + 1
            # Plot the losses
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # Save some random generated images and the models at every
            # sample_interval iterations
            if (epoch % sample_interval == 0):
                self.sample_images('images/' + self.dataset_name +
                                   '_sample_%06d.png' % epoch)
                self.save_gan_model(self.model_file)

    def sample_images(self, image_filename, rand_seed=30):
        """Save a 5x5 grid of generated images to ``image_filename``.

        Args:
            image_filename: output path of the figure.
            rand_seed: seed for the latent vectors, so successive grids are
                comparable.
        """
        import os

        # A private generator yields the same draws as seeding the global
        # one, without resetting the random stream used during training.
        rng = np.random.RandomState(rand_seed)

        r, c = 5, 5
        z_random = rng.normal(0, 1, (r * c, self.z_dim))
        gen_imgs = self.generator.predict(z_random)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        for cnt in range(r * c):
            ax = axs[cnt // c, cnt % c]
            if (gen_imgs.shape[3] == 1):
                # black and white images
                ax.imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
            elif (gen_imgs.shape[3] == 3):
                # colour images
                ax.imshow(gen_imgs[cnt, :, :])
            else:
                print("Error, unsupported channel size. Dude, I don't know "
                      "what you want me to do. I can't handle this data. "
                      "You've made me very sad ...")
            ax.axis('off')

        # Make sure the target directory exists before saving.
        out_dir = os.path.dirname(image_filename)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        fig.savefig(image_filename)
        plt.close(fig)
def vaegan_actual_model(original_dim=(64, 64, 3),
                        batch_size=64,
                        latent_dim=128,
                        epochs=50,
                        mse_flag=True):
    '''VAE model.

    Builds a convolutional encoder, a transposed-convolution decoder and the
    end-to-end VAE, attaches the VAE loss with ``add_loss`` and compiles it.

    Args:
        original_dim: input image shape ``(height, width, channels)``. The
            decoder upsamples an 8x8 map three times by 2 and emits 3
            channels, i.e. it produces 64x64x3 images.
        batch_size: not used inside this function.
        latent_dim: size of the latent code.
        epochs: not used inside this function.
        mse_flag: use MSE for the reconstruction term when true, binary
            cross-entropy otherwise.

    Returns:
        tuple: ``(encoder, decoder, vae)``.
    '''
    # VAE model = encoder + decoder
    # build encoder model
    input_shape = original_dim
    inputs = Input(shape=input_shape, name='encoder_input')
    x = Conv2D(64, (5, 5), strides=(2, 2), padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Flatten()(x)
    x = Dense(2048)(x)
    x = BatchNormalization()(x)
    # NOTE: this activation layer is *named* 'z_mean'; the actual mean is the
    # Dense layer named 'x_mean' below.
    x = Activation('relu', name='z_mean')(x)
    z_mean = Dense(latent_dim, name='x_mean')(x)
    z_log_var = Dense(latent_dim, name='x_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    plot_model(encoder, to_file='vaegan_encoder.png', show_shapes=True)

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(8 * 8 * 256)(latent_inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Reshape((8, 8, 256))(x)
    x = Conv2DTranspose(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(32, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2DTranspose(3, (5, 5), strides=(1, 1), padding='same')(x)
    outputs = Activation('tanh')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()
    plot_model(decoder, to_file='vaegan_decoder.png', show_shapes=True)

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae_mlp')
    #outputs = Dense(original_dim, activation='sigmoid')(x)

    # Flatten before the element-wise loss. On 4-D image tensors the Keras
    # losses reduce only the channel axis and return (batch, H, W); adding
    # that to the (batch,) KL term broadcasts only when batch == W (64 with
    # the defaults) and then pairs values from unrelated samples.
    flat_inputs = K.flatten(inputs)
    flat_outputs = K.flatten(outputs)
    if mse_flag:
        reconstruction_loss = mse(flat_inputs, flat_outputs)
    else:
        # NOTE(review): the decoder ends in tanh, so its outputs are not
        # confined to [0, 1] as binary cross-entropy expects -- confirm.
        reconstruction_loss = binary_crossentropy(flat_inputs, flat_outputs)

    reconstruction_loss *= original_dim[0] * original_dim[1] * original_dim[2]
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer=RMSprop(lr=0.0003))
    vae.summary()
    plot_model(vae, to_file='vae.png', show_shapes=True)
    return encoder, decoder, vae
def vae():
    """Train the fully connected VAE on MNIST and plot the results.

    Loads MNIST, flattens and rescales the images, builds the encoder,
    decoder and end-to-end model, fits it for 50 epochs and hands the
    encoder/decoder pair together with the test split to ``plot_results``.
    """
    # MNIST dataset
    (train_images, _), (test_images, y_test) = mnist.load_data()

    side = train_images.shape[1]
    original_dim = side * side

    def _as_flat_float(images):
        # One row per image, float32, divided by 255.
        return np.reshape(images, [-1, original_dim]).astype('float32') / 255

    x_train = _as_flat_float(train_images)
    x_test = _as_flat_float(test_images)

    # network parameters
    intermediate_dim = 512
    latent_dim = 2
    batch_size = 128
    epochs = 50

    # VAE model = encoder + decoder
    # --- encoder: original_dim -> intermediate_dim -> (mean, log-variance)
    encoder_in = Input(shape=(original_dim, ), name='encoder_input')
    encoder_hidden = Dense(intermediate_dim, activation='relu')(encoder_in)
    z_mean = Dense(latent_dim, name='z_mean')(encoder_hidden)
    z_log_var = Dense(latent_dim, name='z_log_var')(encoder_hidden)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    encoder = Model(encoder_in, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

    # --- decoder: latent_dim -> intermediate_dim -> original_dim
    decoder_in = Input(shape=(latent_dim, ), name='z_sampling')
    decoder_hidden = Dense(intermediate_dim, activation='relu')(decoder_in)
    decoder_out = Dense(original_dim, activation='sigmoid')(decoder_hidden)

    decoder = Model(decoder_in, decoder_out, name='decoder')
    decoder.summary()
    plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)

    # --- end-to-end model: decode the sampled z (third encoder output)
    reconstruction = decoder(encoder(encoder_in)[2])
    vae_model = Model(encoder_in, reconstruction, name='vae_mlp')

    # use either one of these loss
    # recon_term = mse(encoder_in, reconstruction)
    recon_term = binary_crossentropy(encoder_in, reconstruction)
    recon_term = recon_term * original_dim
    kl_term = K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                    axis=-1)
    kl_term = kl_term * -0.5
    vae_model.add_loss(K.mean(recon_term + kl_term))
    vae_model.compile(optimizer='adam')
    vae_model.summary()
    plot_model(vae_model, to_file='vae_mlp.png', show_shapes=True)

    # The loss is attached to the model, so no targets are passed.
    vae_model.fit(x_train,
                  epochs=epochs,
                  batch_size=batch_size,
                  validation_data=(x_test, None))

    # model and data
    plot_results((encoder, decoder), (x_test, y_test),
                 batch_size=batch_size,
                 model_name="vae_mlp")
# Extra input that is appended to both the inputs and the outputs of the
# training model.
o_in = Input(shape=(None, ))
train_model = Model(model.inputs + [o_in], model.outputs + [o_in])

# Cross-entropy as the loss, masking out the predictions for the input part
y_true = train_model.input[2][:, 1:]  # target tokens
y_mask = train_model.input[1][:, 1:]
y_pred = train_model.output[0][:, :-1]  # predicted tokens; predictions are shifted one position relative to the targets
cross_entropy = sparse_categorical_crossentropy(y_true, y_pred)
# Masked mean: only positions where y_mask is non-zero contribute.
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)

# Gradient penalty: sum of squared gradients of the loss with respect to the
# 'Embedding-Token' layer's embedding matrix.
embeddings = search_layer(train_model.output[0], 'Embedding-Token').embeddings
gp = K.sum(K.gradients(cross_entropy, [embeddings])[0].values**2)

train_model.add_loss(cross_entropy + 0.5 * gp)
train_model.compile(optimizer=Adam(1e-5))
# train_model.add_loss(cross_entropy)
# train_model.compile(optimizer=Adam(1e-5))


class AutoTitle(AutoRegressiveDecoder):
    """seq2seq decoder
    """
    @AutoRegressiveDecoder.set_rtype('probas')
    def predict(self, inputs, output_ids, step):
        token_ids, segment_ids = inputs
        # Append the tokens generated so far to the source tokens ...
        token_ids = np.concatenate([token_ids, output_ids], 1)
        # ... and give the generated part segment id 1.
        segment_ids = np.concatenate(
            [segment_ids, np.ones_like(output_ids)], 1)
def conv_vae():
    """Train the convolutional VAE on MNIST and plot the results.

    Loads MNIST as single-channel images divided by 255, builds a two-stage
    strided-convolution encoder and a mirrored transposed-convolution
    decoder, fits the end-to-end model for 30 epochs and passes the
    encoder/decoder pair with the test split to ``plot_results``.
    """
    # MNIST dataset
    (train_images, _), (test_images, y_test) = mnist.load_data()

    image_size = train_images.shape[1]

    def _as_image_batch(images):
        # Add the channel axis, cast to float32, divide by 255.
        batch = np.reshape(images, [-1, image_size, image_size, 1])
        return batch.astype('float32') / 255

    x_train = _as_image_batch(train_images)
    x_test = _as_image_batch(test_images)

    # network parameters
    batch_size = 128
    kernel_size = 3
    filters = 16
    latent_dim = 2
    epochs = 30

    # VAE model = encoder + decoder
    # --- encoder
    encoder_in = Input(shape=(image_size, image_size, 1),
                       name='encoder_input')
    features = encoder_in
    for _ in range(2):
        # Double the filter count before each strided convolution.
        filters *= 2
        features = Conv2D(filters=filters,
                          kernel_size=kernel_size,
                          activation='relu',
                          strides=2,
                          padding='same')(features)

    # shape info needed to build decoder model
    _, feat_h, feat_w, feat_c = K.int_shape(features)

    # generate latent vector Q(z|X)
    features = Flatten()(features)
    features = Dense(16, activation='relu')(features)
    z_mean = Dense(latent_dim, name='z_mean')(features)
    z_log_var = Dense(latent_dim, name='z_log_var')(features)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    encoder = Model(encoder_in, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    plot_model(encoder, to_file='vae_cnn_encoder.png', show_shapes=True)

    # --- decoder
    decoder_in = Input(shape=(latent_dim, ), name='z_sampling')
    upsampled = Dense(feat_h * feat_w * feat_c,
                      activation='relu')(decoder_in)
    upsampled = Reshape((feat_h, feat_w, feat_c))(upsampled)

    for _ in range(2):
        upsampled = Conv2DTranspose(filters=filters,
                                    kernel_size=kernel_size,
                                    activation='relu',
                                    strides=2,
                                    padding='same')(upsampled)
        # Halve the filter count after each transposed convolution.
        filters //= 2

    decoder_out = Conv2DTranspose(filters=1,
                                  kernel_size=kernel_size,
                                  activation='sigmoid',
                                  padding='same',
                                  name='decoder_output')(upsampled)

    decoder = Model(decoder_in, decoder_out, name='decoder')
    decoder.summary()
    plot_model(decoder, to_file='vae_cnn_decoder.png', show_shapes=True)

    # --- end-to-end model: decode the sampled z (third encoder output)
    reconstruction = decoder(encoder(encoder_in)[2])
    vae = Model(encoder_in, reconstruction, name='vae')

    recon_term = binary_crossentropy(K.flatten(encoder_in),
                                     K.flatten(reconstruction))
    recon_term = recon_term * (image_size * image_size)
    kl_term = K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
                    axis=-1)
    kl_term = kl_term * -0.5
    vae.add_loss(K.mean(recon_term + kl_term))
    vae.compile(optimizer='rmsprop')
    vae.summary()
    plot_model(vae, to_file='vae_cnn.png', show_shapes=True)

    # The loss is attached to the model, so no targets are passed.
    vae.fit(x_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_test, None))

    plot_results((encoder, decoder), (x_test, y_test),
                 batch_size=batch_size,
                 model_name="vae_cnn")
def build(self, score_func='cosine', margin=0.04, max_mention=False, avg_mention=False, add_cnn=None,
          encoder_type='self_attend_max', ent_attend_type='add'):
    """Build the margin-ranking training model and the entity-linking scoring model.

    A mention context and two candidate entity descriptions (one positive, one
    negative) share an embedding layer, are encoded with bidirectional
    CuDNN LSTMs (optionally combined with a 1-D convolution), reduced to
    vectors according to ``encoder_type`` and scored. The training model
    minimises ``mean(relu(margin + neg_score - pos_score))`` via ``add_loss``.

    Args:
        score_func: ``'dense'`` for a learned two-layer scorer over
            ``[m, e, m * e, m - e]``; any other value is forwarded to
            ``self.get_score_layer``.
        margin: margin of the hinge ranking loss.
        max_mention: append a masked max-pool over the mention span
            (self-attention encoders only).
        avg_mention: append an average over the mention span
            (self-attention encoders only).
        add_cnn: ``'before'`` / ``'after'`` to place a Conv1D before / after
            the LSTMs; anything else adds no convolution.
        encoder_type: one of ``'self_attend_max'``,
            ``'self_attend_single_attend'``, ``'co_attend'``, ``'max_co_attend'``.
        ent_attend_type: forwarded to ``SingleSideAttention`` when
            ``encoder_type == 'self_attend_single_attend'``.

    Returns:
        ``(train_model, link_model)``: ``train_model`` takes all inputs
        (including the negative description), outputs
        ``[pos_score, neg_score]`` and is compiled with
        ``self.config.optimizer``; ``link_model`` takes the same inputs minus
        the negative description and outputs ``pos_score``.

    Raises:
        ValueError: if ``encoder_type`` is not one of the supported values.
    """
    # 1. prepare input
    model_inputs = []
    link_model_inputs = []

    input_erl_text = Input(shape=(None, ))
    model_inputs.append(input_erl_text)
    link_model_inputs.append(input_erl_text)

    input_begin = Input(shape=(1, ))
    input_end = Input(shape=(1, ))
    model_inputs.extend([input_begin, input_end])
    link_model_inputs.extend([input_begin, input_end])

    if self.config.use_relative_pos:
        input_relative_pos = Input(shape=(None, ))
        model_inputs.append(input_relative_pos)
        link_model_inputs.append(input_relative_pos)

    input_pos_desc = Input(shape=(None, ))
    input_neg_desc = Input(shape=(None, ))
    model_inputs.extend([input_pos_desc, input_neg_desc])
    link_model_inputs.append(input_pos_desc)

    # CUDALSTM (or CNN) doesn't support masking, so we don't use mask_zero in embedding layer, instead we apply
    # masking on our own
    get_mask_layer = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), K.floatx()))
    erl_text_mask = get_mask_layer(input_erl_text)
    pos_desc_mask = get_mask_layer(input_pos_desc)
    neg_desc_mask = get_mask_layer(input_neg_desc)
    apply_mask_layer = Lambda(lambda x: x[0] * x[1])

    # 2. prepare embedding
    if self.config.embeddings is not None:
        embedding_layer = Embedding(input_dim=self.config.vocab_size,
                                    output_dim=self.config.embed_dim,
                                    weights=[self.config.embeddings],
                                    trainable=self.config.embed_trainable)
    else:
        embedding_layer = Embedding(input_dim=self.config.vocab_size,
                                    output_dim=self.config.embed_dim)

    erl_text_embed = SpatialDropout1D(0.2)(embedding_layer(input_erl_text))
    if self.config.use_relative_pos:
        rel_pos_embedding_layer = Embedding(
            input_dim=self.config.n_rel_pos_embed,
            output_dim=self.config.rel_pos_embed_dim)
        rel_pos_embed = rel_pos_embedding_layer(input_relative_pos)
        erl_text_embed = concatenate([erl_text_embed, rel_pos_embed])
    erl_text_embed = apply_mask_layer([erl_text_embed, erl_text_mask])

    pos_desc_embed = SpatialDropout1D(0.2)(embedding_layer(input_pos_desc))
    neg_desc_embed = SpatialDropout1D(0.2)(embedding_layer(input_neg_desc))
    pos_desc_embed = apply_mask_layer([pos_desc_embed, pos_desc_mask])
    neg_desc_embed = apply_mask_layer([neg_desc_embed, neg_desc_mask])

    # 3. encode mention & entity representation
    if add_cnn == 'before':
        erl_text_embed = Conv1D(filters=self.config.embed_dim,
                                kernel_size=3,
                                padding='same',
                                activation='relu')(erl_text_embed)
    erl_text_lstm = Bidirectional(
        CuDNNLSTM(units=self.config.embed_dim // 2,
                  return_sequences=True))(erl_text_embed)
    if add_cnn == 'after':
        erl_text_lstm = Conv1D(filters=self.config.embed_dim,
                               kernel_size=3,
                               padding='same',
                               activation='relu')(erl_text_lstm)
    erl_text_lstm = apply_mask_layer([erl_text_lstm, erl_text_mask])

    # the positive and negative descriptions share every entity-side layer
    if add_cnn == 'before':
        ent_cnn_layer = Conv1D(filters=self.config.embed_dim,
                               kernel_size=3,
                               padding='same',
                               activation='relu')
        pos_desc_embed = ent_cnn_layer(pos_desc_embed)
        neg_desc_embed = ent_cnn_layer(neg_desc_embed)
    ent_lstm_layer = Bidirectional(
        CuDNNLSTM(units=self.config.embed_dim // 2, return_sequences=True))
    pos_desc_lstm = ent_lstm_layer(pos_desc_embed)
    neg_desc_lstm = ent_lstm_layer(neg_desc_embed)
    if add_cnn == 'after':
        ent_cnn_layer = Conv1D(filters=self.config.embed_dim,
                               kernel_size=3,
                               padding='same',
                               activation='relu')
        pos_desc_lstm = ent_cnn_layer(pos_desc_lstm)
        neg_desc_lstm = ent_cnn_layer(neg_desc_lstm)
    pos_desc_lstm = apply_mask_layer([pos_desc_lstm, pos_desc_mask])
    neg_desc_lstm = apply_mask_layer([neg_desc_lstm, neg_desc_mask])

    if encoder_type in ['self_attend_max', 'self_attend_single_attend']:
        # mention representation based on self attention, first token &
        # last token, max or avg pooling (optional)

        # first token & last token of mention: pair each batch row index
        # with the given position and gather that timestep
        index_layer = Lambda(lambda x: tf.gather_nd(
            x[0],
            tf.concat([
                tf.expand_dims(tf.range(tf.shape(x[0])[0]), 1),
                tf.to_int32(x[1])
            ], axis=1)))
        mention_begin_embed = index_layer([erl_text_lstm, input_begin])
        mention_end_embed = index_layer([erl_text_lstm, input_end])
        mention_spand_embed, mention_index, mention_mask = Lambda(
            self.span_index)([erl_text_lstm, input_begin, input_end])

        # soft head attention
        head_score = TimeDistributed(Dense(
            1, activation='tanh'))(erl_text_lstm)
        mention_head_score = Lambda(lambda x: tf.squeeze(
            tf.gather_nd(x[0], tf.to_int32(x[1])), 2))(
                [head_score, mention_index])  # [batch_size, max_mention_length]

        # self attention
        mention_head_score = Lambda(
            self.softmax_with_mask)([mention_head_score, mention_mask])
        mention_attention = Lambda(
            lambda x: K.sum(x[0] * K.expand_dims(x[1], 2), 1))(
                [mention_spand_embed, mention_head_score])
        mention_embed = concatenate(
            [mention_begin_embed, mention_end_embed, mention_attention])

        # max pooling & avg pooling
        if max_mention:
            # push masked-out positions far below any real value before max
            mention_max_embed = Lambda(lambda x: K.max(
                x[0] - (1 - K.expand_dims(x[1], 2)) * 1e10, 1))(
                    [mention_spand_embed, mention_mask])
            mention_embed = concatenate([mention_embed, mention_max_embed])
        if avg_mention:
            mention_avg_embed = Lambda(
                lambda x: K.sum(x[0], 1) / K.sum(x[1], 1, keepdims=True))(
                    [mention_spand_embed, mention_mask])
            mention_embed = concatenate([mention_embed, mention_avg_embed])

        mention_embed = Dense(self.config.embed_dim, activation='relu')(
            mention_embed)  # [batch_size, embed_dim]
        # the mention vector does not depend on the candidate here
        mention_pos_embed = mention_embed
        mention_neg_embed = mention_embed

        if encoder_type == 'self_attend_max':
            # entity representation based on max pooling
            pos_embed = Lambda(
                self.seq_maxpool)([pos_desc_lstm, pos_desc_mask])
            neg_embed = Lambda(
                self.seq_maxpool)([neg_desc_lstm, neg_desc_mask])
        else:
            # entity representation based on single sided attention using
            # mention representation as query
            ent_attend_layer = SingleSideAttention(ent_attend_type)
            pos_embed = ent_attend_layer([mention_embed, pos_desc_lstm])
            neg_embed = ent_attend_layer([mention_embed, neg_desc_lstm])
    elif encoder_type in ['co_attend', 'max_co_attend']:
        # the mention vector is candidate-specific for these encoders
        attend_layer = InteractiveAttention(attend_type=encoder_type)
        mention_pos_embed, pos_embed = attend_layer(
            [erl_text_lstm, pos_desc_lstm])
        mention_neg_embed, neg_embed = attend_layer(
            [erl_text_lstm, neg_desc_lstm])
    else:
        # fixed: the message previously had no placeholder, so the
        # offending value was never reported
        raise ValueError(
            'encoder_type `{}` not understood'.format(encoder_type))

    if score_func == 'dense':
        hidden_layer = Dense(self.config.embed_dim, activation='relu')
        score_layer = Dense(1, activation='sigmoid')

        def dense_score(mention_vec, entity_vec):
            """Score a (mention, entity) pair with the shared dense scorer."""
            return score_layer(
                hidden_layer(
                    concatenate([
                        mention_vec, entity_vec,
                        multiply([mention_vec, entity_vec]),
                        subtract([mention_vec, entity_vec])
                    ])))

        pos_score = dense_score(mention_pos_embed, pos_embed)
        neg_score = dense_score(mention_neg_embed, neg_embed)
    else:
        score_layer = self.get_score_layer(score_func)
        pos_score = score_layer([mention_pos_embed, pos_embed])
        # fixed: pair the negative entity with the mention vector computed
        # against it, as the dense branch does (identical for the
        # self-attention encoders, different for co-attention)
        neg_score = score_layer([mention_neg_embed, neg_embed])

    link_model = Model(link_model_inputs, pos_score)

    # hinge ranking loss attached to the model, so compile() takes no loss
    loss = K.mean(K.relu(margin + neg_score - pos_score))
    train_model = Model(model_inputs, [pos_score, neg_score])
    train_model.add_loss(loss)
    train_model.compile(optimizer=self.config.optimizer)
    return train_model, link_model
def fit(self, Y, T, X, Z):
    """Estimate the counterfactual model from data.

    That is, estimate functions τ(·, ·, ·), ∂τ(·, ·).

    Training happens in two stages: a mixture-of-Gaussians treatment model
    of T given (Z, X) is fit for ``self._s1`` epochs, then the response
    model built from ``self._h`` is fit for ``self._s2`` epochs using samples
    from that treatment model. Afterwards ``self._effect_model`` and
    ``self._marginal_effect_model`` are set.

    Parameters
    ----------
    Y: (n × d_y) matrix
        Outcomes for each sample
    T: (n × dₜ) matrix
        Treatments for each sample
    X: (n × dₓ) matrix
        Features for each sample
    Z: (n × d_z) matrix
        Instruments for each sample

    All four arguments must currently be 2-dimensional with the same number
    of rows; this is enforced by the assertions below.

    Returns
    -------
    self

    """
    # TODO: allow 1D arguments for Y and T
    assert np.ndim(X) == np.ndim(Y) == np.ndim(T) == np.ndim(Z) == 2
    assert np.shape(X)[0] == np.shape(Y)[0] == np.shape(T)[0] == np.shape(Z)[0]

    d_x, d_y, d_z, d_t = [np.shape(a)[1] for a in [X, Y, Z, T]]
    x_in, y_in, z_in, t_in = [L.Input((d, )) for d in [d_x, d_y, d_z, d_t]]
    n_components = self._n_components

    # ---- stage 1: treatment model ----
    treatment_network = self._m(z_in, x_in)

    # the dimensionality of the output of the network
    # TODO: is there a more robust way to do this?
    d_n = K.int_shape(treatment_network)[-1]

    pi, mu, sig = mog_model(n_components, d_n, d_t)([treatment_network])

    ll = mog_loss_model(n_components, d_t)([pi, mu, sig, t_in])

    # the loss is the model output itself, so it is attached with add_loss
    # and fit() is called with an empty target list
    model = Model([z_in, x_in, t_in], [ll])
    model.add_loss(L.Lambda(K.mean)(ll))
    model.compile(self._optimizer)
    # TODO: do we need to give the user more control over other arguments to fit?
    model.fit([Z, X, T], [], epochs=self._s1)

    # ---- stage 2: response model ----
    lm = response_loss_model(
        lambda t, x: self._h(t, x),
        lambda z, x: Model(
            [z_in, x_in],
            # subtle point: we need to build a new model each time,
            # because each model encapsulates its randomness
            [mog_sample_model(n_components, d_t)([pi, mu, sig])])([z, x]),
        d_z, d_x, d_y,
        self._n_samples, self._use_upper_bound_loss, self._n_gradient_samples)

    rl = lm([z_in, x_in, y_in])
    response_model = Model([z_in, x_in, y_in], [rl])
    response_model.add_loss(L.Lambda(K.mean)(rl))
    response_model.compile(self._optimizer)
    # TODO: do we need to give the user more control over other arguments to fit?
    response_model.fit([Z, X, Y], [], epochs=self._s2)

    self._effect_model = Model([t_in, x_in], [self._h(t_in, x_in)])

    # TODO: it seems like we need to sum over the batch because we can only apply gradient to a scalar,
    #       not a general tensor (because of how backprop works in every framework)
    #       (alternatively, we could iterate through the batch in addition to iterating through the output,
    #       but this seems annoying...)
    #       Therefore, it's important that we use a batch size of 1 when we call predict with this model
    def calc_grad(t, x):
        h = self._h(t, x)
        # one gradient tensor per output coordinate, concatenated and
        # reshaped to (batch, d_y, d_t)
        all_grads = K.concatenate([
            g for i in range(d_y)
            for g in K.gradients(K.sum(h[:, i]), [t])
        ])
        return K.reshape(all_grads, (-1, d_y, d_t))

    self._marginal_effect_model = Model(
        [t_in, x_in],
        L.Lambda(lambda tx: calc_grad(*tx))([t_in, x_in]))

    # fixed: the docstring promises ``self`` but nothing was returned
    return self
else: for _ in outer_layers: _.append(None) final_actnorm = Actnorm() final_concat = Concat() final_reshape = Reshape() x = final_actnorm(x) x = final_reshape(x) x = final_concat(x_outs+[x]) encoder = Model(x_in, x) for l in encoder.layers: if hasattr(l, 'logdet'): encoder.add_loss(l.logdet) encoder.summary() encoder.compile(loss=lambda y_true,y_pred: 0.5 * K.sum(y_pred**2, 1) + 0.5 * np.log(2*np.pi) * K.int_shape(y_pred)[1], optimizer=Adam(1e-4)) # 搭建逆模型(生成模型),将所有操作倒过来执行 x_in = Input(shape=K.int_shape(encoder.outputs[0])[1:]) x = x_in x = final_concat.inverse()(x) outputs = x[:-1] x = x[-1] x = final_reshape.inverse()(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) x = UpSampling2D((2, 2))(x) #todo NOTICE there is no padding here, to match the dimensions needed. x = Conv2D(16, (3, 3), activation='relu')(x) x = UpSampling2D((2, 2))(x) decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x) #computing the VAE loss # xent_loss = metrics.binary_crossentropy(input_img,decoded) xent_loss = K.mean(metrics.binary_crossentropy(input_img, decoded)) reward_based_loss = input_reward * xent_loss # reward_based_loss = xent_loss #full AE model autoencoder = Model(inputs=[input_img, input_reward], outputs=[decoded]) autoencoder.add_loss(reward_based_loss) autoencoder.compile(optimizer='sgd') autoencoder.summary() #encoder model encoder = Model(input_img, encoded) #decoder model if not train_model: autoencoder.load_weights(filepath=model_weights_file) else: #todo try changing the loss function to return a single value as opposed to an array and see if tensorboard works autoencoder.fit([x_train, x_train_reward], epochs=4, batch_size=1, shuffle=True,
#decoder = Model(inputs = [latent_input], outputs= [End], name='decoder') #decoder.summary() #decoded_outputs = decoder(encoder(Image_input)[2]) vae = Model(inputs=[Image_input], outputs = [End], name='vae_mlp') vae.summary() kl_loss = 1 + End_log_var - K.square(End_mean) - K.exp(End_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 reconstruction_loss = mse(Image_input, End) #reconstruction_loss *= original_dim vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) adam = optimizers.Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999) vae.compile(optimizer=adam) #plot_model(vae, #to_file='vae_mlp.png', #show_shapes=True) <<<<<<< HEAD history = vae.fit_generator(gen_sample(), epochs = 1, steps_per_epoch = 2) vae.save('multi_model.h5') vae.save_weights('multi_weights.h5') ======= vae.fit_generator(gen_sample(), epochs = 1000, steps_per_epoch = 30) vae.save('model.h5') vae.save_weights('weights.h5') >>>>>>> e46f1601b0fa2664ade7fbee5e33cf18e1098ce7
outputs = decoder(encoder(inputs)[2]) vae = Model(inputs, outputs, name='vae') encoder.summary() decoder.summary() # loss #reconstruction_loss = mse(inputs, outputs) reconstruction_loss = binary_crossentropy(inputs, outputs) reconstruction_loss *= ORIG_IMG_SHAPE[0] * ORIG_IMG_SHAPE[1] k1_loss = 1 + latent_log_var - K.square(latent_mean) - K.exp(latent_log_var) k1_loss = K.sum(k1_loss, axis=-1) k1_loss *= -0.5 loss = K.mean(reconstruction_loss + k1_loss) vae.add_loss(loss) vae.compile(optimizer='Adam') vae.summary() # encoder.load_weights('encoder_weights.h5') # decoder.load_weights('decoder_weights.h5') vae.fit(train_data, epochs=EPOCH_NUM, batch_size=BATCH_SIZE, validation_data=(test_data, None)) encoder.save_weights('encoder_weights.h5') decoder.save_weights('decoder_weights.h5') # plot results for some test data for img in test_data[:3]:
class GAN():
    """Convolutional variational autoencoder for 48x48 RGB images.

    Despite the class name, the model built here is an encoder/decoder pair
    trained with a VAE objective (reconstruction MSE plus KL divergence)
    attached through ``add_loss``.
    """

    def __init__(self):
        """Build the encoder, the decoder and the combined VAE model."""
        self.img_rows = 48
        self.img_cols = 48
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.latent_dim = 64
        self.flat_dim = self.img_rows * self.img_cols * self.channels
        self.batch_size = 32

        # VAE model = encoder + decoder
        self.encoder = self.build_encoder()
        self.encoder.summary()
        # plot_model(self.encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

        # build decoder model
        self.decoder = self.build_decoder()
        self.decoder.summary()
        # plot_model(self.decoder, to_file='vae_mlp_decoder.png', show_shapes=True)

        # instantiate VAE model; this overwrites the self.outputs set by
        # build_decoder with the end-to-end reconstruction tensor
        self.outputs = self.decoder(self.encoder(self.inputs)[2])
        self.vae = Model(self.inputs, self.outputs, name='vae_mlp')

        # fixed latent vectors so generated samples are comparable across epochs
        self.sample_noise = np.random.normal(
            0, 1, (5 * 5, self.latent_dim))  # 5 * 5 = r * c

    def build_encoder(self):
        """Return the encoder model mapping an image to ``[z_mean, z_log_var, z]``.

        Also stores ``self.inputs``, ``self.z_mean`` and ``self.z_log_var``,
        which ``train`` needs to build the loss.
        """
        self.inputs = Input(
            shape=self.img_shape, name='encoder_input'
        )  # , batch_shape=(self.batch_size, self.img_rows, self.img_cols, self.channels),)
        # NOTE(review): each conv already applies relu, so the LeakyReLU
        # that follows never sees a negative input; kept to preserve the
        # layer layout (and saved weights).
        h_l = Conv2D(16, 5, activation='relu', strides=2)(self.inputs)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Dropout(0.2)(h_l)
        h_l = Conv2D(32, 5, activation='relu', strides=2)(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Dropout(0.2)(h_l)
        h_l = Conv2D(64, 5, activation='relu', strides=2)(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Dropout(0.2)(h_l)
        h_l = Flatten()(h_l)
        h_l = Dense(128, activation='relu')(h_l)
        # h_l = Dense(self.latent_dim, activation='sigmoid')(h_l)

        self.z_mean = Dense(self.latent_dim, name='z_mean')(h_l)
        self.z_log_var = Dense(self.latent_dim, name='z_log_var')(h_l)

        # use reparameterization trick to push the sampling out as input
        # note that "output_shape" isn't necessary with the TensorFlow backend
        z = Lambda(self.sampling, output_shape=(self.latent_dim, ),
                   name='z')([self.z_mean, self.z_log_var])

        # instantiate encoder model
        return Model(self.inputs, [self.z_mean, self.z_log_var, z],
                     name='encoder')

    def build_decoder(self):
        """Return the decoder model mapping a latent vector to a 48x48x3 image in [0, 1]."""
        latent_inputs = Input(
            shape=(self.latent_dim, ), name='z_sampling'
        )  # , batch_shape=(self.batch_size, self.latent_dim))
        h_l = Dense(12 * 12 * 128,
                    activation='relu')(latent_inputs)  # TODO: Batch norm?
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Reshape((12, 12, 128))(h_l)
        h_l = Conv2DTranspose(64, (7, 7),
                              padding='same',
                              strides=(1, 1),
                              activation='relu')(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Conv2DTranspose(32, (5, 5),
                              padding='same',
                              strides=(2, 2),
                              activation='relu')(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        # NOTE(review): the batch_size keyword is passed through unchanged;
        # it looks unnecessary for a non-input layer -- confirm and drop.
        self.outputs = Conv2DTranspose(3, (5, 5),
                                       padding='same',
                                       strides=(2, 2),
                                       activation='sigmoid',
                                       batch_size=self.batch_size)(h_l)
        # self.outputs = Flatten()(h_l)
        # self.outputs = Dense(self.flat_dim, activation='sigmoid')(h_l)
        # self.outputs = Reshape(target_shape=self.img_shape)(self.outputs)

        # instantiate decoder model
        return Model(latent_inputs, self.outputs, name='decoder')

    # reparameterization trick
    # instead of sampling from Q(z|X), sample epsilon = N(0,I)
    # z = z_mean + sqrt(var) * epsilon
    def sampling(self, args):
        """Reparameterization trick by sampling from an isotropic unit Gaussian.

        # Arguments
            args (tensor): mean and log of variance of Q(z|X)

        # Returns
            z (tensor): sampled latent vector
        """
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        # by default, random_normal has mean = 0 and std = 1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    def load_images(self, path="images/preprocessed/48x48/oranges/"):
        """Load every file in ``path`` as an RGB image.

        # Arguments
            path (str): directory containing the images; every entry is
                opened, and each image must already be
                ``img_rows`` x ``img_cols``.

        # Returns
            float array of shape (n_files, img_rows, img_cols, channels)
            holding the raw pixel values (not normalised).
        """
        # list the directory once so the array size and the loop agree
        names = os.listdir(path)
        result = np.zeros(shape=(len(names), self.img_rows, self.img_cols,
                                 self.channels))
        for idx, name in enumerate(names):
            # the context manager closes the underlying file handle
            with Image.open(os.path.join(path, name)) as img:
                result[idx] = np.array(img.convert("RGB"))
        return result

    def train(self,
              epochs,
              batch_size=32,
              sample_interval=50,
              save_interval=1500):
        """Train the VAE, saving weights and sample images every ``sample_interval`` epochs.

        # Arguments
            epochs (int): total number of epochs to train.
            batch_size (int): mini-batch size.
            sample_interval (int): epochs per training round; after each
                round the weights are saved and sample images are written.
            save_interval (int): currently unused.
        """
        loss = []
        history = None

        # Load the images and normalize them to [0, 1]
        X_train = self.load_images()
        X_train = X_train / 255

        # VAE loss = mse_loss or xent_loss + kl_loss
        reconstruction_loss = K.mean(mse(self.inputs, self.outputs))
        reconstruction_loss *= self.img_rows * self.img_cols
        kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(
            self.z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        print(reconstruction_loss.shape, kl_loss.shape)
        vae_loss = K.mean(reconstruction_loss + kl_loss)
        self.vae.add_loss(vae_loss)
        self.vae.compile(optimizer='adam')
        self.vae.summary()
        # plot_model(self.vae,
        #            to_file='vae_mlp.png',
        #            show_shapes=True)

        rounds = int(epochs / sample_interval)
        try:
            for i in range(1, rounds + 1):
                print("True Epoch: " + str(i * sample_interval))
                # train the autoencoder
                # fixed: each round now runs sample_interval epochs, so
                # the total equals `epochs` and matches the printed label
                # (previously each round ran epochs / sample_interval)
                history = self.vae.fit(
                    X_train,
                    shuffle=True,
                    epochs=int(sample_interval),
                    batch_size=batch_size,
                    validation_data=(X_train, None))  # TODO: make test
                self.vae.save_weights('vae_mlp_fruit.h5')
                self.sample_images(X_train, i * sample_interval, noise=False)
                self.sample_images(X_train, i * sample_interval)
                loss.append(history.history['loss'])
        except KeyboardInterrupt:
            # allow manual early stopping; whatever was recorded is plotted
            pass

        if not loss:
            # no round completed: nothing to pickle or plot
            return

        loss = np.stack(loss).flatten()
        # NOTE(review): this pickles the History object of the last round,
        # as before; confirm it is picklable with the Keras version in use.
        with open("histories/%d-history.pkl" % time.time(),
                  "wb") as history_file:
            pickle.dump(history, history_file)

        plt.clf()
        plt.plot(loss, label="loss")
        plt.legend()
        plt.title(label='VAE-GAN Loss')
        plt.savefig("images/plots/%d-vae-gan_loss.png" % time.time())
        plt.show()

    def sample_images(self, X_train, epoch, noise=True):
        """Write a PNG of generated samples or of reconstructions.

        # Arguments
            X_train (array): training images scaled to [0, 1].
            epoch (int): epoch number used in the output file name.
            noise (bool): if True decode the fixed ``self.sample_noise``
                into a 5x5 grid; otherwise plot up to 10 training images
                above their reconstructions.
        """
        if noise:
            r, c = 5, 5
            gen_imgs = self.decoder.predict(self.sample_noise,
                                            batch_size=r * c)

            # fixed: the decoder ends in a sigmoid, so its output already
            # lies in [0, 1]; the former 0.5 * x + 0.5 rescale (meant for
            # tanh outputs) squeezed every image into [0.5, 1]
            gen_imgs = np.stack(gen_imgs).reshape(
                (r * c, self.img_rows, self.img_cols, self.channels))

            fig, axs = plt.subplots(r, c)
            cnt = 0
            for i in range(r):
                for j in range(c):
                    axs[i, j].imshow(gen_imgs[cnt, :, :, :])
                    axs[i, j].axis('off')
                    cnt += 1
            fig.savefig("images/vae-conv-generated-%d.png" % epoch)
            plt.close()
        else:
            # only the displayed images are encoded, not the whole set
            n = min(10, len(X_train))  # how many images we will display
            shown = X_train[:n]
            encoded_imgs = self.encoder.predict(shown)
            decoded_imgs = self.decoder.predict(encoded_imgs[2])

            plt.figure(figsize=(20, 4))
            for i in range(n):
                # display original
                ax = plt.subplot(2, n, i + 1)
                plt.imshow(shown[i].reshape(self.img_shape))
                plt.gray()
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)

                # display reconstruction
                ax = plt.subplot(2, n, i + 1 + n)
                plt.imshow(decoded_imgs[i].reshape(self.img_shape))
                plt.gray()
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
            plt.savefig("images/vae-conv-reconstructed%d.png" % epoch)
            plt.close()
my_max_sent_size_overall=max_sent_size_overall, my_n_cats=n_cats) my_optimizer = optimizers.SGD( lr=base_lr, momentum=max_mt, # we decrease momentum when lr increases decay=1e-5, nesterov=True) if regularization > 0: my_loss = CustomLossWrapper( regularization * InformationRegularizer(sent_att_vecs, context)) cahan.compile(loss=my_loss, optimizer=my_optimizer, metrics=['accuracy']) if regularization > 0: cahan.add_loss(regularization * InformationRegularizer(sent_att_vecs, context)) lr_sch = CyclicLR(base_lr=base_lr, max_lr=max_lr, step_size=step_size, mode='triangular') mt_sch = CyclicMT(base_mt=base_mt, max_mt=max_mt, step_size=step_size, mode='triangular') early_stopping = EarlyStopping( monitor= 'val_acc', # go through epochs as long as accuracy on validation set increases patience=my_patience, mode='max')
# Model Input Im_In = Input(shape=(im_dim, im_dim, im_ch)) Encoded, z_mean, z_log_sd = EncoderModel(Im_In) Im_Out = DecoderModel(Encoded) # Compile Model VAE = Model(Im_In, Im_Out) # Compute VAE loss xent_loss = im_dim * im_dim * metrics.binary_crossentropy( K.flatten(Im_In), K.flatten(Im_Out)) kl_loss = -0.5 * K.sum( 1 + z_log_sd - K.square(z_mean) - K.exp(z_log_sd), axis=-1) vae_loss = xent_loss + beta * K.mean(kl_loss) VAE.add_loss(vae_loss) VAE.compile(optimizer='adam', loss=None) VAE.load_weights("Completed_Training_beta={}.h5".format(beta)) if gen_data: train_data = LoadData(dup=False) test_data = LoadData(method='test') VAE.load_weights( "Checkpoint_Training_beta_double={}.hdf5".format(beta)) header_str = "id resp x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 x30 x31 x32" fmt_str = "%d %d %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f" train_1 = EncoderModel.predict(train_data.pairs[:, 0, ...])[selkey]
class BetaConvVAE(AutoEncoder):
    """Convolutional beta-VAE for 100x100 RGB images.

    ``build`` creates three models that share weights: ``autoencoder``
    (image -> reconstruction, carrying the VAE loss), ``encoder``
    (image -> latent mean) and ``decoder`` (latent vector -> image).
    """

    def __init__(self, beta=1, data_generator=None, latent_size=2):
        """Store hyper-parameters and create the result / model folders.

        Arguments:
        beta -- weight of the KL term in the loss
        data_generator -- object providing ``number_to_augment`` and
                          ``data_name``; both are read here, so ``None``
                          (the default) is not actually usable
        latent_size -- dimensionality of the latent space
        """
        self.data_generator = data_generator
        self.model_name = "beta_conv_vae_beta" + str(beta) + "_augment_" + str(self.data_generator.number_to_augment)
        self.results_folder = "results/" + self.data_generator.data_name + "/" + self.model_name
        self.models_folder = "models/" + self.data_generator.data_name + "/" + self.model_name
        # exist_ok avoids the check-then-create race of the former
        # os.path.exists() guard
        os.makedirs(self.results_folder, exist_ok=True)
        os.makedirs(self.models_folder, exist_ok=True)
        self.latent_size = latent_size
        self.beta = beta
        self.history = None
        self.encoder = None
        self.decoder = None
        self.autoencoder = None

    # define model
    # image format is channels last - (batch_size, x, y, no_filters)
    def build(self):
        """Create ``self.autoencoder``, ``self.encoder`` and ``self.decoder``.

        The VAE loss, ``mean(binary_crossentropy + beta * KL)``, is attached
        to ``self.autoencoder`` with ``add_loss``; the model is not compiled
        here.
        """
        latent_size = self.latent_size

        # ---- encoder ----
        input_img = Input(shape=(100, 100, 3))  # adapt this if using `channels_first` image data format
        conv_0_encoded = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)  # (100, 100)
        pool_0_encoded = MaxPooling2D((2, 2), padding='same')(conv_0_encoded)  # (50, 50)
        conv_1_encoded = Conv2D(8, (3, 3), activation='relu', padding='same')(pool_0_encoded)  # (50, 50)
        pool_1_encoded = MaxPooling2D((2, 2), padding='same')(conv_1_encoded)  # (25, 25)
        conv_2_encoded = Conv2D(8, (4, 4), activation='relu')(pool_1_encoded)  # (22, 22)
        pool_2_encoded = MaxPooling2D((2, 2), padding='same')(conv_2_encoded)  # (11, 11)
        conv_3_encoded = Conv2D(8, (4, 4), activation='relu')(pool_2_encoded)  # (8, 8)
        reshaped_encoded = Flatten()(conv_3_encoded)  # 8 * 8 * 8 = 512 features
        dense_0_encoded = Dense(8)(reshaped_encoded)  # 8 features
        # fixed: the latent width follows self.latent_size instead of a
        # hard-coded 2, so it always matches the decoder input below
        z_mean = Dense(latent_size)(dense_0_encoded)
        z_log_var = Dense(latent_size)(dense_0_encoded)

        def sampling(args):
            """Draw z = mean + std * eps with eps ~ N(0, I)."""
            z_mean, z_log_var = args
            epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_size), mean=0., stddev=1.0)
            # fixed: z_log_var is a log-variance (the KL term below treats
            # it as such), so the standard deviation is exp(0.5 * log_var),
            # not exp(log_var)
            return z_mean + K.exp(0.5 * z_log_var) * epsilon

        z = Lambda(sampling, output_shape=(latent_size,))([z_mean, z_log_var])

        # ---- decoder layers, shared by the training and generation paths ----
        decoder_layers = [
            Dense(8),
            Dense(8 * 8),  # 64 features
            Reshape([8, 8, 1]),  # (8, 8)
            Conv2D(8, (3, 3), activation='relu', padding='same'),  # (8, 8)
            UpSampling2D((2, 2)),  # (16, 16)
            Conv2D(16, (3, 3), activation='relu'),  # (14, 14)
            UpSampling2D((2, 2)),  # (28, 28)
            Conv2D(16, (4, 4), activation='relu'),  # (25, 25)
            UpSampling2D((2, 2)),  # (50, 50)
            Conv2D(16, (3, 3), activation='relu', padding='same'),  # (50, 50)
            UpSampling2D((2, 2)),  # (100, 100)
            Conv2D(3, (3, 3), activation='sigmoid', padding='same'),  # (100, 100)
        ]

        def decode(tensor):
            """Apply the shared decoder layers, in order, to ``tensor``."""
            for layer in decoder_layers:
                tensor = layer(tensor)
            return tensor

        # instantiate layers for training
        output_img = decode(z)

        # instantiate layers for test-time generation from latent space samples
        latent = Input(shape=(latent_size,))
        _output_img = decode(latent)

        # define the 3 models
        self.autoencoder = Model(input_img, output_img)
        self.encoder = Model(input_img, z_mean)
        self.decoder = Model(latent, _output_img)

        # VAE loss: reconstruction cross-entropy plus beta-weighted KL
        xent_loss = metrics.binary_crossentropy(K.flatten(input_img), K.flatten(output_img))
        kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        vae_loss = K.mean(xent_loss + self.beta*kl_loss)
        self.autoencoder.add_loss(vae_loss)
def hieroRecoModel_online(input_shape):
    """
    Triplet-loss embedding network on top of an ImageNet-pretrained VGG19.

    All VGG19 layers except the last four are frozen; a small dense head
    maps the convolutional features to an L2-normalised 128-d embedding.

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    features -- Model mapping one image to its embedding
    loss_model -- Model over [anchor, positive, negative] inputs whose
                  triplet loss is attached with add_loss; compiled with adam
    """
    # VGG19 backbone for transfer learning, without its classification top
    backbone = applications.VGG19(weights="imagenet", include_top=False, input_shape=input_shape)

    # Freeze everything but the last 4 layers, reporting each layer's state
    n_frozen = max(len(backbone.layers) - 4, 0)
    for position, layer in enumerate(backbone.layers):
        if position < n_frozen:
            layer.trainable = False
        print(layer, layer.trainable)

    # Custom head, ending in the L2 normalization of the embedding
    head = [
        Flatten(),
        Dense(512, activation="relu"),
        Dropout(0.5),
        Dense(128, activation="relu"),
        Lambda(lambda x: K.l2_normalize(x, axis=1)),
    ]
    embedding = backbone.output
    for layer in head:
        embedding = layer(embedding)

    features = Model(backbone.input, embedding, name="features")

    # Siamese inputs, in the order anchor, positive, negative, all embedded
    # by the same shared network
    input_triplet = [Input(shape=input_shape) for _ in range(3)]
    output_features = [features(branch) for branch in input_triplet]

    # Trainable model: the loss is attached to the graph, so compile gets loss=None
    loss_model = Model(inputs=input_triplet, outputs=output_features, name='loss')
    loss_model.add_loss(K.mean(triplet_loss(output_features)))
    loss_model.compile(loss=None, optimizer='adam')

    return features, loss_model
class EvadeGAN:
    """GAN that perturbs feature vectors so that a target classifier is evaded.

    The class holds three Keras models:

    * ``generator`` -- maps ``[x, z]`` to ``max(perturbation, x)``, so the
      output is never smaller than the input ``x``;
    * ``discriminator`` -- a single-hidden-layer binary classifier;
    * ``GAN`` -- generator followed by the (frozen) discriminator, with an
      additional loss that penalises L1 changes above ``max_changes``.

    ``g_input`` selects what the generator's hidden layer sees: ``'z'``
    (noise only), ``'x'`` (sample only) or anything else (``x`` and ``z``
    concatenated).
    """

    def __init__(self, target_model, x_dim=10000, z_dim=100, g_input='xz',
                 g_params=None, d_params=None, d_compile_params=None,
                 gan_compile_params=None, summary=False, bin_threshold=0.5):
        """Build the generator, the discriminator and the combined GAN.

        :param target_model: Fitted ``LinearSVC`` or ``SVC`` wrapped into
            ``TargetModel``
        :param x_dim: Dimension of a sample vector
        :param z_dim: Dimension of the noise vector
        :param g_input: ``'z'``, ``'x'`` or anything else for ``'xz'``
        :param g_params: Keyword arguments for ``build_generator``
        :param d_params: Keyword arguments for ``build_discriminator``
        :param d_compile_params: Further keyword arguments for
            ``build_discriminator`` (loss / metrics / optimizer)
        :param gan_compile_params: Keyword arguments for ``build_GAN``
        :param summary: Whether to print the model summaries
        :param bin_threshold: Threshold passed to ``binarise``
        """
        # None sentinels instead of shared mutable default dicts
        g_params = {} if g_params is None else g_params
        d_params = {} if d_params is None else d_params
        d_compile_params = {} if d_compile_params is None \
            else d_compile_params
        gan_compile_params = {} if gan_compile_params is None \
            else gan_compile_params

        self.graph = tf.compat.v1.get_default_graph()
        self.target_model = self.TargetModel(target_model)
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.bin_threshold = bin_threshold

        self.g_input = g_input
        if self.g_input == 'z':
            self.name = 'EvadeGANz'
            self.setting = "Sample-Independent Perturbations Z"
            self.save_dir = 'EvadeGANz/'
        elif self.g_input == 'x':
            self.name = 'EvadeGANx'
            self.setting = "Sample-Dependent Perturbations X"
            self.save_dir = 'EvadeGANx/'
        else:
            self.name = 'EvadeGANxz'
            self.setting = "Sample-Dependent Perturbations XZ"
            self.save_dir = 'EvadeGANxz/'

        if summary:
            print(f"Summary of {self.name} Models [{self.setting}]:\n"
                  + '=' * 62 + '\n')

        # Build the generator
        self.generator = self.build_generator(**g_params, summary=summary)

        # Build & compile the discriminator
        self.discriminator = self.build_discriminator(**d_params,
                                                      **d_compile_params,
                                                      summary=summary)

        # Build & compile the adversarial network, GAN
        self.GAN = self.build_GAN(**gan_compile_params, summary=summary)

        # Combine logs
        self.log_params = {'G': [self.g_log],
                           'D': [self.d_log],
                           'GAN': [self.gan_log]}

    def build_generator(self, n_hidden=256, h_activation='relu',
                        regularizers=None, batchnorm=False,
                        out_activation='sigmoid', drop_rate=0,
                        summary=False):
        """Builds a generator using the passed hyperparameters.

        :param n_hidden: Units in the hidden layer
        :param h_activation: Activation of the hidden layer
        :param regularizers: Keyword arguments forwarded to the output
            ``Dense`` layer and to ``get_reg_factors``
        :param batchnorm: Whether to batch-normalise the hidden layer
        :param out_activation: Activation of the perturbation layer
        :param drop_rate: Dropout rate applied to the perturbation
        :param summary: Whether to print the model summary
        :return: The generator model (also stored in ``self.generator``)
        """
        regularizers = {} if regularizers is None else regularizers

        # Input: xz, z, or x
        x = Input(shape=(self.x_dim,), name='g_x_input')
        z = Input(shape=(self.z_dim,), name='g_z_input')
        if self.g_input == 'z':
            g_input = z
        elif self.g_input == 'x':
            g_input = x
        else:
            g_input = Concatenate(axis=1, name='g_xz_input')([x, z])

        # Hidden
        hidden = Dense(n_hidden, activation=h_activation,
                       name='g_hidden_relu')(g_input)
        if batchnorm:
            hidden = BatchNormalization(name='g_hidden_bn')(hidden)

        perturb = Dense(self.x_dim, activation=out_activation,
                        **regularizers,
                        name='g_perturb_sigmoid')(hidden)

        # Dropout
        perturb = Dropout(drop_rate, name='perturb_dropout')(perturb)
        # NB: dropout scales up the kept inputs, so clip to stay <= 1
        # before taking the element-wise maximum with x below.
        perturb = K.minimum(perturb, 1)

        # Output: element-wise max keeps every value of x that exceeds
        # the perturbation
        x_adv = Maximum(name='g_adv_max')([perturb, x])

        self.generator = Model([x, z], x_adv, name='Generator')
        if summary:
            self.generator.summary()
            print()

        # G parameters for logging
        self.reg = get_reg_factors(regularizers)
        self.g_log = {'in': self.g_input,
                      'h': f'({n_hidden},{h_activation})',
                      'bn': batchnorm,
                      'reg': self.reg,
                      'drop': drop_rate}
        return self.generator

    def build_discriminator(self, n_hidden=256, h_activation=None,
                            h_constraint=None, out_activation='sigmoid',
                            summary=False, loss='binary_crossentropy',
                            metrics=('accuracy',), optimizer=None):
        """Builds a discriminator using the passed hyperparameters.

        :param n_hidden: Units in the hidden layer
        :param h_activation: Activation of the hidden layer
        :param h_constraint: Kernel constraint of the hidden layer
        :param out_activation: Activation of the prediction unit
        :param summary: Whether to print the model summary
        :param loss: Loss passed to ``compile``
        :param metrics: Metrics passed to ``compile``
        :param optimizer: Optimizer passed to ``compile``; when ``None`` a
            fresh ``Nadam(lr=0.001, clipvalue=1.0)`` is created so that
            optimizer state is never shared between instances
        :return: The compiled discriminator, with ``trainable`` set to
            ``False`` after compilation
        """
        if optimizer is None:
            optimizer = Nadam(lr=0.001, clipvalue=1.0)
        if isinstance(metrics, tuple):
            metrics = list(metrics)

        x = Input(shape=(self.x_dim,), name='d_x_input')
        hidden = Dense(n_hidden, activation=h_activation,
                       kernel_constraint=h_constraint,
                       name='d_hidden_linear')(x)
        pred = Dense(1, activation=out_activation, name='d_pred')(hidden)

        self.discriminator = Model(x, pred, name="Disriminator")
        if summary:
            self.discriminator.summary()
            print()

        self.discriminator.compile(loss=loss, optimizer=optimizer,
                                   metrics=metrics)
        self.discriminator.trainable = False

        # D parameters, for logging
        self.d_log = {
            "loss": 'bce' if loss == 'binary_crossentropy' else loss,
            "opt": {type(optimizer).__name__: (optimizer.lr.numpy(),
                                               optimizer.clipvalue)}}
        return self.discriminator

    def build_GAN(self, loss='binary_crossentropy', metrics=('accuracy',),
                  optimizer=None, beta=1.0, normalise_loss=False,
                  target_label=0, bound_func='mean', max_changes=12,
                  summary=False):
        """Builds an adversarial network GAN using the passed hyperparameters.

        :param loss: Loss passed to ``compile``
        :param metrics: Metrics passed to ``compile``
        :param optimizer: Optimizer passed to ``compile``; when ``None`` a
            fresh ``Nadam(lr=0.001, clipvalue=1.0)`` is created
        :param beta: Weight of the bound loss
        :param normalise_loss: Whether to scale the bound loss by
            ``1 / x_dim``
        :param target_label: Currently unused
        :param bound_func: ``'max'`` reduces the bound loss with
            ``tf.reduce_max``, anything else with ``tf.reduce_mean``
        :param max_changes: L1 change budget; only changes above it are
            penalised
        :param summary: Whether to print the model summary
        :return: The compiled GAN model (also stored in ``self.GAN``)
        """
        if optimizer is None:
            optimizer = Nadam(lr=0.001, clipvalue=1.0)
        if isinstance(metrics, tuple):
            metrics = list(metrics)

        x = Input(shape=(self.x_dim,), name='gan_x_input')
        z = Input(shape=(self.z_dim,), name='gan_z_input')
        x_adv = self.generator([x, z])

        self.discriminator.trainable = False
        y_pred = self.discriminator(x_adv)  # predictions

        self.GAN = Model([x, z], y_pred, name='GAN')
        if summary:
            self.GAN.summary()
            print()

        # Binarise to get a valid sample
        x_adv_bin = binarise(x_adv, self.bin_threshold)

        # NOTE: a loss term on the target model's score for x_adv_bin is
        # not added here; only the bound loss below is.

        # Reduction function for the bound loss: mean or max
        # (more restrictive)
        reduce_func = tf.reduce_max if bound_func == 'max' \
            else tf.reduce_mean

        # Whether to scale the bound loss to the range [0, 1]
        scale = 1 / self.x_dim if normalise_loss else 1.0

        # Hinge on the L1 distance: zero while within max_changes
        loss_bound = reduce_func(
            tf.maximum(0.0,
                       tf.norm((x_adv_bin - x), ord=1, axis=1) - max_changes)
            * scale)

        self.GAN.add_loss(beta * loss_bound)

        # Honour the `loss` argument instead of a hard-coded value
        self.GAN.compile(loss=loss, optimizer=optimizer, metrics=metrics)

        self.gan_log = {
            "loss": f'custom({bound_func}, {max_changes}, {beta})',
            "opt": {type(optimizer).__name__: (optimizer.lr.numpy(),
                                               optimizer.clipvalue)}}
        return self.GAN

    class TargetModel:
        """TensorFlow re-implementation of a fitted linear SVM."""

        def __init__(self, model):
            """Copy weights, intercept and classes out of ``model``.

            :param model: A fitted ``LinearSVC`` or ``SVC`` exposing
                ``coef_``, ``intercept_`` and ``classes_``
            :raises TypeError: If ``model`` is neither of the two types
            """
            # Get model parameters (weights & intercept)
            if not isinstance(model, (LinearSVC, SVC)):
                raise TypeError(
                    "TargetModel supports LinearSVC and SVC models, "
                    f"got {type(model).__name__}")
            coef = model.coef_
            # coef_ may be a sparse matrix; densify only when needed
            if hasattr(coef, 'toarray'):
                coef = coef.toarray()
            w = np.asarray(coef).flatten()
            b = model.intercept_[0]

            self.weights = tf.Variable([w], dtype=tf.float32,
                                       trainable=False)
            self.intercept = tf.Variable(b, dtype=tf.float32,
                                         trainable=False)
            self.classes = tf.Variable(model.classes_, trainable=False)
            self.accuracy = tf.keras.metrics.BinaryAccuracy()

        def predict(self, X):
            """Return the class label for every row of ``X``."""
            # Decision function
            scores = K.dot(X, tf.transpose(self.weights)) + self.intercept
            # Index 1 for a positive score, 0 otherwise
            idx = tf.cast(scores > 0, tf.int32)
            y_pred = tf.gather(self.classes, idx)
            return y_pred

        def score(self, X, y_target):
            """Update the accuracy metric with ``X`` and return its result.

            NOTE(review): the Keras signature is
            ``update_state(y_true, y_pred)`` while predictions are passed
            first here, and the metric state is never reset between calls
            -- confirm both are intended.
            """
            y_pred = self.predict(X)
            self.accuracy.update_state(y_pred, y_target)
            return self.accuracy.result()

    def train(self, target_model, X_mal_train, X_mal_test, X_good_train,
              X_good_test, mal_label=1, good_label=0, earlystop=False,
              zmin=0, zmax=1, epochs=500, batch_size=32,
              combined_d_batch=False, d_train_mal=False, d_train_adv=True,
              good_batch_factor=1, d_times=1, gan_times=1, n_progress=1,
              minTPR_threshold=0, max_changes=np.inf, gan_dir=GAN_DIR,
              smooth_alpha=1.0, sample_train=True):
        """
        Performs GAN training.

        :param target_model: The target model of the evasion attack; its
            ``score`` and ``predict`` methods are called with arrays
        :param X_mal_train: The malware training set
        :param X_mal_test: The malware test set
        :param X_good_train: The goodware training set
        :param X_good_test: The goodware test set (not used by this method)
        :param mal_label: The label for the malware class (original label)
        :param good_label: The label for the goodware class (target label)
        :param earlystop: Whether to stop training once the target TPR is
            reached within ``max_changes``
        :param zmin: The lower bound of the random noise
        :param zmax: The upper bound of the random noise
        :param epochs: The number of training epochs
        :param batch_size: The size of a training batch
        :param combined_d_batch: Whether to train the discriminator on one
            batch that combines all classes or on each class separately
        :param d_train_mal: Whether to train the discriminator on malware.
        :param d_train_adv: Whether to train the discriminator on the
            generated adversarial examples.
        :param good_batch_factor: The size ratio of a goodware batch
            compared to that of a malware batch.
        :param d_times: The number of times to train the discriminator in
            each iteration.
        :param gan_times: The number of times to train the GAN in each
            iteration
        :param n_progress: The number of epochs with no improvement/output
            after which progress is printed.
        :param minTPR_threshold: The threshold to which we wish to minimise
            the True Positive Rate (TPR).
        :param max_changes: A constraint on the maximum number of changes
            in generated adversarial examples (AEs)
        :param gan_dir: Path prefix under which weights and models are saved
        :param smooth_alpha: Factor multiplied into the malware labels
            used for discriminator training (label smoothing)
        :param sample_train: Whether the plotted sample is drawn from the
            last training batch (True) or from the test set (False)
        :return: tuple (
            TPR_train: The list of TPR scores on the training set at each
                epoch,
            TPR_test: The list of TPR scores on the test set at each epoch,
            avg_diff_train: The list of avg changes in AEs generated from
                the training set at each epoch,
            avg_diff_test: The list of avg changes in AEs generated from
                the test set at each epoch,
            d_metrics: The list of the discriminator metrics
                [loss, accuracy] at each epoch,
            gan_metrics: The list of the GAN metrics [loss, accuracy] at
                each epoch,
            best_G_path: The path to the best performing G model
        )
        """
        g_batch_size = good_batch_factor * batch_size

        # Metrics accumulators
        d_metrics = []
        gan_metrics = []

        # Initial TPR on the training & test sets
        TPR_train = [target_model.score(X_mal_train,
                                        mal_label * ones(
                                            X_mal_train.shape[0]))]
        TPR_test = [target_model.score(X_mal_test,
                                       mal_label * ones(
                                           X_mal_test.shape[0]))]

        minTPR = 1.0
        minTPR_avg_changes = -1
        minTPR_max_changes = -1
        # Same -1 sentinel for the remaining summary statistics, so the
        # final report works even if no epoch ever improves on minTPR
        minTPR_std = -1.0
        minTPR_quantiles = np.full(3, -1.0)
        min_epoch = output_epoch = 0
        best_G_path = None

        print(f"Initial TPR on the training set: {TPR_train}")
        print(f"Initial TPR on the test set: {TPR_test}\n")

        # Average changes (perturbations) in adversarial examples
        avg_diff_train = []
        avg_diff_test = []

        # ID for plots
        plot_id = 1

        t1 = time.perf_counter()

        # Keeps `epoch + 1` in the final report valid when epochs == 0
        epoch = -1
        for epoch in range(epochs):
            # Generate batches of size (gan_times * batch_size)
            X_mal_batches = batch(X_mal_train, gan_times * batch_size,
                                  seed=epoch)

            # Epoch metrics accumulators
            d_metrics_epoch = np.empty((0, 2))
            gan_metrics_epoch = np.empty((0, 2))

            for X_mal_batch in X_mal_batches:
                ############################################################
                # Train the discriminator for d_times iterations
                ############################################################
                # Generate minibatches of size batch_size
                minibatches = batch(X_mal_batch, batch_size, seed=epoch)
                d_metrics_batch = np.empty((0, 2))

                # Train for d_times
                # (could reseed with (epoch + i) for reproducibility)
                for i in range(d_times):
                    X_mal = next(minibatches, None)  # Use these first
                    if X_mal is None:  # Then generate randomly
                        X_mal = rand_batch(X_mal_train, batch_size)

                    Y_mal = smooth_alpha * mal_label * ones(
                        X_mal.shape[0])  # Smooth

                    # One noise row per sample, so a short minibatch does
                    # not produce mismatched generator inputs
                    noise = np.random.uniform(
                        zmin, zmax, size=[X_mal.shape[0], self.z_dim])

                    # Generate adversarial examples
                    X_adv = self.generator.predict([X_mal, noise])
                    X_adv = binarise(X_adv, self.bin_threshold)

                    # Float labels: assigning a smoothed label into an
                    # integer array would truncate it
                    Y_adv = np.asarray(target_model.predict(X_adv),
                                       dtype=float)
                    Y_adv[Y_adv == mal_label] = \
                        smooth_alpha * mal_label  # Smooth

                    X_good = rand_batch(X_good_train, g_batch_size)
                    Y_good = good_label * ones(X_good.shape[0])

                    # Train the discriminator
                    self.discriminator.trainable = True

                    if combined_d_batch:
                        # *** Train once on a combined batch ***
                        X = X_good
                        Y = Y_good
                        if d_train_mal:
                            X = np.concatenate((X, X_mal))
                            Y = np.concatenate((Y, Y_mal))
                        if d_train_adv:
                            X = np.concatenate((X, X_adv))
                            Y = np.concatenate((Y, Y_adv))
                        metrics = self.discriminator.train_on_batch(X, Y)
                    else:
                        # ** Train on separate batches & combine metrics **
                        metrics_good = \
                            self.discriminator.train_on_batch(X_good,
                                                              Y_good)
                        metrics_mal = \
                            self.discriminator.train_on_batch(X_mal,
                                                              Y_mal) \
                            if d_train_mal else [np.nan, np.nan]
                        metrics_adv = \
                            self.discriminator.train_on_batch(X_adv,
                                                              Y_adv) \
                            if d_train_adv else [np.nan, np.nan]
                        # Avg metrics, ignoring the skipped (NaN) ones
                        metrics = np.nanmean(
                            np.array([metrics_mal, metrics_good,
                                      metrics_adv]),
                            axis=0)

                    # Accumulate metrics for d_times iterations
                    d_metrics_batch = np.vstack((d_metrics_batch, metrics))

                # Average the metrics of all d_times iterations
                d_metrics_batch = np.mean(d_metrics_batch, axis=0)

                # Add the averaged metrics (not just the last iteration)
                d_metrics_epoch = np.vstack((d_metrics_epoch,
                                             d_metrics_batch))

                ############################################################
                # Train the Generator
                ############################################################
                # Generate minibatches of size batch_size
                minibatches = batch(X_mal_batch, batch_size, seed=epoch)
                gan_metrics_batch = np.empty((0, 2))

                # Train for gan_times
                for i in range(gan_times):
                    # Number of minibatches should be exactly gan_times
                    X_mal = next(minibatches, None)
                    if X_mal is None:  # Just in case, generate randomly
                        X_mal = rand_batch(X_mal_train, batch_size)

                    noise = np.random.uniform(
                        zmin, zmax, size=[X_mal.shape[0], self.z_dim])

                    self.discriminator.trainable = False

                    # Train with target label = GOOD_LABEL
                    metrics = self.GAN.train_on_batch(
                        [X_mal, noise],
                        good_label * ones(X_mal.shape[0]))

                    # Accumulate metrics for gan_times iterations
                    gan_metrics_batch = np.vstack((gan_metrics_batch,
                                                   metrics))

                # Average the metrics of all gan_times iterations
                gan_metrics_batch = np.mean(gan_metrics_batch, axis=0)

                # Add the averaged metrics (not just the last iteration)
                gan_metrics_epoch = np.vstack((gan_metrics_epoch,
                                               gan_metrics_batch))

            # Average metrics of each epoch
            d_metrics.append(np.mean(d_metrics_epoch, axis=0).tolist())
            gan_metrics.append(np.mean(gan_metrics_epoch, axis=0).tolist())
            gan_loss = gan_metrics[-1][0]

            # TPR on adversarial training set
            noise = np.random.uniform(zmin, zmax,
                                      (X_mal_train.shape[0], self.z_dim))
            X_adv_train = binarise(
                self.generator.predict([X_mal_train, noise]),
                self.bin_threshold)

            # Score with target label = MAL_LABEL
            Y_adv_train = mal_label * ones(X_adv_train.shape[0])
            TPR = target_model.score(X_adv_train, Y_adv_train)
            TPR_train.append(TPR)

            # Changes (L1 norms) in the adversarial training set
            diff_train = norm((X_adv_train - X_mal_train), ord=1, axis=1)
            avg_diff_train_current = np.mean(diff_train)
            max_diff_train_current = np.max(diff_train)
            avg_diff_train.append(avg_diff_train_current)

            # TPR on adversarial test set
            noise = np.random.uniform(zmin, zmax,
                                      (X_mal_test.shape[0], self.z_dim))
            X_adv_test = binarise(
                self.generator.predict([X_mal_test, noise]),
                self.bin_threshold)
            Y_adv_test = mal_label * ones(X_adv_test.shape[0])
            TPR = target_model.score(X_adv_test, Y_adv_test)
            TPR_test.append(TPR)

            # Changes (L1 norms) in the adversarial test set
            diff_test = norm((X_adv_test - X_mal_test), ord=1, axis=1)
            avg_diff_test_current = np.mean(diff_test)
            max_diff_test_current = np.max(diff_test)
            avg_diff_test.append(avg_diff_test_current)

            # Output progress if TPR has decreased (improved evasion)
            # ... or if TPR is the same but avg changes have decreased
            if (TPR < minTPR) or \
                    (TPR == minTPR and
                     avg_diff_test_current < minTPR_avg_changes):
                print("\n>>>> New Best Results: "
                      f"Previous minTPR: [{minTPR:.8f}] ==> "
                      f"New minTPR: [{TPR:0.8f}] "
                      f"GAN Loss: [{gan_loss:.8f}] <<<<")

                output_progress(epoch, TPR_train, TPR_test,
                                diff_train, diff_test)

                minTPR = TPR
                min_epoch = output_epoch = epoch
                minTPR_avg_changes = avg_diff_test_current
                minTPR_max_changes = max_diff_test_current
                minTPR_std = np.std(diff_test)
                minTPR_quantiles = np.quantile(diff_test,
                                               [0.25, 0.5, 0.75])

                # Save weights
                minTPR_weights_path = \
                    (gan_dir + self.save_dir + 'weights/' +
                     f'GAN_minTPR_weights_epoch_{epoch}_'
                     f'TPR_{minTPR:.2f}_dtimes_{d_times}_changes_'
                     f'{avg_diff_test_current:.0f}_actReg_{self.reg[0]}_'
                     + time.strftime("%m-%d_%H-%M-%S") + '.h5')
                self.GAN.save_weights(minTPR_weights_path)

                # Generate and plot a sample of AEs
                sample_sz = 10
                sample_noise = np.random.uniform(
                    zmin, zmax, size=[sample_sz, self.z_dim])
                if sample_train:  # Sample from the last training batch
                    sample_mal = rand_batch(X_mal_batch, sample_sz)
                else:  # Sample from test set
                    sample_mal = np.asarray(rand_batch(X_mal_test,
                                                       sample_sz))

                plot_sample(sample_mal, sample_noise, self.generator,
                            target_model, epoch,
                            TPR_train=TPR_train, TPR_test=TPR_test,
                            params=self.log_params,
                            avg_changes=avg_diff_test_current,
                            m_label=mal_label, g_label=good_label,
                            annotate=False, out_dir=ADV_DIR,
                            plot_id=plot_id)
                plot_id = plot_id + 1

                if minTPR <= minTPR_threshold:
                    print(
                        "\n" + "#" * 150 + "\n"
                        f"# Target Evasion Rate {100 * (1 - TPR):.2f}% "
                        f"achieved at epoch [{epoch}], "
                        f"with avg {avg_diff_test_current:.1f} "
                        f"& max {max_diff_test_current:.1f} changes per sample "
                        f"(on the test set) ... "
                        f"GAN Loss: [{gan_loss:.8f}]"
                        "\n" + "#" * 150 + "\n"
                    )

                    if minTPR_avg_changes <= max_changes:
                        print("Training CONVERGED. "
                              "Target Evasion Rate achieved within max changes..."
                              "TRAINING ENDS HERE #")

                        # Save generator
                        best_G_path = \
                            (gan_dir + self.save_dir + 'models/' +
                             f'G_Target_TPR_epoch_{epoch}_'
                             f'TPR_{minTPR:.2f}_dtimes_{d_times}_changes_'
                             f'{avg_diff_test_current:.0f}_actReg_{self.reg[0]}_'
                             + time.strftime("%m-%d_%H-%M-%S") + '.h5')
                        self.generator.save(best_G_path)

                        if earlystop:
                            break

            # If no better than minTPR, but still achieved target evasion
            elif TPR <= minTPR_threshold:
                print(
                    "\n" + "#" * 150 + "\n"
                    f"# Target Evasion Rate {100 * (1 - TPR):.2f}% "
                    f"achieved at epoch [{epoch}] "
                    f"with avg {avg_diff_test_current:.1f} "
                    f"and max {max_diff_test_current:.1f} changes per sample "
                    f"(on the test set) ... "
                    f"GAN Loss: [{gan_loss:.8f}]"
                    "\n" + "#" * 150 + "\n"
                )

                # No weights or model are saved in this branch.

                # If within max changes
                if avg_diff_test_current <= max_changes:
                    print("Target Evasion Rate achieved within max changes...")
                    if earlystop:
                        break

            else:
                # Nothing new this epoch. Regulariser factors are not
                # adapted here.
                print()

            if (epoch + 1 - output_epoch) > n_progress:
                # If no new improvement for a while, output progress
                output_epoch = epoch
                print(f"\n*** Checking progress *** "
                      f"GAN Loss: [{gan_loss:.8f}] ***")
                output_progress(epoch, TPR_train, TPR_test,
                                diff_train, diff_test)

                # Generate and plot a sample of AEs
                sample_sz = 10
                sample_noise = np.random.uniform(
                    zmin, zmax, size=[sample_sz, self.z_dim])
                sample_mal = rand_batch(X_mal_batch, sample_sz)
                plot_sample(sample_mal, sample_noise, self.generator,
                            target_model, epoch,
                            TPR_train=TPR_train, TPR_test=TPR_test,
                            params=self.log_params,
                            avg_changes=avg_diff_test_current,
                            m_label=mal_label, g_label=good_label,
                            annotate=False, out_dir=ADV_DIR,
                            plot_id=plot_id)
                plot_id = plot_id + 1

        t2 = time.perf_counter()

        print("\n\n" + "#" * 165 + "\n"
              f"# Finished {epoch + 1} epochs in {(t2 - t1) / 60:.2f} minutes\n"
              f"# Best Evastion Rate = {100 * (1 - minTPR):.4f}% "
              f"(lowest TPR = {100 * minTPR:.4f}%) "
              f"achieved after {min_epoch + 1} epochs, with avg "
              f"{minTPR_avg_changes:.1f} \u00b1 SD({minTPR_std:.1f}) | "
              f" Q1-3 {minTPR_quantiles.astype(int).tolist()} | "
              f" and max {minTPR_max_changes:.1f} "
              f"changes per sample.\n" + "#" * 165 + "\n\n")

        return TPR_train, TPR_test, \
            avg_diff_train, avg_diff_test, \
            d_metrics, gan_metrics, \
            best_G_path
outputs = decoder(encoder(inputs)[2]) vae = Model(inputs, outputs, name='vae') models = (encoder, decoder) data = (x_test, y_test) reconstruction_loss = binary_crossentropy(K.flatten(inputs), K.flatten(outputs)) reconstruction_loss *= image_size * image_size kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) kl_loss = K.sum(kl_loss, axis=-1) kl_loss *= -0.5 vae_loss = K.mean(reconstruction_loss + kl_loss) vae.add_loss(vae_loss) vae.compile(optimizer='rmsprop') # train the autoencoder vae.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, None)) decoded_imgs = vae.predict(x_test) #How many digits we will display n = 10 plt.figure(figsize=(20, 4)) for i in range(n): #display original
def build_gan(h=128, w=128, c=3, latent_dim=2, epsilon_std=1.0, dropout_rate=0.1, GRADIENT_PENALTY_WEIGHT=10):
    """Assemble the generator-training and discriminator-training models.

    Two compiled wrappers are built around a shared generator
    (``residual_decoder``) and discriminator (``residual_discriminator``):

    * ``generator_model`` feeds noise through the generator and the frozen
      discriminator; its loss is the mean discriminator output.
    * ``discriminator_model`` takes real images and noise, and outputs the
      discriminator's score on real samples, on generated samples and on
      their ``RandomWeightedAverage``; its loss is
      ``mean(D(real)) - mean(D(generated)) + gradient_penalty_loss(...)``.

    Both losses are attached with ``add_loss``, so ``compile`` is called
    with ``loss=None``. ``epsilon_std`` is accepted but not used.

    Returns:
        (generator_model, discriminator_model, generator, discriminator)
    """

    def _set_trainable(network, flag):
        # Toggle every layer of the network, then the network itself
        for sub_layer in network.layers:
            sub_layer.trainable = flag
        network.trainable = flag

    optimizer_g = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)
    optimizer_d = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)

    # The decoder starts from a feature map 16x smaller than the image
    generator = residual_decoder(h // 16,
                                 w // 16,
                                 c=c,
                                 latent_dim=latent_dim,
                                 dropout_rate=dropout_rate)
    discriminator = residual_discriminator(h=h,
                                           w=w,
                                           c=c,
                                           dropout_rate=dropout_rate)

    # --- Generator training model: discriminator frozen ---
    _set_trainable(discriminator, False)

    noise_for_g = Input(shape=(latent_dim, ))
    fake_for_g = generator(noise_for_g)
    score_on_fake_for_g = discriminator(fake_for_g)

    generator_model = Model(inputs=[noise_for_g],
                            outputs=[score_on_fake_for_g])
    generator_model.add_loss(K.mean(score_on_fake_for_g))
    generator_model.compile(optimizer=optimizer_g, loss=None)

    # --- Discriminator training model: generator frozen ---
    # generator_model is already compiled, so the discriminator can be
    # made trainable again while the generator is frozen instead.
    _set_trainable(discriminator, True)
    _set_trainable(generator, False)

    # Real images and noise seeds are separate inputs; the generated and
    # the real images go through the discriminator in separate calls
    # rather than being concatenated into one tensor.
    real_samples = Input(shape=(h, w, c))
    noise_for_d = Input(shape=(latent_dim, ))

    fake_for_d = generator(noise_for_d)
    score_on_fake = discriminator(fake_for_d)
    score_on_real = discriminator(real_samples)

    # Random interpolation between real and generated images, scored for
    # the gradient penalty term
    averaged_samples = RandomWeightedAverage()([real_samples, fake_for_d])
    score_on_averaged = discriminator(averaged_samples)

    discriminator_model = Model(
        [real_samples, noise_for_d],
        [score_on_real, score_on_fake, score_on_averaged])

    penalty = gradient_penalty_loss(score_on_averaged,
                                    averaged_samples,
                                    GRADIENT_PENALTY_WEIGHT)
    discriminator_model.add_loss(
        K.mean(score_on_real) - K.mean(score_on_fake) + penalty)
    discriminator_model.compile(optimizer=optimizer_d, loss=None)

    return generator_model, discriminator_model, generator, discriminator