Code Example #1
File: clean_inwork.py Project: fgimbert/Hieroglyphs
def hieroRecoModel_offline(input_shape):
    """
    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    features -- the embedding Model() instance
    loss_model -- the trainable siamese Model() with the triplet loss attached
    """
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # First Block
    X = Conv2D(64, (3, 3), strides=(2, 2), name='conv1')(X)
    X = BatchNormalization(axis=1, name='bn1')(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=2)(X)

    X = Conv2D(64, (3, 3))(X)
    X = Activation('relu')(X)
    X = MaxPooling2D((3, 3), strides=2)(X)

    X = Flatten()(X)
    X = Dense(128, name='dense_layer')(X)

    # L2 normalization
    X = Lambda(lambda x: K.l2_normalize(x, axis=1))(X)

    features = Model(X_input, X, name="features")

    # Inputs of the siamese network

    anchor = Input(shape=input_shape)
    positive = Input(shape=input_shape)
    negative = Input(shape=input_shape)

    # Embedding Features of input

    anchor_features = features(anchor)
    pos_features = features(positive)
    neg_features = features(negative)

    input_triplet = [anchor, positive, negative]
    output_features = [anchor_features, pos_features, neg_features]

    # Define the trainable model
    loss_model = Model(inputs=input_triplet, outputs=output_features, name='loss')
    loss_model.add_loss(K.mean(triplet_loss(output_features)))
    loss_model.compile(loss=None, optimizer='adam')


    # Create model instance
    #model = Model(inputs=X_input, outputs=X, name='HieroRecoModel_off')

    return features, loss_model
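
Note: this snippet relies on a triplet_loss helper that is not shown. A
minimal sketch consistent with how it is called above (it receives the list
of three embedding tensors; K.mean is applied by the caller); the margin
value alpha is an assumed default:

from keras import backend as K

def triplet_loss(features, alpha=0.2):  # alpha (margin) is an assumption
    anchor, positive, negative = features
    # squared L2 distances from the anchor to the positive/negative samples
    pos_dist = K.sum(K.square(anchor - positive), axis=-1)
    neg_dist = K.sum(K.square(anchor - negative), axis=-1)
    # hinge on the margin, one value per triplet
    return K.maximum(pos_dist - neg_dist + alpha, 0.0)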
Code Example #2
File: regularizers_test.py Project: 95vidhi/keras
def create_multi_input_model_from(layer1, layer2):
    input_1 = Input(shape=(data_dim,))
    input_2 = Input(shape=(data_dim,))
    out1 = layer1(input_1)
    out2 = layer2(input_2)
    out = Average()([out1, out2])
    model = Model([input_1, input_2], out)
    model.add_loss(K.mean(out2))
    model.add_loss(1)
    model.add_loss(1)
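    # Keras sums every add_loss term into the model's total loss, so this
    # test model carries one tensor loss plus two constant scalar losses.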
    return model
Code Example #3
File: kmn.py Project: aasensio/DNHazel
class kmn_many_sigma(object):
    def __init__(self, x_train, y_train, center_sampling_method='k_means', n_centers=20, sigmas=None, keep_edges=True, 
                 estimator=None, validation_set=None, batch_size=32):
        self.center_sampling_method = center_sampling_method
        self.n_centers = n_centers
        self.batch_size = batch_size
        if sigmas is None:
            # NOTE: self.n_sigma is not set on this branch, although later
            # methods rely on it.
            self.sigmas = np.ones(self.n_centers).astype('float32')
        else:
            self.n_sigma = len(sigmas)
            self.sigmas = np.repeat(sigmas, self.n_centers).astype('float32')
        
        self.keep_edges = keep_edges
        self.center_locs = sample_center_points(y_train, method=self.center_sampling_method, 
                                                k=self.n_centers, keep_edges=self.keep_edges).astype('float32')
        
        self.n_data, self.n_features = x_train.shape
        
        self.train = []
        self.train.append(x_train.reshape(self.n_data, self.n_features).astype('float32'))
        self.train.append(y_train.reshape(self.n_data, 1).astype('float32'))
        
        self.validation_present = False
        if validation_set is not None:
            self.validation_present = True
            x_val = validation_set['x']
            y_val = validation_set['y']
            self.n_data_val, _ = x_val.shape
            self.validation = []
            self.validation.append(x_val.reshape(self.n_data_val, self.n_features).astype('float32'))
            self.validation.append(y_val.reshape(self.n_data_val, 1).astype('float32'))
            
        self.oneDivSqrtTwoPI = 1.0 / np.sqrt(2.0*np.pi) # normalisation factor for gaussian.
            
    def gaussian_distribution(self, y, mu, sigma):
        result = (y - mu) / sigma
        result = - 0.5 * (result * result)
        return (K.exp(result) / sigma) * self.oneDivSqrtTwoPI
    
    def mdn_loss_function(self, args):
        y, weights = args
        result = self.gaussian_distribution(y, self.center_locs, self.sigmas) * weights
        result = K.sum(result, axis=1)
        result = - K.log(result)
        return K.mean(result)
    
    # n_center and n_sigma are unused here (the body reads self.n_centers and
    # self.n_sigma); defaults are added so the two-argument call in fit() runs.
    def estimator_many_sigma(self, depth, n_filters, n_center=None, n_sigma=None):

# Inputs
        # NOTE: self.n_lambda (the number of spectral points) is never set in
        # __init__ and must be assigned before this method is called.
        input_x = Input(shape=(self.n_lambda, 4), name='stokes_input')
        y_true = Input(shape=(1,), name='y_true')
        mu_input = Input(shape=(1,), name='mu_input')

# Neural network
        x = Conv1D(n_filters, 3, activation='relu', padding='same', kernel_initializer='he_normal', name='conv_1')(input_x)

        for i in range(depth):
            x = residual(x, n_filters*(i+1), 'relu', strides=2)
    
        intermediate = Flatten(name='flat')(x)
        intermediate_conv = concatenate([intermediate, mu_input], name='FC')

# Output weights
        weights = Dense(self.n_centers*self.n_sigma, activation='softmax', name='weights')(intermediate_conv)

# Definition of the loss function
        loss = Lambda(self.mdn_loss_function, output_shape=(1,), name='loss')([y_true, weights])
        
        # NOTE: mu_input feeds the graph through the concatenate above but is
        # not listed in `inputs`, so Keras will raise a disconnected-graph
        # error unless it is added here (and supplied when fitting).
        self.model = Model(inputs=[input_x, y_true], outputs=[loss])
        self.model.add_loss(loss)
    
# Compile with the loss weight set to None, so it will be omitted
        self.model.compile(loss=[None], loss_weights=[None], optimizer=Adam(lr=0.01))
        self.model.summary()

# Now generate a second network that ends up in the weights for later evaluation        
        layer_name = 'weights'
        self.output_weights = Model(inputs=self.model.input,
                                 outputs=self.model.get_layer(layer_name).output)
                       
    def fit(self):
        # NOTE: this passes n_centers/n_sigma into the depth/n_filters slots,
        # matching the original source but probably not the intent.
        self.estimator_many_sigma(self.n_centers, self.n_sigma)
        cb = CSVLogger("training.csv")
        self.model.fit(x=self.train, epochs=300, batch_size=3750, callbacks=[cb], 
                       validation_data=(self.validation, None))
        
                
    def predict_density(self, x_test):
        y = np.linspace(-10, 10, 300)
        # The original called self.model.predict (whose output is the loss) and
        # mixed in torch.unsqueeze/torch.sum plus the undefined `self.sigma`,
        # apparently leftovers from a PyTorch port; rewritten here with the
        # weights model and a NumPy equivalent of gaussian_distribution.
        weights = self.output_weights.predict(x_test)
        z = (y[:, np.newaxis] - self.center_locs) / self.sigmas
        result = np.exp(-0.5 * z * z) / self.sigmas * self.oneDivSqrtTwoPI * weights
        result = np.sum(result, axis=1)
        return y, result
    
    def sample_density(self, x_test):
        test = []
        test.append(x_test)
        test.append(x_test)
        
        weights = self.output_weights.predict(test)
        print(weights.shape)
        locs = self.center_locs
        sigma = self.sigmas
        n = len(x_test)
        out = np.zeros(n)
        for i in range(n):
            ind = np.random.choice(self.n_centers * self.n_sigma, p=weights[i,:])
            out[i] = np.random.normal(loc=locs[ind], scale=sigma[ind])
        return out
    
    def plot_loss(self):
        out = pd.read_csv('training.csv').to_numpy()  # .as_matrix() was removed in pandas 1.0
        f, ax = pl.subplots()
        ax.plot(out[:,1], label='Training set')
        if (self.validation_present):
            ax.plot(out[:,2], label='Validation set')
        ax.set_xlabel('Iteration')
        ax.set_ylabel('Loss')
        ax.legend()
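
Note: the constructor above calls a sample_center_points helper that is not
shown. A hedged sketch of what it plausibly does (choosing the kernel centers
from the training targets, here with scikit-learn's KMeans; the original
implementation may differ):

import numpy as np
from sklearn.cluster import KMeans

def sample_center_points(y, method='k_means', k=20, keep_edges=True):
    y = y.ravel()
    centers = []
    if keep_edges:
        # keep the extreme targets so the mixture covers the full range
        centers += [y.min(), y.max()]
        k -= 2
    if method == 'k_means':
        km = KMeans(n_clusters=k).fit(y.reshape(-1, 1))
        centers += list(km.cluster_centers_.ravel())
    else:
        # fall back to sampling centers directly from the data
        centers += list(np.random.choice(y, size=k, replace=False))
    return np.asarray(centers)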
Code Example #4
File: wgan_sn_celeba.py Project: jtlai0921/gan

# Combined model (train the discriminator)
x_in = Input(shape=(img_dim, img_dim, 3))
z_in = Input(shape=(z_dim, ))
g_model.trainable = False

x_fake = g_model(z_in)
x_real_score = d_model(x_in)
x_fake_score = d_model(x_fake)

d_train_model = Model([x_in, z_in],
                      [x_real_score, x_fake_score])

d_loss = K.mean(x_fake_score - x_real_score)
d_train_model.add_loss(d_loss)
d_train_model.compile(optimizer=Adam(2e-4, 0.5))


# Combined model (train the generator)
g_model.trainable = True
d_model.trainable = False
x_fake_score = d_model(g_model(z_in))

g_train_model = Model(z_in, x_fake_score)
g_train_model.add_loss(K.mean(- x_fake_score))
g_train_model.compile(optimizer=Adam(2e-4, 0.5))


# Check the model structure
d_train_model.summary()
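
Since both models are compiled with add_loss terms only (no compile-time
loss), a training step just feeds the inputs. A hypothetical step, where imgs
and batch_size are illustrative names:

import numpy as np

z = np.random.randn(batch_size, z_dim)
d_loss_value = d_train_model.train_on_batch([imgs, z], None)
g_loss_value = g_train_model.train_on_batch(z, None)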
Code Example #5

# Training model
train_model = Model(
    bert.model.inputs + [subject_labels, subject_ids, object_labels],
    [subject_preds, object_preds])

mask = bert.model.get_layer('Sequence-Mask').output_mask

subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
subject_loss = K.mean(subject_loss, 2)
subject_loss = K.sum(subject_loss * mask) / K.sum(mask)

object_loss = K.binary_crossentropy(object_labels, object_preds)
object_loss = K.sum(K.mean(object_loss, 3), 2)
object_loss = K.sum(object_loss * mask) / K.sum(mask)
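# K.sum(loss * mask) / K.sum(mask) averages the per-token loss over real
# tokens only, so padded positions contribute nothing to the gradient.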

train_model.add_loss(subject_loss + object_loss)
train_model.compile(optimizer=Adam(1e-5))


def extract_spoes(text):
    """抽取输入text所包含的三元组
    """
    tokens = tokenizer.tokenize(text, max_length=maxlen)
    token_ids, segment_ids = tokenizer.encode(text, max_length=maxlen)
    # Extract the subjects
    subject_preds = subject_model.predict([[token_ids], [segment_ids]])
    start = np.where(subject_preds[0, :, 0] > 0.6)[0]
    end = np.where(subject_preds[0, :, 1] > 0.5)[0]
    subjects = []
    for i in start:
        j = end[end >= i]
Code Example #6
File: model.py Project: Xuanfang1121/casrel-ch
def E2EModel(bert_config_path, bert_checkpoint_path, LR, num_rels):
    bert_model = load_trained_model_from_checkpoint(bert_config_path,
                                                    bert_checkpoint_path,
                                                    seq_len=None)
    for l in bert_model.layers:
        l.trainable = True

    tokens_in = Input(shape=(None, ))
    segments_in = Input(shape=(None, ))
    gold_sub_heads_in = Input(shape=(None, ))
    gold_sub_tails_in = Input(shape=(None, ))
    sub_head_in = Input(shape=(1, ))
    sub_tail_in = Input(shape=(1, ))
    gold_obj_heads_in = Input(shape=(None, num_rels))
    gold_obj_tails_in = Input(shape=(None, num_rels))

    tokens, segments, gold_sub_heads, gold_sub_tails, sub_head, sub_tail, \
    gold_obj_heads, gold_obj_tails = tokens_in, segments_in, gold_sub_heads_in, \
                                     gold_sub_tails_in, sub_head_in, sub_tail_in, \
                                     gold_obj_heads_in, gold_obj_tails_in
    mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(tokens)

    tokens_feature = bert_model([tokens, segments])
    pred_sub_heads = Dense(1, activation='sigmoid')(tokens_feature)
    pred_sub_tails = Dense(1, activation='sigmoid')(tokens_feature)

    subject_model = Model([tokens_in, segments_in],
                          [pred_sub_heads, pred_sub_tails])

    sub_head_feature = Lambda(seq_gather)([tokens_feature, sub_head])
    sub_tail_feature = Lambda(seq_gather)([tokens_feature, sub_tail])
    sub_feature = Average()([sub_head_feature, sub_tail_feature])

    tokens_feature = Add()([tokens_feature, sub_feature])
    pred_obj_heads = Dense(num_rels, activation='sigmoid')(tokens_feature)
    pred_obj_tails = Dense(num_rels, activation='sigmoid')(tokens_feature)

    object_model = Model([tokens_in, segments_in, sub_head_in, sub_tail_in],
                         [pred_obj_heads, pred_obj_tails])

    hbt_model = Model([
        tokens_in, segments_in, gold_sub_heads_in, gold_sub_tails_in,
        sub_head_in, sub_tail_in, gold_obj_heads_in, gold_obj_tails_in
    ], [pred_sub_heads, pred_sub_tails, pred_obj_heads, pred_obj_tails])

    gold_sub_heads = K.expand_dims(gold_sub_heads, 2)
    gold_sub_tails = K.expand_dims(gold_sub_tails, 2)

    sub_heads_loss = K.binary_crossentropy(gold_sub_heads, pred_sub_heads)
    sub_heads_loss = K.sum(sub_heads_loss * mask) / K.sum(mask)
    sub_tails_loss = K.binary_crossentropy(gold_sub_tails, pred_sub_tails)
    sub_tails_loss = K.sum(sub_tails_loss * mask) / K.sum(mask)

    obj_heads_loss = K.sum(K.binary_crossentropy(gold_obj_heads,
                                                 pred_obj_heads),
                           2,
                           keepdims=True)
    obj_heads_loss = K.sum(obj_heads_loss * mask) / K.sum(mask)
    obj_tails_loss = K.sum(K.binary_crossentropy(gold_obj_tails,
                                                 pred_obj_tails),
                           2,
                           keepdims=True)
    obj_tails_loss = K.sum(obj_tails_loss * mask) / K.sum(mask)

    loss = (sub_heads_loss + sub_tails_loss) + (obj_heads_loss +
                                                obj_tails_loss)

    hbt_model.add_loss(loss)
    hbt_model.compile(optimizer=Adam(LR))
    hbt_model.summary()

    return subject_model, object_model, hbt_model
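
Note: Lambda(seq_gather) above relies on a helper that is not shown in this
snippet. A sketch consistent with how it is called (picking the token feature
at the given subject head/tail position from each sequence), assuming a
TensorFlow backend:

import tensorflow as tf
from keras import backend as K

def seq_gather(x):
    # seq: (batch, seq_len, dim); idxs: (batch, 1) position to pick per sample
    seq, idxs = x
    idxs = K.cast(idxs, 'int32')
    batch_idxs = K.expand_dims(K.arange(0, K.shape(seq)[0]), 1)
    indices = K.concatenate([batch_idxs, idxs], axis=1)
    # returns seq[b, idxs[b]] for every example b in the batch
    return tf.gather_nd(seq, indices)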
Code Example #7
x = bert_model([x1, x2])
ps1 = Dense(1, use_bias=False)(x)
ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)([ps1, x_mask])
ps2 = Dense(1, use_bias=False)(x)
ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)([ps2, x_mask])
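# In the two Lambdas above, (1 - x_mask) * 1e10 is subtracted from the logits
# so that padded positions become large negative values and vanish after the
# softmax inside the cross-entropy below.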

model = Model([x1_in, x2_in], [ps1, ps2])

train_model = Model([x1_in, x2_in, s1_in, s2_in], [ps1, ps2])

loss1 = K.mean(K.categorical_crossentropy(s1_in, ps1, from_logits=True))
# `s1` is not defined in this snippet; the label input s1_in is presumably meant.
ps2 -= (1 - K.cumsum(s1_in, 1)) * 1e10
loss2 = K.mean(K.categorical_crossentropy(s2_in, ps2, from_logits=True))
loss = loss1 + loss2

train_model.add_loss(loss)
train_model.compile(optimizer=Adam(learning_rate))
train_model.summary()


def softmax(x):
    x = x - np.max(x)
    x = np.exp(x)
    return x / np.sum(x)


def extract_entity(text_in, c_in):
    if c_in not in classes:
        return 'NaN'
    # Merge the text and the event type into a single string (as described in the blog post)
    text_in = u'___%s___%s' % (c_in, text_in)
Code Example #8
File: models_worked.py Project: darylperalta/vaegan
def vaegan_complete_model(original_dim=(64, 64, 3),
                          batch_size=64,
                          latent_dim=128,
                          epochs=50,
                          mse_flag=True,
                          lr=0.0003):
    '''VAEGAN complete model.'''
    # VAE model = encoder + decoder
    # build encoder model
    input_shape = original_dim
    inputs = Input(shape=input_shape, name='encoder_input')

    x = Conv2D(64, (5, 5), strides=(2, 2), padding='same',
               name='enc_conv1')(inputs)
    x = BatchNormalization(name='enc_bn1')(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2, name='enc_LReLU1')(x)

    x = Conv2D(128, (5, 5), strides=(2, 2), padding='same',
               name='enc_conv2')(x)
    x = BatchNormalization(name='enc_bn2')(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2, name='enc_LReLU2')(x)

    x = Conv2D(256, (5, 5), strides=(2, 2), padding='same',
               name='enc_conv3')(x)
    x = BatchNormalization(name='enc_bn3')(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2, name='enc_LReLU3')(x)

    x = Flatten()(x)
    #x = Dense(2048, name = 'enc_dense1')(x)
    #x = BatchNormalization(name = 'enc_bn4')(x)
    #x = Activation('relu', name='z_mean')(x)
    #x = LeakyReLU(alpha = 0.2, name = 'enc_dense2')(x)

    x_mean = Dense(latent_dim, name='x_mean')(x)
    x_mean = BatchNormalization()(x_mean)
    z_mean = LeakyReLU(alpha=0.2, name='z_mean')(x_mean)

    x_log_var = Dense(latent_dim, name='x_log_var')(x)
    x_log_var = BatchNormalization()(x_log_var)
    z_log_var = LeakyReLU(alpha=0.2, name='z_log_var')(x_log_var)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])
    #encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    print('encoder')
    encoder.summary()
    #plot_model(encoder, to_file='vaegan_encoder_complete.png', show_shapes=True)

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(8 * 8 * 256)(latent_inputs)
    x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Reshape((8, 8, 256))(x)

    x = Conv2DTranspose(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2DTranspose(32, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2DTranspose(3, (5, 5), strides=(1, 1), padding='same')(x)
    outputs = Activation('tanh')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    print('decoder')
    decoder.summary()
    #plot_model(decoder, to_file='vaegan_decoder_complete.png', show_shapes=True)

    #instantiate discriminator
    x_recon = Input(shape=input_shape)
    #x = Conv2D(32,(5,5), strides =(2,2),padding='same')(x_recon)
    x = Conv2D(32, (5, 5), strides=(1, 1), padding='same')(x_recon)
    #x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    l_layer = Conv2D(256, (5, 5), strides=(2, 2), padding='same')(x)

    l_layer_shape = (8, 8, 256)

    input_disc2 = Input(shape=l_layer_shape)

    x = BatchNormalization()(input_disc2)
    #x = BatchNormalization()(l_layer)

    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Flatten()(x)

    x = Dense(512)(x)
    x = BatchNormalization()(x)
    #x = Activation('relu')(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Dense(1)(x)
    output_dis = Activation('sigmoid')(x)
    #discriminator_2 = Model(input_disc2, output_dis, name='discriminator_1')
    '''construct discriminator with l_layer output'''
    discriminator_l = Model(x_recon, l_layer, name='discriminator_l')
    print('discriminator_l')
    discriminator_l.summary()
    ''' construct discriminator second part'''
    discriminator_2 = Model(input_disc2, output_dis, name='discriminator_2')
    print('discriminator_2')
    discriminator_2.summary()
    ''' construct discriminator (discriminator trainable) '''

    discriminator = Model(x_recon,
                          discriminator_2(discriminator_l(x_recon)),
                          name='discriminator')
    print('discriminator')
    #optimizer = RMSprop(lr=lr)
    discriminator.compile(loss='binary_crossentropy',
                          optimizer=RMSprop(lr=lr),
                          metrics=['accuracy'])
    print('discriminator')
    discriminator.summary()
    '''construct model 1 (encoder trainable) '''

    encoder.trainable = True
    decoder.trainable = False
    discriminator_l.trainable = False
    discriminator_2.trainable = False
    print('encoder_model_try')

    disc_xtilde = discriminator_l(decoder(encoder(inputs)[2]))
    disc_x = discriminator_l(inputs)
    out_recon = decoder(encoder(inputs)[2])
    model1_enc = Model(inputs,
                       [discriminator_2(disc_x),
                        discriminator_2(disc_xtilde)],
                       name='model_encoder1')
    model1_enc.summary()
    plot_model(model1_enc, to_file='model1_enc.png', show_shapes=True)
    '''
        model1_enc = Model(inputs, discriminator_l(decoder(encoder(inputs)[2])), name='model1_encoder')
        print('model1 encoder trainable')
        plot_model(model1_enc, to_file='model1_enc.png', show_shapes=True)
        '''
    '''Define losses for encoder parameter update'''

    reconstruction_loss = nll_loss(disc_x, disc_xtilde)
    #reconstruction_loss *= original_dim[0]*original_dim[1]*original_dim[2]
    #recon_mse = mse(inputs,out_recon)
    #recon_mse *= original_dim[0]*original_dim[1]*original_dim[2]

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    #vae_loss = K.mean(reconstruction_loss + kl_loss+recon_mse)
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    model1_enc.add_loss(vae_loss)
    model1_enc.compile(optimizer=RMSprop(lr=lr * (0.5)))
    #model1_enc.compile(optimizer=RMSprop(lr=0.003*0.001))

    #model1_enc.summary()
    ''' construct model 2 (decoder trainable) '''

    encoder.trainable = False
    decoder.trainable = True
    discriminator_l.trainable = False
    discriminator_2.trainable = False

    zp = Input(shape=(latent_dim, ), name='zp')
    out_zp = discriminator_2(discriminator_l(decoder(zp)))

    model2_dec = Model(
        [inputs, zp],
        [discriminator_2(disc_x),
         discriminator_2(disc_xtilde), out_zp],
        name='model2_encoder')
    print('model2 decoder trainable')
    model2_dec.summary()
    plot_model(model2_dec, to_file='model2_dec.png', show_shapes=True)

    #reconstruction_loss = nll_loss(disc_x,disc_xtilde)
    #reconstruction_loss *= original_dim[0]*original_dim[1]*original_dim[2]
    gamma = 1e-6

    #vae_loss = K.mean(reconstruction_loss + kl_loss)

    #gan_real_loss = binary_crossentropy(K.ones_like(discriminator_2(disc_x)),discriminator_2(disc_x))
    gan_fake_loss1 = binary_crossentropy(
        K.ones_like(discriminator_2(disc_xtilde)),
        discriminator_2(disc_xtilde))
    gan_fake_loss2 = binary_crossentropy(K.ones_like(out_zp), out_zp)
    #gan_fake_loss1 = binary_crossentropy(K.zeros_like(discriminator_2(disc_xtilde)),discriminator_2(disc_xtilde))
    #gan_fake_loss2 = binary_crossentropy(K.zeros_like(out_zp),out_zp)
    gan_fake_loss = K.mean(gan_fake_loss1 + gan_fake_loss2)
    #dec_loss = K.mean(gamma*reconstruction_loss - gan_fake_loss)
    #dec_loss = gamma*reconstruction_loss - gan_fake_loss
    dec_loss = gamma * reconstruction_loss + gan_fake_loss

    model2_dec.add_loss(dec_loss)
    model2_dec.compile(optimizer=RMSprop(lr=lr))

    #optimizer = RMSprop(lr=lr)
    #discriminator.compile(loss='binary_crossentropy',
    #                      optimizer=optimizer,
    #                      metrics=['accuracy'])
    #print('discriminator')

    return encoder, decoder, discriminator, model1_enc, model2_dec
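
Note: the encoder above calls a sampling helper (the VAE reparameterization
trick) that is not shown. A sketch matching the standard Keras VAE example:

from keras import backend as K

def sampling(args):
    # draw z = mean + std * eps, eps ~ N(0, I), keeping the graph differentiable
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon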
Code Example #9
def wgangp_conditional(h=128,
                       w=128,
                       c=3,
                       latent_dim=2,
                       condition_dim=10,
                       epsilon_std=1.0,
                       dropout_rate=0.1,
                       GRADIENT_PENALTY_WEIGHT=10):

    optimizer_g = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)
    optimizer_d = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)
    optimizer_c = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)

    t_h, t_w = h // 16, w // 16
    generator = residual_decoder(t_h,
                                 t_w,
                                 c=c,
                                 latent_dim=latent_dim + condition_dim,
                                 dropout_rate=dropout_rate)

    discriminator = residual_discriminator(h=h,
                                           w=w,
                                           c=c,
                                           dropout_rate=dropout_rate,
                                           return_hidden=True)
    classifier = residual_discriminator(h=h,
                                        w=w,
                                        c=c,
                                        dropout_rate=dropout_rate,
                                        as_classifier=condition_dim,
                                        return_hidden=True)
    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False
    for layer in classifier.layers:
        layer.trainable = False
    classifier.trainable = False

    generator_input = Input(shape=(latent_dim + condition_dim, ))
    generator_layers = generator(generator_input)

    discriminator_layers_for_generator = discriminator(generator_layers)[0]
    classifier_layers_for_generator = classifier(generator_layers)[0]

    generator_model = Model(inputs=[generator_input],
                            outputs=[
                                discriminator_layers_for_generator,
                                classifier_layers_for_generator
                            ])
    generator_model.add_loss(K.mean(discriminator_layers_for_generator))
    generator_model.compile(optimizer=optimizer_g,
                            loss=[None, 'categorical_crossentropy'])

    # Now that the generator_model is compiled, we can make the discriminator layers trainable.
    for layer in discriminator.layers:
        layer.trainable = True
    for layer in generator.layers:
        layer.trainable = False
    discriminator.trainable = True
    generator.trainable = False

    # The discriminator_model is more complex. It takes both real image samples and random noise seeds as input.
    # The noise seed is run through the generator model to get generated images. Both real and generated images
    # are then run through the discriminator. Although we could concatenate the real and generated images into a
    # single tensor, we don't (see model compilation for why).
    real_samples = Input(shape=(h, w, c))
    generator_input_for_discriminator = Input(shape=(latent_dim +
                                                     condition_dim, ))
    generated_samples_for_discriminator = generator(
        generator_input_for_discriminator)
    discriminator_output_from_generator = discriminator(
        generated_samples_for_discriminator)[0]

    discriminator_output_from_real_samples, d0, d1, d2 = discriminator(
        real_samples)
    classifier_output_from_real_samples, c0, c1, c2 = classifier(real_samples)

    ds = K.concatenate([K.flatten(d0), K.flatten(d1), K.flatten(d2)], axis=-1)
    cs = K.concatenate([K.flatten(c0), K.flatten(c1), K.flatten(c2)], axis=-1)

    c_loss = .1 * K.mean(K.square(ds - cs))

    averaged_samples = RandomWeightedAverage()(
        [real_samples, generated_samples_for_discriminator])
    averaged_samples_out = discriminator(averaged_samples)[0]

    discriminator_model = Model(
        [real_samples, generator_input_for_discriminator], [
            discriminator_output_from_real_samples,
            discriminator_output_from_generator, averaged_samples_out
        ])
    discriminator_model.add_loss(
        K.mean(discriminator_output_from_real_samples) -
        K.mean(discriminator_output_from_generator) + gradient_penalty_loss(
            averaged_samples_out, averaged_samples, GRADIENT_PENALTY_WEIGHT))
    discriminator_model.add_loss(c_loss, inputs=[discriminator])
    discriminator_model.compile(optimizer=optimizer_d, loss=None)

    for layer in classifier.layers:
        layer.trainable = True
    classifier.trainable = True

    classifier_model = Model([real_samples],
                             [classifier_output_from_real_samples])
    classifier_model.add_loss(c_loss, inputs=[classifier])
    classifier_model.compile(optimizer=optimizer_c,
                             loss='categorical_crossentropy')

    return generator_model, discriminator_model, classifier_model, generator, discriminator, classifier
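
Note: the snippet relies on RandomWeightedAverage and gradient_penalty_loss,
which are not shown. Sketches in the style of the well-known Keras 2.x
improved-WGAN example (the originals may differ in detail):

import numpy as np
from keras import backend as K
from keras.layers.merge import _Merge

class RandomWeightedAverage(_Merge):
    # returns a random point on the line between each real/fake image pair
    def _merge_function(self, inputs):
        weights = K.random_uniform((K.shape(inputs[0])[0], 1, 1, 1))
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])

def gradient_penalty_loss(y_pred, averaged_samples, weight):
    # penalize the critic when its gradient norm at the interpolated
    # samples deviates from 1
    gradients = K.gradients(y_pred, averaged_samples)[0]
    gradients_sqr_sum = K.sum(K.square(gradients),
                              axis=np.arange(1, len(gradients.shape)))
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    return weight * K.mean(K.square(1 - gradient_l2_norm))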
Code Example #10
File: model.py Project: bacdavid/RobuGAN
class AutoEncoder:
    """ Auto Encoder class.
    """
    def __init__(self, input_shape, latent_dim, learning_rate=0.0005):
        self.input_shape = input_shape  # (w,h,c)
        self.latent_dim = latent_dim  # n

        # Auto Encoder
        self.encoder = self._build_encoder()
        self.encoder_input = Input(shape=self.input_shape)
        self.encoder_mean_output, self.encoder_logvar_output = self.encoder(
            self.encoder_input)
        self.decoder = self._build_decoder()
        self.decoder_input = Input(shape=(self.latent_dim, ))
        self.decoder_output = self.decoder(self.encoder_mean_output)

        # Generator
        self.latent_output = Lambda(sampling)(
            [self.encoder_mean_output, self.encoder_logvar_output])
        self.gen_output = self.decoder(self.latent_output)

        # Critic
        self.critic = self._build_critic()

        # Enable the critic and freeze the generator (encoder + decoder)
        self.critic.trainable = True
        self.encoder.trainable = False
        self.decoder.trainable = False

        # Critic trainer
        self.h1_real, self.h2_real, self.h3_real, self.critic_output_real = self.critic(
            self.encoder_input)
        self.h1_fake, self.h2_fake, self.h3_fake, self.critic_output_fake = self.critic(
            self.gen_output)
        self.critic_trainer = Model(
            self.encoder_input,
            [self.critic_output_real, self.critic_output_fake])
        critic_loss = self._critic_loss()
        self.critic_trainer.add_loss(K.mean(critic_loss))
        self.critic_trainer.compile(optimizer=RMSprop(lr=learning_rate))
        self.critic_trainer.summary()

        # Disable the critic and re-enable the generator
        self.critic.trainable = False
        self.encoder.trainable = True
        self.decoder.trainable = True

        # Generator trainer
        self.gen_trainer = Model(
            self.encoder_input,
            [self.critic_output_real, self.critic_output_fake])
        gen_loss = self._gen_loss()
        self.gen_trainer.add_loss(K.mean(gen_loss))
        self.gen_trainer.compile(optimizer=RMSprop(lr=learning_rate))
        self.gen_trainer.summary()

        # Reconstruction prediction
        self.rec_sample = K.function([self.encoder_input],
                                     [self.decoder_output])

        # Generate prediction
        self.gen_sample = K.function([self.decoder_input],
                                     [self.decoder(self.decoder_input)])

        # Compute discriminator score (by means of the distance)
        self.compute_score = K.function([self.encoder_input], [gen_loss])

    def _build_encoder(self):
        # Input
        encoder_input = Input(shape=self.input_shape)

        # Encoder
        h = Conv2D(64, 5, strides=2, padding='same')(encoder_input)
        h = Activation('relu')(h)
        h = Conv2D(128, 5, strides=2, padding='same')(h)
        h = BatchNormalization(momentum=0.8)(h)
        h = Activation('relu')(h)
        h = Conv2D(256, 5, strides=2, padding='same')(h)
        h = BatchNormalization(momentum=0.8)(h)
        h = Activation('relu')(h)
        h = Flatten()(h)
        encoder_mean_output = Dense(self.latent_dim)(h)
        encoder_logvar_output = Dense(self.latent_dim)(h)

        # Model
        return Model(encoder_input,
                     [encoder_mean_output, encoder_logvar_output])

    def _build_decoder(self):
        # Input
        decoder_input = Input(shape=(self.latent_dim, ))

        # Decoder
        h = Dense(self.input_shape[0] * self.input_shape[1] // 2**6 * 256,
                  activation='relu')(decoder_input)
        h = Reshape(
            (self.input_shape[0] // 2**3, self.input_shape[1] // 2**3, 256))(h)
        h = Conv2DTranspose(256, 5, strides=2, padding='same')(h)
        h = BatchNormalization()(h)
        h = Activation('relu')(h)
        h = Conv2DTranspose(128, 5, strides=2, padding='same')(h)
        h = BatchNormalization()(h)
        h = Activation('relu')(h)
        h = Conv2DTranspose(64, 5, strides=2, padding='same')(h)
        h = Activation('relu')(h)
        decoder_output = Conv2D(self.input_shape[2], 5,
                                padding='same')(h)  # linear activation

        # Model
        return Model(decoder_input, decoder_output)

    def _build_critic(self):
        # Input
        critic_input = Input(shape=self.input_shape)

        # Critic
        h = Conv2D(64, 5, strides=2, padding='same')(critic_input)
        h1 = LeakyReLU(alpha=0.2)(h)
        h = Conv2D(128, 5, strides=2, padding='same')(h1)
        h = BatchNormalization()(h)
        h2 = LeakyReLU(alpha=0.2)(h)
        h = Conv2D(256, 5, strides=2, padding='same')(h2)
        h = BatchNormalization()(h)
        h3 = LeakyReLU(alpha=0.2)(h)
        h = Flatten()(h3)
        critic_output = Dense(1)(h)

        # Model
        return Model(critic_input, [h1, h2, h3, critic_output])

    def _critic_loss(self):
        true_loss = K.mean(K.square(self.critic_output_real - 1.), axis=-1)
        false_loss = K.mean(K.square(self.critic_output_fake), axis=-1)
        return true_loss + false_loss

    def _gen_loss(self):
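        # Combines a down-weighted KL term, a least-squares adversarial term,
        # and a feature-matching reconstruction term computed over the
        # critic's intermediate activations h1..h3.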
        kl_loss = -0.5 * K.sum(1 + self.encoder_logvar_output - K.square(
            self.encoder_mean_output) - K.exp(self.encoder_logvar_output),
                               axis=-1)
        gen_loss = K.mean(K.square(self.critic_output_real -
                                   self.critic_output_fake),
                          axis=-1)
        rec_loss = K.mean(K.abs(self.h1_real - self.h1_fake), axis=[1, 2, 3]) \
                   + K.mean(K.abs(self.h2_real - self.h2_fake), axis=[1, 2, 3]) \
                   + K.mean(K.abs(self.h3_real - self.h3_fake), axis=[1, 2, 3])
        return 0.01 * kl_loss + gen_loss + rec_loss

    def _reconstruct_samples(self, data_gen, vis_id=0):
        x, _ = data_gen.next()
        x_gen = (self.rec_sample([x])[0] *
                 255.).astype('int') if x.shape[-1] > 1 else self.rec_sample(
                     [x])[0]

        f = plt.figure()
        plt.clf()
        for i in range(min(x.shape[0], 25)):
            plt.subplot(5, 5, i + 1)
            plt.imshow(x[i]) if x.shape[-1] > 1 else plt.imshow(
                np.squeeze(x[i]), cmap='gray')
            plt.axis('off')
        f.canvas.draw()
        plt.savefig('real_samples_e%i.eps' % vis_id)
        plt.close()

        f = plt.figure()
        plt.clf()
        for i in range(min(x.shape[0], 25)):
            plt.subplot(5, 5, i + 1)
            plt.imshow(x_gen[i]) if x.shape[-1] > 1 else plt.imshow(
                np.squeeze(x_gen[i]), cmap='gray')
            plt.axis('off')
        f.canvas.draw()
        plt.savefig('fake_samples_e%i.eps' % vis_id)
        plt.close()

    def _generate_samples(self, vis_id=0):
        n = np.random.randn(25, self.latent_dim)
        #n = np.ones(shape = (25, self.latent_dim)) * 0.5
        #n[..., 8] = np.linspace(-10, 10, 25) # change
        x_gen = self.gen_sample([n])[0]

        f = plt.figure()
        plt.clf()
        for i in range(min(n.shape[0], 25)):
            plt.subplot(5, 5, i + 1)
            plt.imshow(x_gen[i]) if x_gen.shape[-1] > 1 else plt.imshow(
                np.squeeze(x_gen[i]), cmap='gray')
            plt.axis('off')
        f.canvas.draw()
        plt.savefig('generated_samples_e%i.eps' % vis_id)
        plt.close()

    def train(self, train_dir, val_dir, epochs=10, batch_size=64):
        # Generators
        color_mode = 'rgb' if self.input_shape[-1] > 1 else 'grayscale'
        datagen = ImageDataGenerator(rescale=1. / 255, fill_mode='constant')
        train_gen = datagen.flow_from_directory(
            train_dir,
            target_size=self.input_shape[:2],
            interpolation='bilinear',
            color_mode=color_mode,
            class_mode='categorical',
            batch_size=batch_size)
        val_gen = datagen.flow_from_directory(val_dir,
                                              target_size=self.input_shape[:2],
                                              interpolation='bilinear',
                                              color_mode=color_mode,
                                              class_mode='categorical',
                                              batch_size=batch_size)

        steps_per_epoch = (np.ceil(train_gen.n / batch_size)).astype('int')
        for i in range(epochs):
            print('Epoch %i/%i' % (i + 1, epochs))
            pbar = Progbar(steps_per_epoch)
            self._reconstruct_samples(val_gen, i)
            for j in range(steps_per_epoch):
                x, _ = train_gen.next()
                critic_loss = self.critic_trainer.train_on_batch(x=x, y=None)
                gen_loss = self.gen_trainer.train_on_batch(x=x, y=None)
                pbar.update(j + 1, [('critic loss', critic_loss),
                                    ('generator loss', gen_loss)])

        # Save weights
        self.encoder.save_weights('./encoder.h5')
        self.decoder.save_weights('./decoder.h5')
        self.critic.save_weights('./critic.h5')

    def restore_weights(self):
        self.encoder.load_weights('./encoder.h5')
        self.decoder.load_weights('./decoder.h5')
        self.critic.load_weights('./critic.h5')

    def reconstruct_samples(self, dir, vis_id=0):
        color_mode = 'rgb' if self.input_shape[-1] > 1 else 'grayscale'
        datagen = ImageDataGenerator(rescale=1. / 255, fill_mode='constant')
        gen = datagen.flow_from_directory(dir,
                                          target_size=self.input_shape[:2],
                                          interpolation='bilinear',
                                          color_mode=color_mode,
                                          class_mode='categorical',
                                          batch_size=25)
        self._reconstruct_samples(gen, vis_id)

    def generate_samples(self, vis_id=0):
        self._generate_samples(vis_id)

    def compute_distance(self, dir, vis_id=0):
        color_mode = 'rgb' if self.input_shape[-1] > 1 else 'grayscale'
        datagen = ImageDataGenerator(rescale=1. / 255, fill_mode='constant')
        gen = datagen.flow_from_directory(dir,
                                          target_size=self.input_shape[:2],
                                          interpolation='bilinear',
                                          color_mode=color_mode,
                                          class_mode='categorical',
                                          batch_size=25)

        x, _ = gen.next()
        dist = self.compute_score([x])[0]

        f = plt.figure()
        plt.clf()
        for i in range(min(x.shape[0], 25)):
            plt.subplot(5, 5, i + 1)
            plt.imshow(x[i]) if x.shape[-1] > 1 else plt.imshow(
                np.squeeze(x[i]), cmap='gray')
            plt.title('d_%.3f' % dist[i])
            plt.axis('off')
        f.canvas.draw()
        plt.savefig('distance_samples_e%i.eps' % vis_id)
        plt.close()
Code Example #11
class CustomVae(VanillaVae):
    '''
    An altered version of VanillaVae that allows us to investigate:

    - whether we have posterior collapse
    - try using a different reconstruction loss metric
    - try using different values to weight the recon loss vs KL div, if time
    '''
    def __init__(self,
                 input_dim,
                 intermediate_dim,
                 latent_dim,
                 fn,
                 recon_type='mse',
                 beta=1.0):

        self.recon_loss = None
        self.kl_loss = None
        self.total_loss = None

        self.recon_type = recon_type  # nothing is done with this yet
        self.beta = beta

        self.z_mean = None
        self.z_log_var = None

        self.fn = fn

        # Encoder
        inputs = Input(shape=(input_dim, ))
        h = Dense(intermediate_dim, activation='relu')(inputs)
        z_mean = Dense(latent_dim)(h)
        z_log_var = Dense(latent_dim)(h)

        # Not sure if this works
        self.z_mean = z_mean
        self.z_log_var = z_log_var

        # Latent space
        args = [z_mean, z_log_var]
        z = Lambda(self.sampling, output_shape=(latent_dim, ))(args)

        self.encoder = Model(inputs, z_mean)

        # Decoder
        decoder_inputs = Input(shape=(latent_dim, ))
        decoder_h = Dense(intermediate_dim, activation='relu')(decoder_inputs)
        outputs = Dense(input_dim, activation='sigmoid')(decoder_h)

        self.decoder = Model(decoder_inputs, outputs)

        # end-to-end vae
        vae_outputs = self.decoder(z)
        self.vae = Model(inputs, vae_outputs)

        # Setup and compile
        self.vae.add_loss(
            self.vae_loss(inputs, vae_outputs, input_dim, z_mean, z_log_var))
        self.vae.compile(
            optimizer='adam',
            metrics=[
            ]  # this doesn't actually work; metrics get ignored when using add_loss, see keras issue 9459
        )

        self.vae.metrics_tensors.append(
            CustomVae.calc_mse_alone(inputs, vae_outputs, input_dim))
        self.vae.metrics_names.append("mse")

        self.vae.metrics_tensors.append(self.calc_kl_alone(beta=self.beta))
        self.vae.metrics_names.append("kl")

    def calc_kl_alone(self, beta=1.0):
        kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(
            self.z_log_var)
        kl_loss = -0.5 * K.sum(kl_loss, axis=-1)
        kl_loss = K.mean(beta * kl_loss)

        return kl_loss

    @staticmethod
    def calc_mse_alone(input_x, output_x, original_dim):
        # original_dim = 128 * 128
        return K.mean(mse(input_x, output_x) * original_dim)

    def vae_loss(self, inputs, outputs, original_dim, z_mean, z_log_var):
        """ VAE loss = mse_loss (reconstruction) + kl_loss

		Note - it may not make sense to use cross-ent loss here if the input
		images are not binarized!!

		beta is a weight that we put on the kl_loss component. Defaults to 1.

		TODO: add xent to this
		"""
        self.recon_loss = mse(inputs, outputs) * original_dim

        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        self.kl_loss = -0.5 * K.sum(kl_loss, axis=-1)

        self.total_loss = K.mean(self.recon_loss + self.beta * self.kl_loss)
        return self.total_loss

    def fit(self, val_split, epochs, batch_size, save_dir=None, fn=None):
        """ Train the model and save the weights if a `save_dir` is set.
		"""
        if save_dir:
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)

        temp_fn = "incomplete_" + fn

        # Custom callback to keep track of KL and Recon loss split
        # during training
        split_recon_kl = SplitReconKL(fn=self.fn)

        # Setup checkpoint to save best model
        callbacks = [
            ModelCheckpoint(save_dir + temp_fn,
                            monitor='val_loss',
                            verbose=1,
                            save_best_only=True), split_recon_kl
        ] if save_dir else []

        start = time.time()

        # NOTE: self.x_train is not set in this class; it is presumably
        # provided by the VanillaVae parent or assigned by the caller.
        history = self.vae.fit(self.x_train,
                               epochs=epochs,
                               batch_size=batch_size,
                               validation_split=val_split,
                               shuffle=True,
                               callbacks=callbacks,
                               verbose=1)

        print("Total train time: {0:.2f} sec".format(time.time() - start))

        print('Recon losses saved by callback:')
        print(split_recon_kl.recon_losses)

        print('Total losses saved by callback:')
        print(split_recon_kl.total_losses)

        if save_dir:
            # Rename to the proper filename after all epochs successfully run
            os.rename(save_dir + temp_fn, save_dir + fn)
            # NOTE: this save overwrites the best checkpoint just renamed
            # above with the final-epoch weights.
            self.vae.save_weights(save_dir + fn)
            print("Saved final weights to {}".format(save_dir + fn))
        return history
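
Note: fit() above uses a SplitReconKL callback that is not shown. A hedged
sketch of what it plausibly does, given how its recon_losses/total_losses
attributes are read afterwards:

from keras.callbacks import Callback

class SplitReconKL(Callback):
    # records the separately tracked metrics after every epoch
    def __init__(self, fn=None):
        super(SplitReconKL, self).__init__()
        self.fn = fn
        self.recon_losses = []
        self.total_losses = []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.recon_losses.append(logs.get('mse'))
        self.total_losses.append(logs.get('loss'))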
Code Example #12
File: run_UER.py Project: yuconan/rank6
def build_model(cfg, summary=False, word_embedding_matrix=None):
    def _get_model(base_dir, cfg_=None):
        if "albert" in cfg["verbose"].lower():
            from bert4keras.bert import build_bert_model
            config_file = os.path.join(base_dir, 'albert_config.json')
            checkpoint_file = os.path.join(base_dir, 'model.ckpt-best')
            model = build_bert_model(config_path=config_file,
                                     checkpoint_path=checkpoint_file,
                                     model='albert',
                                     return_keras_model=True)
            if cfg_["cls_num"] > 1:
                output = Concatenate(axis=-1)([
                    model.get_layer(
                        "Encoder-1-FeedForward-Norm").get_output_at(-i)
                    for i in range(1, cfg["cls_num"] + 1)
                ])
                model = Model(model.inputs[:2], outputs=output)
            model.trainable = cfg_["bert_trainable"]
        else:
            config_file = os.path.join(base_dir, 'bert_config.json')
            checkpoint_file = os.path.join(base_dir, 'bert_model.ckpt')
            if not os.path.exists(config_file):
                config_file = os.path.join(base_dir, 'bert_config_large.json')
                checkpoint_file = os.path.join(base_dir,
                                               'roberta_l24_large_model')
            model = load_trained_model_from_checkpoint(
                config_file,
                checkpoint_file,
                training=False,
                trainable=cfg_["bert_trainable"],
                output_layer_num=cfg_["cls_num"],
                seq_len=cfg_['maxlen'])

            # model = Model(inputs=model.inputs[: 2], outputs=model.layers[-7].output)

        return model

    def _get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
        total_steps, warmup_steps = calc_train_steps(
            num_example=num_example,
            batch_size=B_SIZE,
            epochs=MAX_EPOCH,
            warmup_proportion=warmup_proportion,
        )
        opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)
        if cfg.get("accum_step", None) and cfg["accum_step"] > 1:
            print("[!] using accum_step = {}".format(cfg["accum_step"]))
            from accum_optimizer import AccumOptimizer
            opt = AccumOptimizer(opt, steps_per_update=cfg["accum_step"])

        return opt

    bert_model = _get_model(cfg["base_dir"], cfg)

    if word_embedding_matrix is not None:
        embed = Embedding(input_dim=word_embedding_matrix.shape[0],
                          output_dim=word_embedding_matrix.shape[1],
                          weights=[word_embedding_matrix],
                          trainable=cfg["trainable"],
                          name="char_embed")

    t1_in = Input(shape=(None, ))
    t2_in = Input(shape=(None, ))
    o1_in = Input(shape=(1, ))
    o2_in = Input(shape=(1, ))

    t1, t2, o1, o2 = t1_in, t2_in, o1_in, o2_in

    t = bert_model([t1, t2])
    mask = Lambda(lambda x: K.cast(K.not_equal(x, cfg["x_pad"]), 'float32'))(
        t1)
    ## Char information
    if word_embedding_matrix is not None:
        word_embed = embed(t1)
        if cfg.get("use_embed_v2", False):
            _t2 = Lambda(lambda x: K.expand_dims(x, axis=-1))(t2)
            word_embed = Concatenate(axis=-1)([word_embed, _t2])
        word_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
            [word_embed, mask])
        word_embed = Bidirectional(LSTM(cfg["unit1"], return_sequences=True),
                                   merge_mode="sum")(word_embed)
        word_embed = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))(
            [word_embed, mask])
        t = Concatenate(axis=-1)([t, word_embed])

    t = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([t, mask])
    t = Bidirectional(LSTM(cfg["unit3"], return_sequences=True),
                      merge_mode="concat")(t)
    # t = Lambda(lambda x: x[0] * K.expand_dims(x[1], axis=-1))([t, mask])
    # t = Conv1D(cfg["conv_num"], kernel_size=3, padding="same")(t)
    t = Lambda(lambda x: x[:, 0, :], name="extract_layer")(t)
    if cfg.get("num_class", 1) == 2:
        po1_logit = Dense(1, name="po1_logit")(t)
        po1 = Activation('sigmoid', name="po1")(po1_logit)
        train_model = Model(inputs=[t1_in, t2_in, o1_in], outputs=[po1])
        o1_loss = K.binary_crossentropy(o1, po1)
        loss = K.mean(o1_loss)
    else:
        po1_logit = Dense(cfg["num_class"], name="po1_logit")(t)
        po1 = Activation('softmax', name="po1")(po1_logit)
        train_model = Model(inputs=[t1_in, t2_in, o1_in], outputs=[po1])
        # NOTE: o1 has shape (batch, 1) here; for this multi-class branch it
        # would need to be one-hot encoded (or use sparse categorical
        # cross-entropy) to match the softmax output po1.
        loss = K.categorical_crossentropy(o1, po1, axis=-1)
        loss = K.mean(loss)

    train_model.add_loss(loss)
    opt = _get_opt(num_example=cfg["num_example"],
                   lr=cfg["lr"],
                   min_lr=cfg['min_lr'])
    train_model.compile(optimizer=opt)
    if summary:
        train_model.summary()
    return train_model
Code Example #13
    def _build_model(self):
        rpn_trainable = self.config.training_mode in ['rpn_only', 'all']
        head_trainable = self.config.training_mode in ['head_only', 'all']

        # backbone network
        backbone_in, backbone_out = self._model_backbone_headless()

        # rpn
        normalized_rois, rpn_offsets, objects, objects_logit \
            = self._nn_rpn(backbone_out, rpn_trainable)

        # Compute the losses only during training
        if self.config.training:
            # Training path
            # Inputs
            input_gt_rois = Input(shape=[None, 4],
                                  name="input_gt_rois",
                                  dtype='float32')
            input_gt_objects = Input(shape=[None],
                                     name="input_gt_objects",
                                     dtype='int32')
            inputs = [backbone_in, input_gt_rois, input_gt_objects]

            losses = []
            if rpn_trainable:
                # Loss computation
                # RPN losses
                rpn_offsets_loss = Lambda(lambda x: loss.rpn_offsets_loss(*x),
                                          name="rpn_offsets_loss")([
                                              input_gt_rois, input_gt_objects,
                                              rpn_offsets
                                          ])
                rpn_objects_loss = Lambda(
                    lambda x: loss.rpn_objects_loss(*x),
                    name="rpn_objects_loss")([input_gt_objects, objects])

                losses += [rpn_offsets_loss, rpn_objects_loss]

            if head_trainable:
                input_gt_boxes = Input(shape=[None, 4],
                                       name="input_gt_boxes",
                                       dtype='float32')
                input_gt_label_ids = Input(shape=[None],
                                           name="input_gt_label_ids",
                                           dtype='int32')
                inputs += [input_gt_boxes, input_gt_label_ids]

                # Narrow the RoIs down to those to be evaluated, using the
                # ground truth, and get the corresponding ground-truth data.
                normalized_sample_rois, normalized_sample_gt_offsets, \
                    sample_gt_labels \
                    = SubsamplingRoiLayer(self.config,
                                          name='subsampling_roi_and_gt')(
                        [normalized_rois, input_gt_boxes, input_gt_label_ids])
                # Specifying output_shape directly as below raised an
                # IndexError, so it is wrapped in the custom layer above.
                # batch_size = K.shape(normalized_rois)[0]
                # sample_rois, sample_gt_offsets, sample_labels = \
                #     Lambda(lambda x: self._subsampling_roi_and_gt(*x),
                #            output_shape=[(batch_size, None, 4),
                #                          (batch_size, None, 4),
                #                          (batch_size, None)],
                #            name="subsampling_roi_and_gt")(
                #         [normalized_rois, input_gt_boxes,
                #         input_gt_label_ids])

                # head
                head_offsets, labels, labels_logit\
                    = self._nn_head(backbone_out, normalized_sample_rois)

                # Loss computation
                # The head losses cannot be handled by passing loss functions
                # to Model#compile, so they are defined as Layers and added
                # via Model#add_loss.
                head_offsets_loss = Lambda(
                    lambda x: loss.head_offsets_loss(*x),
                    name="head_offsets_loss")([
                        normalized_sample_gt_offsets, sample_gt_labels,
                        head_offsets
                    ])
                head_labels_loss = Lambda(
                    lambda x: loss.head_labels_loss(*x),
                    name="head_labels_loss")([sample_gt_labels, labels])

                # Losses
                losses += [head_offsets_loss, head_labels_loss]

            # Outputs = losses
            outputs = losses

        else:
            # Inference path
            # head
            # head_offsets are values normalized to the 0-1 range
            head_offsets, labels, _ = self._nn_head(backbone_out,
                                                    normalized_rois)

            # No losses are needed at inference time
            # Register a dummy loss layer
            dummy_loss = Lambda(lambda x: K.constant(0),
                                name="dummy_loss")([backbone_in])
            losses = [dummy_loss, dummy_loss, dummy_loss]
            inputs = [backbone_in]
            # The BBoxes are obtained by applying head_offsets to the
            # de-normalized coordinates of normalized_rois.
            outputs = [
                normalized_rois, head_offsets, labels, rpn_offsets, objects
            ]

        model = Model(inputs=inputs, outputs=outputs, name='faser_r_cnn')
        # Keras evaluates the sum of all registered losses as the model's
        # total loss.
        # Add the losses (keep_dims is the TF1 spelling; newer TF uses keepdims)
        for output in losses:
            model.add_loss(tf.reduce_mean(output, keep_dims=True))
        return model, len(outputs)
Code Example #14
class CycleGAN():
    def __init__(self):
        '''
        Parameters and structure.

        Q1: Why do we need to define the outputs?
        A1: Because with the Keras framework, a model can only be defined once
            its inputs and outputs are defined.

        Q2: Why are the inputs and outputs not kept as instance attributes?
        A2: As above, they are only used to define the models and play no
            further role in the class's state or methods; for simplicity they
            are kept as locals.
        '''
        # Define the input images
        self.img_dim = 64
        img_x = Input(shape=(self.img_dim, self.img_dim, 3))
        img_y = Input(shape=(self.img_dim, self.img_dim, 3))
        # Define the cycle-consistency mapping functions (domain mappings / generators)
        self.F_x2y = self.F_x2y()
        self.G_y2x = self.G_y2x()
        # Define the domain-mapping (fake) outputs
        im_fake_y = self.F_x2y(img_x)
        im_fake_x = self.G_y2x(img_y)
        # Define the cycle (reverse-mapping) reconstructions
        reconstr_x = self.G_y2x(im_fake_y)
        reconstr_y = self.F_x2y(im_fake_x)
        # Define the style-transfer outputs produced by the F and G mappings
        translation_x2y = self.F_x2y(img_x)
        translation_y2x = self.G_y2x(img_y)
        # Define the GAN discriminators D
        self.D_x = self.D_x()
        self.D_y = self.D_y()
        # Define the discriminator outputs
        '''
        Note that the discriminator's input and output are both single tensors.
        '''
        valid_x = self.D_x(im_fake_x)
        valid_y = self.D_y(im_fake_y)
        # Add the losses to D and compile
        # NOTE: valid_x / valid_y are themselves D applied to fakes, so both
        # terms of each loss below score generated images only; a
        # discriminator loss would normally also see real images.
        loss1 = K.mean(K.log(valid_x)) + K.mean(K.log(1 - self.D_x(im_fake_x)))
        loss2 = K.mean(K.log(valid_y)) + K.mean(K.log(1 - self.D_y(im_fake_y)))
        self.D_x.add_loss(loss1)
        self.D_y.add_loss(loss2)
        self.D_x.compile(optimizer=Adam(2e-4, 0.5))
        self.D_y.compile(optimizer=Adam(2e-4, 0.5))
        # Define the full model
        '''
        Inputs and outputs explained:
        Inputs: two images.
        Outputs: 1. the discriminator outputs; 2. the cycle-consistency
        outputs; 3. the desired outputs (i.e. the style-transferred images).
        '''
        self.C_gan = Model([img_x, img_y], [
            valid_x, valid_y, reconstr_x, reconstr_y, translation_x2y,
            translation_y2x
        ])
        # Define the loss of the whole CycleGAN
        lamda = 0.1
        # NOTE: a cycle-consistency term would normally compare reconstr_x
        # with img_x; the original compares translation_x2y with img_x.
        cyc_loss = K.mean(K.sum(K.abs(translation_x2y - img_x))) + K.mean(
            K.sum(K.abs(reconstr_y - img_y)))
        total_loss = loss1 + loss2 + lamda * cyc_loss
        # Add the loss and compile
        self.C_gan.add_loss(total_loss)
        self.C_gan.compile(optimizer=Adam(2e-4, 0.5))
Code Example #15
File: t_sagan_celeba.py Project: xiaoanshi/T-GANs
# (This snippet starts mid-file: x_in and the "train the discriminator"
# header are presumably defined just above this point in the original script.)
z_in = Input(shape=(z_dim, ))
g_model.trainable = False

x_fake = g_model(z_in)
x_real_encoded = e_model(x_in)
x_fake_encoded = e_model(x_fake)
x_real_fake = Subtract()([x_real_encoded, x_fake_encoded])
x_fake_real = Subtract()([x_fake_encoded, x_real_encoded])
x_real_fake_score = d_model(x_real_fake)
x_fake_real_score = d_model(x_fake_real)

d_train_model = Model([x_in, z_in],
                      [x_real_fake_score, x_fake_real_score])

d_loss = K.mean(- log_sigmoid(x_real_fake_score) - log_sigmoid(- x_fake_real_score))
d_train_model.add_loss(d_loss)
d_train_model.compile(optimizer=Adam(2e-4, 0.5))


# Combined model (train the generator)
g_model.trainable = True
d_model.trainable = False
e_model.trainable = False

x_fake = g_model(z_in)
x_real_encoded = e_model(x_in)
x_fake_encoded = e_model(x_fake)
x_real_fake = Subtract()([x_real_encoded, x_fake_encoded])
x_fake_real = Subtract()([x_fake_encoded, x_real_encoded])
x_real_fake_score = d_model(x_real_fake)
x_fake_real_score = d_model(x_fake_real)
Code Example #16
def correlation(x, y):
    x = x - K.mean(x, 1, keepdims=True)
    y = y - K.mean(y, 1, keepdims=True)
    x = K.l2_normalize(x, 1)
    y = K.l2_normalize(y, 1)
    return K.sum(x * y, 1, keepdims=True)
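# correlation() computes a per-sample Pearson correlation between the rows of
# x and y (after centring and l2-normalising); below it couples the latent
# code z_in with the re-encoded fake z_fake as an extra training signal.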


t1_loss = z_real_mean - z_fake_ng_mean
t2_loss = z_fake_mean - z_fake_ng_mean
z_corr = correlation(z_in, z_fake)
qp_loss = 0.25 * t1_loss[:, 0]**2 / K.mean(
    (x_real - x_fake_ng)**2, axis=[1, 2, 3])

train_model.add_loss(K.mean(t1_loss + t2_loss - 1. * z_corr) + K.mean(qp_loss))
train_model.compile(optimizer=RMSprop(1e-4, 0.99))
# add_metric replaces the older metrics_names / metrics_tensors API
train_model.add_metric(K.mean(t1_loss), name='t_loss')
train_model.add_metric(K.mean(z_corr), name='z_corr')

# 检查模型结构
train_model.summary()


class ExponentialMovingAverage:
    """Apply an exponential moving average to the model weights.
    Usage: invoke after model.compile and before the first round of training;
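
The class is truncated here. For orientation, weight averaging of this kind is usually kept as a shadow copy of the weights that is updated after every batch; a hypothetical standalone sketch (not the truncated class itself):

class SimpleWeightEMA:
    """Keep an exponential moving average of a Keras model's weights."""

    def __init__(self, model, momentum=0.999):
        self.model = model
        self.momentum = momentum
        self.shadow = [w.copy() for w in model.get_weights()]
        self.backup = None

    def update(self):
        # shadow <- momentum * shadow + (1 - momentum) * current, after each batch
        for s, w in zip(self.shadow, self.model.get_weights()):
            s *= self.momentum
            s += (1.0 - self.momentum) * w

    def apply_shadow(self):
        # swap the averaged weights in (e.g., for evaluation)
        self.backup = self.model.get_weights()
        self.model.set_weights(self.shadow)

    def restore(self):
        self.model.set_weights(self.backup)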
コード例 #17
0
ファイル: mnist_tfrecord.py プロジェクト: AlexeySorokin/keras
x_train_batch = tf.cast(x_train_batch, tf.float32)
x_train_batch = tf.reshape(x_train_batch, shape=batch_shape)

y_train_batch = tf.cast(y_train_batch, tf.int32)
y_train_batch = tf.one_hot(y_train_batch, classes)

x_batch_shape = x_train_batch.get_shape().as_list()
y_batch_shape = y_train_batch.get_shape().as_list()

x_train_input = layers.Input(tensor=x_train_batch, batch_shape=x_batch_shape)
x_train_out = cnn_layers(x_train_input)
train_model = Model(inputs=x_train_input, outputs=x_train_out)

cce = objectives.categorical_crossentropy(y_train_batch, x_train_out)
train_model.add_loss(cce)

# Do not pass the loss directly to model.compile()
# because it is not yet supported for Input Tensors.
train_model.compile(optimizer='rmsprop',
                    loss=None,
                    metrics=['accuracy'])
train_model.summary()

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess, coord)
train_model.fit(epochs=epochs,
                steps_per_epoch=steps_per_epoch)

train_model.save_weights('saved_wt.h5')
コード例 #18
0
class GAN():
    def __init__(self, dataset_name='mnist', load_model_name=''):

        optimizer = Adam(0.0002, 0.5)
        if (load_model_name == ''):
            X_train = self.load_gan_data(dataset_name)

            # default parameters for mnist
            self.img_rows = X_train.shape[1]
            self.img_cols = X_train.shape[2]
            self.img_channels = X_train.shape[3]
            self.img_shape = (self.img_rows, self.img_cols, self.img_channels)
            self.z_dim = 32
            self.iter_count = 0
            self.dataset_name = dataset_name
            self.model_file = "./" + self.dataset_name + '_gan_model.pickle'

            # Build and compile the discriminator and discriminator loss
            self.discriminator = self.build_discriminator()
            # set discriminator loss
            # BEGIN INSERT CODE
            self.discriminator.compile(loss='binary_crossentropy',
                                       optimizer=optimizer,
                                       metrics=['accuracy'])
            # END INSERT CODE

            # Build the generator
            self.generator = self.build_generator()

        else:
            # load gan class and models (generator, discriminator and stacked model)
            self.load_gan_model(load_model_name)

        # Create the stacked model
        # first, create the random vector z in the latent space
        z = Input(shape=(self.z_dim, ))
        # create generated (fake) image
        img = self.generator(z)

        # indicate that, within the stacked model, the discriminator weights are not trained
        self.discriminator.trainable = False

        # The discriminator takes the generated images as input and outputs the
        # probability that each image is real
        p_true = self.discriminator(img)

        # The combined model  (stacked generator and discriminator)
        # In this model, we train the generator only
        self.stacked_gen_disc = Model(z, p_true)

        # loss
        # START INSERT CODE HERE
        # minimax (saturating) generator loss: minimize log(1 - D(G(z)))
        generator_loss = K.mean(K.log(1 - p_true))
        # END INSERT CODE HERE
        self.stacked_gen_disc.add_loss(generator_loss)
        self.stacked_gen_disc.compile(optimizer=optimizer)

    def build_generator(self):

        z_rand = Input(shape=(self.z_dim, ))

        # START INSERT CODE HERE
        y = Dense(256)(z_rand)
        y = LeakyReLU(alpha=0.2)(y)
        y = Dense(512)(y)
        y = LeakyReLU(alpha=0.2)(y)
        y = Dense(784)(y)
        y = Activation('tanh')(y)
        output_img = Reshape(target_shape=(28, 28, 1))(y)
        # END INSERT CODE HERE

        model_generator = Model(z_rand, output_img)
        model_generator.summary()

        return model_generator

    def build_discriminator(self):

        input_img = Input(shape=self.img_shape)
        y = Flatten()(input_img)
        y = Dense(512)(y)
        y = LeakyReLU(alpha=0.2)(y)
        y = Dense(256)(y)
        y = LeakyReLU(alpha=0.2)(y)
        y = Dense(1)(y)
        p_true = Activation('sigmoid')(y)
        model_discriminator = Model(input_img, p_true)
        model_discriminator.summary()

        return model_discriminator

    def load_gan_data(self, dataset_name):
        # Load the dataset
        if (dataset_name == 'mnist'):
            (X_train, _), (_, _) = mnist.load_data()
        elif (dataset_name == 'cifar'):
            from keras.datasets import cifar10
            (X_train, y_train), (X_test, y_test) = cifar10.load_data()
        else:
            raise ValueError('Error, unknown dataset: %s' % dataset_name)

        # Rescale -1 to 1
        X_train = X_train / 127.5 - 1.
        # add a channel dimension, if need be (for mnist data)
        if (X_train.ndim == 3):
            X_train = np.expand_dims(X_train, axis=3)
        return X_train

    def save_gan_model(self, model_file):

        # save the GAN class instance
        gan_temp = GAN(self.dataset_name, '')
        gan_temp.generator = self.generator
        gan_temp.discriminator = self.discriminator
        gan_temp.stacked_gen_disc = []
        gan_temp.iter_count = self.iter_count
        with open(model_file, 'wb') as file_class:
            pickle.dump(gan_temp, file_class, -1)

    def load_gan_model(self, model_file):

        # load GAN class instance
        with open(model_file, "rb") as file_class:
            gan_temp = pickle.load(file_class)
        # copy parameters
        self.img_rows = gan_temp.img_rows
        self.img_cols = gan_temp.img_cols
        self.img_channels = gan_temp.img_channels
        self.img_shape = gan_temp.img_shape
        self.z_dim = gan_temp.z_dim
        self.iter_count = gan_temp.iter_count
        self.model_file = gan_temp.model_file
        self.dataset_name = gan_temp.dataset_name

        # copy models
        self.generator = gan_temp.generator
        self.discriminator = gan_temp.discriminator

    def train(self, epochs, batch_size=128, sample_interval=50):

        k = 1  # number of internal loops

        # load dataset
        X_train = self.load_gan_data(self.dataset_name)

        # Adversarial ground truths
        d_output_true = np.ones((batch_size, 1))
        d_output_false = np.zeros((batch_size, 1))

        first_iter = self.iter_count

        for epoch in range(first_iter, epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Train the discriminator
            for i in range(0, k):
                # Select a random batch of images
                idx = np.random.randint(0, X_train.shape[0], batch_size)
                imgs = X_train[idx]

                z_random = np.random.normal(0, 1, (batch_size, self.z_dim))

                # Generate a batch of new (fake) images
                gen_imgs = self.generator.predict(z_random)

                # START INSERT CODE
                d_loss_real = self.discriminator.train_on_batch(
                    imgs, d_output_true)
                d_loss_fake = self.discriminator.train_on_batch(
                    gen_imgs, d_output_false)
                # END INSERT CODE
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]

            z_random = np.random.normal(0, 1, (batch_size, self.z_dim))

            # Generate a batch of new (fake) images
            gen_imgs = self.generator.predict(z_random)
            # Generator training : try to make generated images be classified as true by the discriminator
            g_loss = self.stacked_gen_disc.train_on_batch(z_random, None)

            # increase epoch counter
            self.iter_count = self.iter_count + 1
            # Plot the losses
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # Save some random generated images and the models at every sample_interval iterations
            if (epoch % sample_interval == 0):
                self.sample_images('images/' + self.dataset_name +
                                   '_sample_%06d.png' % epoch)
                self.save_gan_model(self.model_file)

    def sample_images(self, image_filename, rand_seed=30):
        np.random.seed(rand_seed)

        r, c = 5, 5
        z_random = np.random.normal(0, 1, (r * c, self.z_dim))
        gen_imgs = self.generator.predict(z_random)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                # black and white images
                if (gen_imgs.shape[3] == 1):
                    axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
                elif (gen_imgs.shape[3] == 3):  # colour images
                    axs[i, j].imshow(gen_imgs[cnt, :, :])
                else:
                    print("Error, unsupported channel size. Dude, I don't know "
                          "what you want me to do. I can't handle this data. "
                          "You've made me very sad ...")
                axs[i, j].axis('off')
                cnt += 1
        fig.savefig(image_filename)
        plt.close()
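
A typical way to drive the class above (hypothetical invocation; the argument values are illustrative):

gan = GAN(dataset_name='mnist')
gan.train(epochs=20000, batch_size=128, sample_interval=200)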
コード例 #19
0
ファイル: models_worked.py プロジェクト: darylperalta/vaegan
def vaegan_actual_model(original_dim=(64, 64, 3),
                        batch_size=64,
                        latent_dim=128,
                        epochs=50,
                        mse_flag=True):
    '''VAE model.'''
    # VAE model = encoder + decoder
    # build encoder model
    input_shape = original_dim
    inputs = Input(shape=input_shape, name='encoder_input')
    x = Conv2D(64, (5, 5), strides=(2, 2), padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2D(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Flatten()(x)
    x = Dense(2048)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    z_mean = Dense(latent_dim, name='z_mean')(x)

    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

    encoder.summary()
    plot_model(encoder, to_file='vaegan_encoder.png', show_shapes=True)

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(8 * 8 * 256)(latent_inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Reshape((8, 8, 256))(x)

    x = Conv2DTranspose(256, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2DTranspose(128, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2DTranspose(32, (5, 5), strides=(2, 2), padding='same')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    x = Conv2DTranspose(3, (5, 5), strides=(1, 1), padding='same')(x)
    outputs = Activation('tanh')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()
    plot_model(decoder, to_file='vaegan_decoder.png', show_shapes=True)

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae_mlp')

    #outputs = Dense(original_dim, activation='sigmoid')(x)
    if mse_flag:
        reconstruction_loss = mse(inputs, outputs)
    else:
        reconstruction_loss = binary_crossentropy(inputs, outputs)
    reconstruction_loss *= original_dim[0] * original_dim[1] * original_dim[2]
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer=RMSprop(lr=0.0003))
    vae.summary()
    plot_model(vae, to_file='vae.png', show_shapes=True)

    return encoder, decoder, vae
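
The three kl_loss lines above implement the closed-form KL divergence between the diagonal Gaussian posterior and the standard normal prior, with z_log_var standing for \log\sigma^2:

    \mathrm{KL}\big(\mathcal{N}(\mu,\sigma^2)\,\|\,\mathcal{N}(0,I)\big) = -\frac{1}{2}\sum_j\left(1 + \log\sigma_j^2 - \mu_j^2 - \sigma_j^2\right)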
コード例 #20
0
ファイル: keras_vae.py プロジェクト: jbdatascience/DAG-EQ
def vae():
    # MNIST dataset
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    image_size = x_train.shape[1]
    original_dim = image_size * image_size
    x_train = np.reshape(x_train, [-1, original_dim])
    x_test = np.reshape(x_test, [-1, original_dim])
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # network parameters
    input_shape = (original_dim, )
    intermediate_dim = 512
    batch_size = 128
    latent_dim = 2
    epochs = 50

    # VAE model = encoder + decoder
    # build encoder model
    # 784
    inputs = Input(shape=input_shape, name='encoder_input')
    # 512
    x = Dense(intermediate_dim, activation='relu')(inputs)
    # 2
    z_mean = Dense(latent_dim, name='z_mean')(x)
    # 2
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(intermediate_dim, activation='relu')(latent_inputs)
    outputs = Dense(original_dim, activation='sigmoid')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()
    plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae_mlp')

    # model and data
    models = (encoder, decoder)
    data = (x_test, y_test)

    # use either one of these losses
    # reconstruction_loss = mse(inputs, outputs)
    reconstruction_loss = binary_crossentropy(inputs, outputs)

    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='adam')
    vae.summary()
    plot_model(vae, to_file='vae_mlp.png', show_shapes=True)
    vae.fit(x_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_test, None))
    plot_results(models, data, batch_size=batch_size, model_name="vae_mlp")
コード例 #21
0

o_in = Input(shape=(None, ))
train_model = Model(model.inputs + [o_in], model.outputs + [o_in])

# Cross-entropy as the loss, with the predictions over the input segment masked out
y_true = train_model.input[2][:, 1:]  # target tokens
y_mask = train_model.input[1][:, 1:]
y_pred = train_model.output[0][:, :-1]  # predicted tokens, offset from the targets by one position
cross_entropy = sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)

embeddings = search_layer(train_model.output[0], 'Embedding-Token').embeddings
# gradients w.r.t. an embedding matrix come back as IndexedSlices, hence .values
gp = K.sum(K.gradients(cross_entropy, [embeddings])[0].values**2)

train_model.add_loss(cross_entropy + 0.5 * gp)
train_model.compile(optimizer=Adam(1e-5))

# train_model.add_loss(cross_entropy)
# train_model.compile(optimizer=Adam(1e-5))


class AutoTitle(AutoRegressiveDecoder):
    """seq2seq decoder
    """
    @AutoRegressiveDecoder.set_rtype('probas')
    def predict(self, inputs, output_ids, step):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate(
            [segment_ids, np.ones_like(output_ids)], 1)
コード例 #22
0
ファイル: keras_vae.py プロジェクト: jbdatascience/DAG-EQ
def conv_vae():
    # MNIST dataset
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # network parameters
    input_shape = (image_size, image_size, 1)
    batch_size = 128
    kernel_size = 3
    filters = 16
    latent_dim = 2
    epochs = 30
    # VAE model = encoder + decoder
    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    x = inputs
    for i in range(2):
        filters *= 2
        x = Conv2D(filters=filters,
                   kernel_size=kernel_size,
                   activation='relu',
                   strides=2,
                   padding='same')(x)

    # shape info needed to build decoder model
    shape = K.int_shape(x)

    # generate latent vector Q(z|X)
    x = Flatten()(x)
    x = Dense(16, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    plot_model(encoder, to_file='vae_cnn_encoder.png', show_shapes=True)

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs)
    x = Reshape((shape[1], shape[2], shape[3]))(x)

    for i in range(2):
        x = Conv2DTranspose(filters=filters,
                            kernel_size=kernel_size,
                            activation='relu',
                            strides=2,
                            padding='same')(x)
        filters //= 2

    outputs = Conv2DTranspose(filters=1,
                              kernel_size=kernel_size,
                              activation='sigmoid',
                              padding='same',
                              name='decoder_output')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()
    plot_model(decoder, to_file='vae_cnn_decoder.png', show_shapes=True)

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')

    models = (encoder, decoder)
    data = (x_test, y_test)
    reconstruction_loss = binary_crossentropy(K.flatten(inputs),
                                              K.flatten(outputs))
    reconstruction_loss *= image_size * image_size
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='rmsprop')
    vae.summary()
    plot_model(vae, to_file='vae_cnn.png', show_shapes=True)
    vae.fit(x_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_test, None))
    plot_results(models, data, batch_size=batch_size, model_name="vae_cnn")
コード例 #23
0
    def build(self,
              score_func='cosine',
              margin=0.04,
              max_mention=False,
              avg_mention=False,
              add_cnn=None,
              encoder_type='self_attend_max',
              ent_attend_type='add'):
        '''1. prepare input'''
        model_inputs = []
        link_model_inputs = []

        input_erl_text = Input(shape=(None, ))
        model_inputs.append(input_erl_text)
        link_model_inputs.append(input_erl_text)

        input_begin = Input(shape=(1, ))
        input_end = Input(shape=(1, ))
        model_inputs.extend([input_begin, input_end])
        link_model_inputs.extend([input_begin, input_end])

        if self.config.use_relative_pos:
            input_relative_pos = Input(shape=(None, ))
            model_inputs.append(input_relative_pos)
            link_model_inputs.append(input_relative_pos)

        input_pos_desc = Input(shape=(None, ))
        input_neg_desc = Input(shape=(None, ))
        model_inputs.extend([input_pos_desc, input_neg_desc])
        link_model_inputs.append(input_pos_desc)

        # CuDNNLSTM (and CNN) don't support masking, so we don't use mask_zero in
        # the embedding layer; instead we apply the masking ourselves
        get_mask_layer = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), K.floatx()))
        erl_text_mask = get_mask_layer(input_erl_text)
        pos_desc_mask = get_mask_layer(input_pos_desc)
        neg_desc_mask = get_mask_layer(input_neg_desc)
        apply_mask_layer = Lambda(lambda x: x[0] * x[1])
        '''2. prepare embedding'''
        if self.config.embeddings is not None:
            embedding_layer = Embedding(input_dim=self.config.vocab_size,
                                        output_dim=self.config.embed_dim,
                                        weights=[self.config.embeddings],
                                        trainable=self.config.embed_trainable)
        else:
            embedding_layer = Embedding(input_dim=self.config.vocab_size,
                                        output_dim=self.config.embed_dim)

        erl_text_embed = SpatialDropout1D(0.2)(embedding_layer(input_erl_text))
        if self.config.use_relative_pos:
            rel_pos_embedding_layer = Embedding(
                input_dim=self.config.n_rel_pos_embed,
                output_dim=self.config.rel_pos_embed_dim)
            rel_pos_embed = rel_pos_embedding_layer(input_relative_pos)
            erl_text_embed = concatenate([erl_text_embed, rel_pos_embed])
        erl_text_embed = apply_mask_layer([erl_text_embed, erl_text_mask])

        pos_desc_embed = SpatialDropout1D(0.2)(embedding_layer(input_pos_desc))
        neg_desc_embed = SpatialDropout1D(0.2)(embedding_layer(input_neg_desc))
        pos_desc_embed = apply_mask_layer([pos_desc_embed, pos_desc_mask])
        neg_desc_embed = apply_mask_layer([neg_desc_embed, neg_desc_mask])
        '''3. encode mention & entity representation'''
        if add_cnn == 'before':
            erl_text_embed = Conv1D(filters=self.config.embed_dim,
                                    kernel_size=3,
                                    padding='same',
                                    activation='relu')(erl_text_embed)
        erl_text_lstm = Bidirectional(
            CuDNNLSTM(units=self.config.embed_dim // 2,
                      return_sequences=True))(erl_text_embed)
        if add_cnn == 'after':
            erl_text_lstm = Conv1D(filters=self.config.embed_dim,
                                   kernel_size=3,
                                   padding='same',
                                   activation='relu')(erl_text_lstm)
        erl_text_lstm = apply_mask_layer([erl_text_lstm, erl_text_mask])

        if add_cnn == 'before':
            ent_cnn_layer = Conv1D(filters=self.config.embed_dim,
                                   kernel_size=3,
                                   padding='same',
                                   activation='relu')
            pos_desc_embed = ent_cnn_layer(pos_desc_embed)
            neg_desc_embed = ent_cnn_layer(neg_desc_embed)
        ent_lstm_layer = Bidirectional(
            CuDNNLSTM(units=self.config.embed_dim // 2, return_sequences=True))
        pos_desc_lstm = ent_lstm_layer(pos_desc_embed)
        neg_desc_lstm = ent_lstm_layer(neg_desc_embed)
        if add_cnn == 'after':
            ent_cnn_layer = Conv1D(filters=self.config.embed_dim,
                                   kernel_size=3,
                                   padding='same',
                                   activation='relu')
            pos_desc_lstm = ent_cnn_layer(pos_desc_lstm)
            neg_desc_lstm = ent_cnn_layer(neg_desc_lstm)
        pos_desc_lstm = apply_mask_layer([pos_desc_lstm, pos_desc_mask])
        neg_desc_lstm = apply_mask_layer([neg_desc_lstm, neg_desc_mask])

        if encoder_type in ['self_attend_max', 'self_attend_single_attend']:
            '''mention representation based on self-attention, first token & last token, and optional max/avg pooling'''
            # first token & last token of mention
            index_layer = Lambda(lambda x: tf.gather_nd(
                x[0],
                tf.concat([
                    tf.expand_dims(tf.range(tf.shape(x[0])[0]), 1),
                    tf.to_int32(x[1])
                ],
                          axis=1)))
            mention_begin_embed = index_layer([erl_text_lstm, input_begin])
            mention_end_embed = index_layer([erl_text_lstm, input_end])
            mention_spand_embed, mention_index, mention_mask = Lambda(
                self.span_index)([erl_text_lstm, input_begin, input_end])

            # soft head attention
            head_score = TimeDistributed(Dense(
                1, activation='tanh'))(erl_text_lstm)
            mention_head_score = Lambda(lambda x: tf.squeeze(
                tf.gather_nd(x[0], tf.to_int32(x[1])), 2))(
                    [head_score,
                     mention_index])  # [batch_size, max_mention_length]
            # self attention
            mention_head_score = Lambda(
                self.softmax_with_mask)([mention_head_score, mention_mask])
            mention_attention = Lambda(
                lambda x: K.sum(x[0] * K.expand_dims(x[1], 2), 1))(
                    [mention_spand_embed, mention_head_score])
            mention_embed = concatenate(
                [mention_begin_embed, mention_end_embed, mention_attention])

            # max pooling & avg pooling
            if max_mention:
                mention_max_embed = Lambda(lambda x: K.max(
                    x[0] - (1 - K.expand_dims(x[1], 2)) * 1e10, 1))(
                        [mention_spand_embed, mention_mask])
                mention_embed = concatenate([mention_embed, mention_max_embed])
            if avg_mention:
                mention_avg_embed = Lambda(
                    lambda x: K.sum(x[0], 1) / K.sum(x[1], 1, keepdims=True))(
                        [mention_spand_embed, mention_mask])
                mention_embed = concatenate([mention_embed, mention_avg_embed])
            mention_embed = Dense(self.config.embed_dim, activation='relu')(
                mention_embed)  # [batch_size, embed_dim]
            mention_pos_embed = mention_embed
            mention_neg_embed = mention_embed

            if encoder_type == 'self_attend_max':
                '''entity representation based on max pooling'''
                pos_embed = Lambda(
                    self.seq_maxpool)([pos_desc_lstm, pos_desc_mask])
                neg_embed = Lambda(
                    self.seq_maxpool)([neg_desc_lstm, neg_desc_mask])
            else:
                '''entity representation based on single-sided attention, using the mention representation as the query'''
                ent_attend_layer = SingleSideAttention(ent_attend_type)
                pos_embed = ent_attend_layer([mention_embed, pos_desc_lstm])
                neg_embed = ent_attend_layer([mention_embed, neg_desc_lstm])
        elif encoder_type in ['co_attend', 'max_co_attend']:
            attend_layer = InteractiveAttention(attend_type=encoder_type)
            mention_pos_embed, pos_embed = attend_layer(
                [erl_text_lstm, pos_desc_lstm])
            mention_neg_embed, neg_embed = attend_layer(
                [erl_text_lstm, neg_desc_lstm])
        else:
            raise ValueError(
                'encoder_type {} not understood'.format(encoder_type))
        if score_func == 'dense':
            hidden_layer = Dense(self.config.embed_dim, activation='relu')
            score_layer = Dense(1, activation='sigmoid')
            pos_score = score_layer(
                hidden_layer(
                    concatenate([
                        mention_pos_embed, pos_embed,
                        multiply([mention_pos_embed, pos_embed]),
                        subtract([mention_pos_embed, pos_embed])
                    ])))
            neg_score = score_layer(
                hidden_layer(
                    concatenate([
                        mention_neg_embed, neg_embed,
                        multiply([mention_neg_embed, neg_embed]),
                        subtract([mention_neg_embed, neg_embed])
                    ])))
        else:
            score_layer = self.get_score_layer(score_func)
            pos_score = score_layer([mention_pos_embed, pos_embed])
            neg_score = score_layer([mention_neg_embed, neg_embed])
        link_model = Model(link_model_inputs, pos_score)
        loss = K.mean(K.relu(margin + neg_score - pos_score))
        train_model = Model(model_inputs, [pos_score, neg_score])
        train_model.add_loss(loss)
        train_model.compile(optimizer=self.config.optimizer)
        return train_model, link_model
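
The training loss above is a margin ranking (hinge) loss over the positive and negative candidate scores, averaged over the batch:

    L = \max(0,\; m + s_{\mathrm{neg}} - s_{\mathrm{pos}}),  with margin m = 0.04 by default,

so gradients flow only while the positive entity fails to outscore the negative one by at least the margin.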
コード例 #24
0
    def fit(self, Y, T, X, Z):
        """Estimate the counterfactual model from data.

        That is, estimate functions τ(·, ·, ·), ∂τ(·, ·).

        Parameters
        ----------
        Y: (n × d_y) matrix or vector of length n
            Outcomes for each sample
        T: (n × dₜ) matrix or vector of length n
            Treatments for each sample
        X: optional (n × dₓ) matrix
            Features for each sample
        Z: optional (n × d_z) matrix
            Instruments for each sample

        Returns
        -------
        self

        """
        # TODO: allow 1D arguments for Y and T
        assert np.ndim(X) == np.ndim(Y) == np.ndim(T) == np.ndim(Z) == 2
        assert np.shape(X)[0] == np.shape(Y)[0] == np.shape(T)[0] == np.shape(
            Z)[0]

        d_x, d_y, d_z, d_t = [np.shape(a)[1] for a in [X, Y, Z, T]]
        x_in, y_in, z_in, t_in = [L.Input((d, )) for d in [d_x, d_y, d_z, d_t]]
        n_components = self._n_components

        treatment_network = self._m(z_in, x_in)

        # the dimensionality of the output of the network
        # TODO: is there a more robust way to do this?
        d_n = K.int_shape(treatment_network)[-1]

        pi, mu, sig = mog_model(n_components, d_n, d_t)([treatment_network])

        ll = mog_loss_model(n_components, d_t)([pi, mu, sig, t_in])

        model = Model([z_in, x_in, t_in], [ll])
        model.add_loss(L.Lambda(K.mean)(ll))
        model.compile(self._optimizer)
        # TODO: do we need to give the user more control over other arguments to fit?
        model.fit([Z, X, T], [], epochs=self._s1)

        lm = response_loss_model(
            lambda t, x: self._h(t, x),
            lambda z, x: Model(
                [z_in, x_in],
                # subtle point: we need to build a new model each time,
                # because each model encapsulates its randomness
                [mog_sample_model(n_components, d_t)([pi, mu, sig])])([z, x]),
            d_z,
            d_x,
            d_y,
            self._n_samples,
            self._use_upper_bound_loss,
            self._n_gradient_samples)

        rl = lm([z_in, x_in, y_in])
        response_model = Model([z_in, x_in, y_in], [rl])
        response_model.add_loss(L.Lambda(K.mean)(rl))
        response_model.compile(self._optimizer)
        # TODO: do we need to give the user more control over other arguments to fit?
        response_model.fit([Z, X, Y], [], epochs=self._s2)

        self._effect_model = Model([t_in, x_in], [self._h(t_in, x_in)])

        # TODO: it seems like we need to sum over the batch because we can only apply gradient to a scalar,
        #       not a general tensor (because of how backprop works in every framework)
        #       (alternatively, we could iterate through the batch in addition to iterating through the output,
        #       but this seems annoying...)
        #       Therefore, it's important that we use a batch size of 1 when we call predict with this model
        def calc_grad(t, x):
            h = self._h(t, x)
            all_grads = K.concatenate([
                g for i in range(d_y)
                for g in K.gradients(K.sum(h[:, i]), [t])
            ])
            return K.reshape(all_grads, (-1, d_y, d_t))

        self._marginal_effect_model = Model(
            [t_in, x_in],
            L.Lambda(lambda tx: calc_grad(*tx))([t_in, x_in]))
コード例 #25
0
    else:
        for _ in outer_layers:
            _.append(None)

final_actnorm = Actnorm()
final_concat = Concat()
final_reshape = Reshape()

x = final_actnorm(x)
x = final_reshape(x)
x = final_concat(x_outs+[x])

encoder = Model(x_in, x)
for l in encoder.layers:
    if hasattr(l, 'logdet'):
        encoder.add_loss(l.logdet)

encoder.summary()
encoder.compile(loss=lambda y_true,y_pred: 0.5 * K.sum(y_pred**2, 1) + 0.5 * np.log(2*np.pi) * K.int_shape(y_pred)[1],
                optimizer=Adam(1e-4))


# Build the inverse model (the generative model) by running every operation in reverse

x_in = Input(shape=K.int_shape(encoder.outputs[0])[1:])
x = x_in

x = final_concat.inverse()(x)
outputs = x[:-1]
x = x[-1]
x = final_reshape.inverse()(x)
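
Taken together, the compile loss and the per-layer logdet terms registered via add_loss form the flow's negative log-likelihood under the change-of-variables formula with a standard normal base density:

    -\log p(x) = \frac{1}{2}\lVert z \rVert^2 + \frac{d}{2}\log 2\pi - \log\left\lvert \det \frac{\partial z}{\partial x} \right\rvert,

with the sign convention for the Jacobian term folded into each layer's logdet attribute.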
コード例 #26
0
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
# TODO: notice there is no padding here, to match the required output dimensions
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

#computing the VAE loss
# xent_loss = metrics.binary_crossentropy(input_img,decoded)
xent_loss = K.mean(metrics.binary_crossentropy(input_img, decoded))
reward_based_loss = input_reward * xent_loss
# reward_based_loss =  xent_loss

#full AE model
autoencoder = Model(inputs=[input_img, input_reward], outputs=[decoded])
autoencoder.add_loss(reward_based_loss)
autoencoder.compile(optimizer='sgd')
autoencoder.summary()

#encoder model
encoder = Model(input_img, encoded)
#decoder model

if not train_model:
    autoencoder.load_weights(filepath=model_weights_file)
else:
    #todo try changing the loss function to return a single value as opposed to an array and see if tensorboard works
    autoencoder.fit([x_train, x_train_reward],
                    epochs=4,
                    batch_size=1,
                    shuffle=True,
コード例 #27
0
ファイル: generator.py プロジェクト: Dr-Piedra/GAN
#decoder = Model(inputs = [latent_input], outputs= [End], name='decoder')
#decoder.summary()

#decoded_outputs = decoder(encoder(Image_input)[2])
vae = Model(inputs=[Image_input], outputs = [End], name='vae_mlp')
vae.summary()


kl_loss = 1 + End_log_var - K.square(End_mean) - K.exp(End_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
reconstruction_loss = mse(Image_input, End)
#reconstruction_loss *= original_dim
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
adam = optimizers.Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999)
vae.compile(optimizer=adam)
#plot_model(vae,
               #to_file='vae_mlp.png',
               #show_shapes=True)

history = vae.fit_generator(gen_sample(), epochs=1, steps_per_epoch=2)
vae.save('multi_model.h5')
vae.save_weights('multi_weights.h5')
# Alternative settings from a divergent branch of the same file:
# vae.fit_generator(gen_sample(), epochs=1000, steps_per_epoch=30)
# vae.save('model.h5')
# vae.save_weights('weights.h5')
コード例 #28
0
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')

encoder.summary()
decoder.summary()

# loss
#reconstruction_loss = mse(inputs, outputs)
reconstruction_loss = binary_crossentropy(inputs, outputs)
reconstruction_loss *= ORIG_IMG_SHAPE[0] * ORIG_IMG_SHAPE[1]
kl_loss = 1 + latent_log_var - K.square(latent_mean) - K.exp(latent_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(loss)
vae.compile(optimizer='Adam')

vae.summary()

# encoder.load_weights('encoder_weights.h5')
# decoder.load_weights('decoder_weights.h5')

vae.fit(train_data, epochs=EPOCH_NUM, batch_size=BATCH_SIZE, validation_data=(test_data, None))
encoder.save_weights('encoder_weights.h5')
decoder.save_weights('decoder_weights.h5')



# plot results for some test data
for img in test_data[:3]:
コード例 #29
0
class GAN():
    def __init__(self):
        self.img_rows = 48
        self.img_cols = 48
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.latent_dim = 64
        self.flat_dim = self.img_rows * self.img_cols * self.channels
        self.batch_size = 32

        # VAE model = encoder + decoder
        self.encoder = self.build_encoder()
        self.encoder.summary()
        # plot_model(self.encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

        # build decoder model
        self.decoder = self.build_decoder()
        self.decoder.summary()
        # plot_model(self.decoder, to_file='vae_mlp_decoder.png', show_shapes=True)

        # instantiate VAE model
        self.outputs = self.decoder(self.encoder(self.inputs)[2])
        self.vae = Model(self.inputs, self.outputs, name='vae_mlp')

        self.sample_noise = np.random.normal(
            0, 1, (5 * 5, self.latent_dim))  # 5 * 5 = r * c

    def build_encoder(self):
        self.inputs = Input(
            shape=self.img_shape, name='encoder_input'
        )  #, batch_shape=(self.batch_size, self.img_rows, self.img_cols, self.channels),)

        # no built-in activation here: LeakyReLU provides the nonlinearity
        h_l = Conv2D(16, 5, strides=2)(self.inputs)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Dropout(0.2)(h_l)

        h_l = Conv2D(32, 5, strides=2)(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Dropout(0.2)(h_l)

        h_l = Conv2D(64, 5, strides=2)(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)
        h_l = Dropout(0.2)(h_l)

        h_l = Flatten()(h_l)

        h_l = Dense(128, activation='relu')(h_l)

        # h_l = Dense(self.latent_dim, activation='sigmoid')(h_l)

        self.z_mean = Dense(self.latent_dim, name='z_mean')(h_l)
        self.z_log_var = Dense(self.latent_dim, name='z_log_var')(h_l)

        # use reparameterization trick to push the sampling out as input
        # note that "output_shape" isn't necessary with the TensorFlow backend
        z = Lambda(self.sampling, output_shape=(self.latent_dim, ),
                   name='z')([self.z_mean, self.z_log_var])

        # instantiate encoder model
        return Model(self.inputs, [self.z_mean, self.z_log_var, z],
                     name='encoder')

    def build_decoder(self):
        latent_inputs = Input(
            shape=(self.latent_dim, ), name='z_sampling'
        )  #, batch_shape=(self.batch_size, self.latent_dim))

        h_l = Dense(12 * 12 * 128)(latent_inputs)
        # TODO: Batch norm?
        h_l = LeakyReLU(alpha=0.2)(h_l)

        h_l = Reshape((12, 12, 128))(h_l)

        # no built-in activation here: LeakyReLU provides the nonlinearity
        h_l = Conv2DTranspose(64, (7, 7),
                              padding='same',
                              strides=(1, 1))(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)

        h_l = Conv2DTranspose(32, (5, 5),
                              padding='same',
                              strides=(2, 2))(h_l)
        h_l = LeakyReLU(alpha=0.2)(h_l)

        self.outputs = Conv2DTranspose(3, (5, 5),
                                       padding='same',
                                       strides=(2, 2),
                                       activation='sigmoid')(h_l)
        # self.outputs = Flatten()(h_l)

        # self.outputs = Dense(self.flat_dim, activation='sigmoid')(h_l)

        # self.outputs = Dense(self.flat_dim, activation='sigmoid')(h_l)
        # self.outputs = Reshape(target_shape=self.img_shape)(self.outputs)

        # instantiate decoder model
        return Model(latent_inputs, self.outputs, name='decoder')

    # reparameterization trick
    # instead of sampling from Q(z|X), sample epsilon = N(0,I)
    # z = z_mean + sqrt(var) * epsilon
    def sampling(self, args):
        """Reparameterization trick by sampling from an isotropic unit Gaussian.
        # Arguments
            args (tensor): mean and log of variance of Q(z|X)
        # Returns
            z (tensor): sampled latent vector
        """

        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        # by default, random_normal has mean = 0 and std = 1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    def load_images(self, path="images/preprocessed/48x48/oranges/"):
        result = np.zeros(shape=(len(os.listdir(path)), self.img_rows,
                                 self.img_cols, self.channels))
        idx = 0
        for file in os.listdir(path):
            img = Image.open(os.path.join(path, file))
            img = img.convert("RGB")
            img = np.array(img)

            result[idx] = img

            idx += 1
        return result

    def train(self,
              epochs,
              batch_size=32,
              sample_interval=50,
              save_interval=1500):
        # # Load the dataset
        # (X_train, _), (_, _) = mnist.load_data()

        loss = []

        # Load the images
        X_train = self.load_images()

        # image_size = X_train.shape[1]
        # original_dim = image_size * image_size

        # Normalize
        X_train = X_train / 255

        # Reshape
        # X_train = X_train.reshape((len(X_train), np.prod(X_train.shape[1:])))

        # VAE loss = mse_loss or xent_loss + kl_loss
        reconstruction_loss = K.mean(mse(self.inputs, self.outputs))
        reconstruction_loss *= self.img_rows * self.img_cols
        # reconstruction_loss = np.mean(reconstruction_loss, axis=(1, 2))
        kl_loss = 1 + self.z_log_var - K.square(self.z_mean) - K.exp(
            self.z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        print(reconstruction_loss.shape, kl_loss.shape)
        vae_loss = K.mean(reconstruction_loss + kl_loss)
        self.vae.add_loss(vae_loss)
        self.vae.compile(optimizer='adam')
        self.vae.summary()
        # plot_model(self.vae,
        #            to_file='vae_mlp.png',
        #            show_shapes=True)

        try:
            for i in range(1, int(epochs / sample_interval) + 1):
                print("True Epoch: " + str(i * sample_interval))
                # train the autoencoder
                history = self.vae.fit(
                    X_train,
                    shuffle=True,
                    epochs=int(epochs / sample_interval),
                    batch_size=batch_size,
                    validation_data=(X_train, None))  # TODO: make test
                self.vae.save_weights('vae_mlp_fruit.h5')

                self.sample_images(X_train, i * sample_interval, noise=False)
                self.sample_images(X_train, i * sample_interval)

                loss.append(history.history['loss'])
        except KeyboardInterrupt:
            pass

        loss = np.stack(loss).flatten()

        with open("histories/%d-history.pkl" % time.time(), "wb") as history_file:
            pickle.dump(history, history_file)

        plt.clf()
        plt.plot(loss, label="loss")
        plt.legend()
        plt.title(label='VAE-GAN Loss')
        plt.savefig("images/plots/%d-vae-gan_loss.png" % time.time())
        plt.show()

    def sample_images(self, X_train, epoch, noise=True):
        if noise:
            r, c = 5, 5
            gen_imgs = self.decoder.predict(self.sample_noise,
                                            batch_size=5 * 5)

            # the decoder ends in a sigmoid, so the generated images are already in [0, 1]

            gen_imgs = np.stack(gen_imgs).reshape(
                (5 * 5, self.img_rows, self.img_cols, self.channels))

            fig, axs = plt.subplots(r, c)
            cnt = 0
            for i in range(r):
                for j in range(c):
                    axs[i, j].imshow(gen_imgs[cnt, :, :, :])
                    axs[i, j].axis('off')
                    cnt += 1
            fig.savefig("images/vae-conv-generated-%d.png" % epoch)
            plt.close()
        else:
            encoded_imgs = self.encoder.predict(X_train)
            decoded_imgs = self.decoder.predict(encoded_imgs[2])

            n = 10  # how many images we will display
            plt.figure(figsize=(20, 4))
            for i in range(n):
                # display original
                ax = plt.subplot(2, n, i + 1)
                plt.imshow(X_train[i].reshape(self.img_shape))
                plt.gray()
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)

                # display reconstruction
                ax = plt.subplot(2, n, i + 1 + n)
                plt.imshow(decoded_imgs[i].reshape(self.img_shape))
                plt.gray()
                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                plt.savefig("images/vae-conv-reconstructed%d.png" % epoch)

        plt.close()
コード例 #30
0
                          my_max_sent_size_overall=max_sent_size_overall,
                          my_n_cats=n_cats)

    my_optimizer = optimizers.SGD(
        lr=base_lr,
        momentum=max_mt,  # we decrease momentum when lr increases
        decay=1e-5,
        nesterov=True)

    if regularization > 0:
        my_loss = CustomLossWrapper(
            regularization * InformationRegularizer(sent_att_vecs, context))
        # attach the extra term before compiling; add_loss after compile has no effect
        cahan.add_loss(regularization *
                       InformationRegularizer(sent_att_vecs, context))
    # (when regularization == 0, my_loss is presumably defined earlier in the truncated file)
    cahan.compile(loss=my_loss, optimizer=my_optimizer, metrics=['accuracy'])

    lr_sch = CyclicLR(base_lr=base_lr,
                      max_lr=max_lr,
                      step_size=step_size,
                      mode='triangular')
    mt_sch = CyclicMT(base_mt=base_mt,
                      max_mt=max_mt,
                      step_size=step_size,
                      mode='triangular')

    early_stopping = EarlyStopping(
        monitor='val_acc',  # keep training as long as validation accuracy improves
        patience=my_patience,
        mode='max')
コード例 #31
0
        # Model Input
        Im_In = Input(shape=(im_dim, im_dim, im_ch))
        Encoded, z_mean, z_log_sd = EncoderModel(Im_In)
        Im_Out = DecoderModel(Encoded)

        # Compile Model
        VAE = Model(Im_In, Im_Out)

        # Compute VAE loss
        xent_loss = im_dim * im_dim * metrics.binary_crossentropy(
            K.flatten(Im_In), K.flatten(Im_Out))
        kl_loss = -0.5 * K.sum(
            1 + z_log_sd - K.square(z_mean) - K.exp(z_log_sd), axis=-1)
        vae_loss = xent_loss + beta * K.mean(kl_loss)
        VAE.add_loss(vae_loss)

        VAE.compile(optimizer='adam', loss=None)

        VAE.load_weights("Completed_Training_beta={}.h5".format(beta))
        if gen_data:
            train_data = LoadData(dup=False)
            test_data = LoadData(method='test')

            VAE.load_weights(
                "Checkpoint_Training_beta_double={}.hdf5".format(beta))

            header_str = "id resp x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 x30 x31 x32"
            fmt_str = "%d %d %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f"

            train_1 = EncoderModel.predict(train_data.pairs[:, 0, ...])[selkey]
コード例 #32
0
class BetaConvVAE(AutoEncoder):


	def __init__(self, beta=1, data_generator=None, latent_size=2):
		self.data_generator = data_generator
		self.model_name = "beta_conv_vae_beta" + str(beta) + "_augment_" + str(self.data_generator.number_to_augment) 
		self.results_folder = "results/" + self.data_generator.data_name + "/" + self.model_name
		self.models_folder = "models/" + self.data_generator.data_name + "/" + self.model_name

		if not os.path.exists(self.results_folder):
			os.makedirs(self.results_folder)

		if not os.path.exists(self.models_folder):
			os.makedirs(self.models_folder)

		self.latent_size = latent_size

		self.beta = beta
		self.history = None
		self.encoder = None
		self.decoder = None
		self.autoencoder = None


	# define model
	# image format is channels last - (batch_size, x, y, no_filters)
	def build(self):
		input_img = 		Input(shape=(100, 100, 3))  # adapt this if using `channels_first` image data format
		conv_0_encoded = 	Conv2D(16, (3, 3), activation='relu', padding='same')(input_img) # (100, 100)
		pool_0_encoded = 	MaxPooling2D((2, 2), padding='same')(conv_0_encoded) # (50, 50)
		conv_1_encoded = 	Conv2D(8, (3, 3), activation='relu', padding='same')(pool_0_encoded) # (50, 50)
		pool_1_encoded = 	MaxPooling2D((2, 2), padding='same')(conv_1_encoded) # (25, 25)
		conv_2_encoded = 	Conv2D(8, (4, 4), activation='relu')(pool_1_encoded) # (22, 22)
		pool_2_encoded = 	MaxPooling2D((2, 2), padding='same')(conv_2_encoded) # (11, 11)
		conv_3_encoded = 	Conv2D(8, (4, 4), activation='relu')(pool_2_encoded) # (8, 8)
		reshaped_encoded = 	Flatten()(conv_3_encoded) # (1,64)
		dense_0_encoded = 	Dense(8)(reshaped_encoded) # (1,8)

		z_mean = Dense(self.latent_size)(dense_0_encoded)
		z_log_var = Dense(self.latent_size)(dense_0_encoded)

		def sampling(args):
		    z_mean, z_log_var = args
		    epsilon = K.random_normal(shape=(K.shape(z_mean)[0], self.latent_size), mean=0., stddev=1.0)
		    # z_log_var is a log-variance, so the standard deviation is exp(0.5 * z_log_var)
		    return z_mean + K.exp(0.5 * z_log_var) * epsilon

		z = Lambda(sampling, output_shape=(self.latent_size,))([z_mean, z_log_var]) # (1, latent_size)

		# define layers
		decoder_dense_0 = 		Dense(8) # (1,8)
		decoder_dense_1 = 		Dense(8 * 8) # (1,64)
		decoder_reshaped = 		Reshape([8, 8, 1]) # (8,8)
		decoder_deconv_0 = 		Conv2D(8, (3, 3), activation='relu', padding='same') # (8, 8)
		decoder_up_0 = 			UpSampling2D((2, 2)) # (16, 16)
		decoder_deconv_1 = 		Conv2D(16, (3, 3), activation='relu') # (14, 14)
		decoder_up_1 = 			UpSampling2D((2, 2)) # (28, 28)
		decoder_deconv_2 = 		Conv2D(16, (4, 4), activation='relu') # (25, 25)
		decoder_up_2 = 			UpSampling2D((2, 2)) # (50, 50)
		decoder_deconv_3 = 		Conv2D(16, (3, 3), activation='relu', padding='same') # (50, 50)
		decoder_up_3 = 			UpSampling2D((2, 2)) # (100, 100)
		decoder_output_img = 	Conv2D(3, (3, 3), activation='sigmoid', padding='same') # (100, 100)

		# instantiate layers for training
		dense_0_decoded = 	decoder_dense_0(z) # (1,8)
		dense_1_decoded =	decoder_dense_1(dense_0_decoded) # (1,64)
		reshaped_decoded = 	decoder_reshaped(dense_1_decoded) # (8,8)
		deconv_0_decoded = 	decoder_deconv_0(reshaped_decoded) # (8, 8)
		up_0_decoded = 		decoder_up_0(deconv_0_decoded) # (16, 16)
		deconv_1_decoded = 	decoder_deconv_1(up_0_decoded) # (14, 14)
		up_1_decoded = 		decoder_up_1(deconv_1_decoded) # (28, 28)
		deconv_2_decoded = 	decoder_deconv_2(up_1_decoded) # (25, 25)
		up_2_decoded = 		decoder_up_2(deconv_2_decoded) # (50, 50)
		deconv_3_decoded = 	decoder_deconv_3(up_2_decoded) # (50, 50)
		up_3_decoded = 		decoder_up_3(deconv_3_decoded) # (100, 100)
		output_img = 		decoder_output_img(up_3_decoded) # (100, 100)

		# instantiate layers for test-time generation from latent space samples
		latent = 				Input(shape=(self.latent_size,))
		_dense_0_decoded = 		decoder_dense_0(latent) # (1,8)
		_dense_1_decoded =		decoder_dense_1(_dense_0_decoded) # (1,64)
		_reshaped_decoded = 	decoder_reshaped(_dense_1_decoded) # (8,8)
		_deconv_0_decoded = 	decoder_deconv_0(_reshaped_decoded) # (8, 8)
		_up_0_decoded = 		decoder_up_0(_deconv_0_decoded) # (16, 16)
		_deconv_1_decoded = 	decoder_deconv_1(_up_0_decoded) # (14, 14)
		_up_1_decoded = 		decoder_up_1(_deconv_1_decoded) # (28, 28)
		_deconv_2_decoded = 	decoder_deconv_2(_up_1_decoded) # (25, 25)
		_up_2_decoded = 		decoder_up_2(_deconv_2_decoded) # (50, 50)
		_deconv_3_decoded = 	decoder_deconv_3(_up_2_decoded) # (50, 50)
		_up_3_decoded = 		decoder_up_3(_deconv_3_decoded) # (100, 100)
		_output_img = 			decoder_output_img(_up_3_decoded) # (100, 100)
		
		# define the 3 models
		self.autoencoder =	Model(input_img, output_img)
		self.encoder = 		Model(input_img, z_mean)
		self.decoder = 		Model(latent, _output_img)

		xent_loss = metrics.binary_crossentropy(K.flatten(input_img), K.flatten(output_img))
		kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
		vae_loss = K.mean(xent_loss + self.beta*kl_loss)
		self.autoencoder.add_loss(vae_loss)
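
The beta weighting above is the beta-VAE objective, which scales the KL term to trade reconstruction fidelity against disentanglement of the latent code:

    L = \mathbb{E}_{q(z|x)}\left[-\log p(x|z)\right] + \beta\,\mathrm{KL}\big(q(z|x)\,\|\,p(z)\big),

with beta = 1 recovering the plain VAE.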
コード例 #33
0
ファイル: clean_inwork.py プロジェクト: fgimbert/Hieroglyphs
def hieroRecoModel_online(input_shape):
    """
    Implementation of the Inception model used for FaceNet

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """

    #Import VGG19 model for transfer learning without output layers
    vgg_model = applications.VGG19(weights = "imagenet", include_top=False, input_shape = input_shape)

    # Freeze the layers except the last 4
    for layer in vgg_model.layers[:-4]:
        layer.trainable = False

    # Check the layers
    for layer in vgg_model.layers:
        print(layer, layer.trainable)

    X_input = vgg_model.output

    # Adding custom Layers

    X = Flatten()(X_input)
    X = Dense(512, activation="relu")(X)
    X = Dropout(0.5)(X)
    X = Dense(128, activation="relu")(X)

    # L2 normalization
    X = Lambda(lambda x: K.l2_normalize(x, axis=1))(X)


    # Create model instance
    #model = Model(inputs=vgg_model.input, outputs=X, name='HieroRecoModel')
    features = Model(vgg_model.input, X, name="features")

    # Inputs of the siamese network

    anchor = Input(shape=input_shape)
    positive = Input(shape=input_shape)
    negative = Input(shape=input_shape)

    # Embedding Features of input

    anchor_features = features(anchor)
    pos_features = features(positive)
    neg_features = features(negative)

    input_triplet = [anchor, positive, negative]
    output_features = [anchor_features, pos_features, neg_features]

    # Define the trainable model
    loss_model = Model(inputs=input_triplet, outputs=output_features, name='loss')
    loss_model.add_loss(K.mean(triplet_loss(output_features)))
    loss_model.compile(loss=None, optimizer='adam')

    # Create model instance
    # model = Model(inputs=X_input, outputs=X, name='HieroRecoModel_off')

    return features, loss_model
コード例 #34
0
ファイル: attack.py プロジェクト: mabouhashem/EvadeGAN
class EvadeGAN:

    def __init__(self, target_model, x_dim=10000, z_dim=100, g_input='xz',
                 g_params={}, d_params={}, d_compile_params={},
                 gan_compile_params={}, summary=False, bin_threshold=0.5):
        self.graph = tf.compat.v1.get_default_graph()
        self.target_model = self.TargetModel(target_model)
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.bin_threshold = bin_threshold
        self.g_input = g_input
        if self.g_input == 'z':
            self.name = 'EvadeGANz'
            self.setting = "Sample-Independent Perturbations Z"
            self.save_dir = 'EvadeGANz/'
        elif self.g_input == 'x':
            self.name = 'EvadeGANx'
            self.setting = "Sample-Dependent Perturbations X"
            self.save_dir = 'EvadeGANx/'
        else:
            self.name = 'EvadeGANxz'
            self.setting = "Sample-Dependent Perturbations XZ"
            self.save_dir = 'EvadeGANxz/'

        if summary:
            print(f"Summary of {self.name} Models [{self.setting}]:\n"
                  + '=' * 62 + '\n')

        # Build the generator
        self.generator = self.build_generator(**g_params, summary=summary)

        # Build & compile the discriminator
        self.discriminator = self.build_discriminator(**d_params,
                                                      **d_compile_params,
                                                      summary=summary)

        # Build & compile the adversarial network, GAN
        self.GAN = self.build_GAN(**gan_compile_params, summary=summary)

        # Combine logs
        self.log_params = {'G': [self.g_log],
                           'D': [self.d_log],
                           'GAN': [self.gan_log]}

    def build_generator(self, n_hidden=256, h_activation='relu',
                        regularizers={}, batchnorm=False,
                        out_activation='sigmoid',
                        drop_rate=0, summary=False):
        """Builds a generator using the passed hyperparameters"""

        # Input: xz, z, or x
        x = Input(shape=(self.x_dim,), name='g_x_input')
        z = Input(shape=(self.z_dim,), name='g_z_input')
        if self.g_input == 'z':
            g_input = z
        elif self.g_input == 'x':
            g_input = x
        else:
            g_input = Concatenate(axis=1, name='g_xz_input')([x, z])

        # Hidden
        hidden = Dense(n_hidden,
                       activation=h_activation,
                       name='g_hidden_relu')(g_input)

        if batchnorm:
            hidden = BatchNormalization(name='g_hidden_bn')(hidden)

        perturb = Dense(self.x_dim,
                        activation=out_activation,
                        **regularizers,
                        name='g_perturb_sigmoid')(hidden)

        # Dropout. NB: Dropout scales up the kept units at train time, so clip
        # back to <= 1 before the element-wise max below (K.clip would work
        # equally well).
        perturb = Dropout(drop_rate, name='perturb_dropout')(perturb)
        perturb = K.minimum(perturb, 1.0)

        # Output: the element-wise maximum keeps every feature of x and can
        # only add features, so perturbations are strictly additive
        x_adv = Maximum(name='g_adv_max')([perturb, x])

        self.generator = Model([x, z], x_adv, name='Generator')
        if summary:
            self.generator.summary()
            print()

        # G parameters for logging
        self.reg = get_reg_factors(regularizers)
        self.g_log = {'in': self.g_input,
                      'h': f'({n_hidden},{h_activation})', 'bn': batchnorm,
                      'reg': self.reg, 'drop': drop_rate}

        return self.generator

    def build_discriminator(self, n_hidden=256, h_activation=None,
                            h_constraint=None, out_activation='sigmoid',
                            summary=False,
                            loss='binary_crossentropy', metrics=['accuracy'],
                            optimizer=Nadam(lr=0.001, clipvalue=1.0)):
        """Builds a discriminator using the passed hyperparameters"""
        x = Input(shape=(self.x_dim,), name='d_x_input')
        hidden = Dense(n_hidden, activation=h_activation,
                       kernel_constraint=h_constraint,
                       name='d_hidden_linear')(x)
        pred = Dense(1, activation=out_activation, name='d_pred')(hidden)
        self.discriminator = Model(x, pred, name="Discriminator")
        if summary:
            self.discriminator.summary()
            print()

        self.discriminator.compile(loss=loss,
                                   optimizer=optimizer,
                                   metrics=metrics)

        self.discriminator.trainable = False

        # D parameters, for logging
        self.d_log = {"loss": 'bce' if loss == 'binary_crossentropy' else loss,
                      "opt": {type(optimizer).__name__:
                                  (optimizer.lr.numpy(), optimizer.clipvalue)}}

        return self.discriminator

    def build_GAN(self, loss='binary_crossentropy', metrics=['accuracy'],
                  optimizer=Nadam(lr=0.001, clipvalue=1.0), beta=1.0,
                  normalise_loss=False, target_label=0,
                  bound_func='mean', max_changes=12, summary=False):
        """Builds an adversarial netowrk GAN using the passed hyperparameters"""

        x = Input(shape=(self.x_dim,), name='gan_x_input')
        z = Input(shape=(self.z_dim,), name='gan_z_input')
        x_adv = self.generator([x, z])

        self.discriminator.trainable = False

        y_pred = self.discriminator(x_adv)  # predictions

        self.GAN = Model([x, z], y_pred, name='GAN')

        if summary:
            self.GAN.summary()
            print()

        # Binarise to get a valid (binary-feature) sample; a sketch of the
        # assumed binarise helper appears after this example
        x_adv_bin = binarise(x_adv, self.bin_threshold)

        # Optional: Minimise the score of the target model (Add to loss)
        # Target label (goodware)
        # y_target = target_label * ones(x_adv.get_shape().as_list()[0])
        # loss_target_model = self.target_model.score(x_adv_bin, y_target)
        # self.GAN.add_loss(loss_target_model)

        # Reduction function for the bound loss: mean or max (more restrictive)
        reduce_func = tf.reduce_max if bound_func == 'max' else tf.reduce_mean

        # Whether to scale the bound loss to the range [0, 1]
        scale = 1 / self.x_dim if normalise_loss else 1.0
        loss_bound = reduce_func(
            tf.maximum(0.0,  # OR tf.zeros((tf.shape(x_adv)[0])),
                       tf.norm(x_adv_bin - x, ord=1, axis=1) - max_changes)
            * scale)

        # combined_loss = alpha*loss_target_model + beta*loss_changes

        self.GAN.add_loss(beta * loss_bound)

        self.GAN.compile(loss='binary_crossentropy',
                         optimizer=optimizer,
                         metrics=metrics)

        self.gan_log = {
            "loss": f'custom({bound_func}, {max_changes}, {beta})',
            "opt": {type(optimizer).__name__:
                        (optimizer.lr.numpy(), optimizer.clipvalue)}}

        return self.GAN

    class TargetModel:
        def __init__(self, model):
            # Get model parameters (weights & intercept)
            if isinstance(model, LinearSVC):
                w = model.coef_.flatten()
            elif isinstance(model, SVC):
                # A linear-kernel SVC stores its coefficients sparsely
                w = model.coef_.toarray().flatten()
            else:
                raise TypeError(f"Unsupported target model: {type(model)}")

            b = model.intercept_[0]

            self.weights = tf.Variable([w], dtype=tf.float32, trainable=False)
            self.intercept = tf.Variable(b, dtype=tf.float32, trainable=False)
            self.classes = tf.Variable(model.classes_, trainable=False)
            self.accuracy = tf.keras.metrics.BinaryAccuracy()

        def predict(self, X):
            # Get decision function
            # X = tf.convert_to_tensor(X, dtype=tf.float32) # in case no tensor
            scores = K.dot(X, tf.transpose(self.weights)) + self.intercept

            # Classify
            idx = tf.cast(scores > 0, tf.int32)
            # y_pred = K.get_value(self.classes)[idx]     # Class Label, eager
            y_pred = tf.gather(self.classes, idx)
            return y_pred

        def score(self, X, y_target):
            y_pred = self.predict(X)
            self.accuracy.update_state(y_pred, y_target)
            return self.accuracy.result()

    def train(self, target_model, X_mal_train, X_mal_test, X_good_train,
              X_good_test, mal_label=1, good_label=0, earlystop=False,
              zmin=0, zmax=1, epochs=500, batch_size=32, combined_d_batch=False,
              d_train_mal=False, d_train_adv=True, good_batch_factor=1,
              d_times=1, gan_times=1, n_progress=1, minTPR_threshold=0,
              max_changes=np.inf, gan_dir=GAN_DIR, smooth_alpha=1.0,
              sample_train=True):
        """
        Performs GAN training.
        :param target_model: The target model of the evasion attack
        :param X_mal_train: The malware training set
        :param X_mal_test: The malware test set
        :param X_good_train: The goodware training set
        :param X_good_test: The goodware test set
        :param mal_label: The label for the malware class (original label)
        :param good_label: The label for the goodware class (target label)
        :param zmin: The lower bound of the random noise
        :param zmax: The upper bound of the random noise
        :param epochs: The number of training epochs
        :param batch_size: The size of a training batch
        :param d_train_mal: Whether to train the discriminator on malware.
        :param combined_d_batch: Whether to train the discriminator on one batch
                  that combines all classes or to train on each class separately
        :param good_batch_factor: The size ratio of a goodware batch compared
                  to that of a malware batch.
        :param d_times: The number of times to train the discriminator in each
                  iteration.
        :param gan_times: The number of times to train the GAN in each iteration
        :param n_progress: The number of epochs without improvement after which
                  progress output is printed.
        :param minTPR_threshold: The threshold down to which we wish to minimise
                  the True Positive Rate (TPR).
        :param max_changes: A constraint on the maximum number of changes in
                  generated adversarial examples (AEs)
        :return: tuple (
                    TPR_train: The list of TPR scores on the training set at
                                each epoch,
                    TPR_test: The list of TPR scores on the test set at each
                              epoch,
                    avg_diff_train: The list of avg changes in AEs generated
                              from training set at each epoch,
                    avg_diff_test: The list of avg changes in AEs generated
                              from the test set at each epoch,
                    d_metrics: The list of the discriminator metrics
                              [loss, accuracy] at each epoch,
                    gan_metrics: The list of the GAN metrics
                              [loss, accuracy] at each epoch,
                    best_G_path: The path to the best performing G model
                  )
        """

        g_batch_size = good_batch_factor * batch_size

        # Metrics accumulators
        d_metrics = []
        gan_metrics = []

        # Initial TPR on the training & test sets
        TPR_train = [target_model.score(X_mal_train,
                                        mal_label * ones(X_mal_train.shape[0]))]
        TPR_test = [target_model.score(X_mal_test,
                                       mal_label * ones(X_mal_test.shape[0]))]
        minTPR = 1.0
        minTPR_avg_changes = -1
        minTPR_max_changes = -1
        min_epoch = output_epoch = 0
        best_G_path = None

        print(f"Initial TPR on the training set: {TPR_train}")
        print(f"Initial TPR on the test set: {TPR_test}\n")

        # Average changes (perturbations) in adversarial examples
        avg_diff_train = []
        avg_diff_test = []

        # IDs for plots
        plot_id = 1
        gan_id = 1
        tpr_id = 1

        t1 = time.perf_counter()

        for epoch in range(epochs):
            # Generate batches of size (gan_times * batch_size)
            X_mal_batches = batch(X_mal_train, gan_times * batch_size,
                                  seed=epoch)
            # Epoch metrics accumulators
            d_metrics_epoch = np.empty((0, 2))
            gan_metrics_epoch = np.empty((0, 2))

            for X_mal_batch in X_mal_batches:
                ################################################################
                # Train the discriminator for d_times iterations
                ################################################################
                # Generate minibatches of size batch_size
                minibatches = batch(X_mal_batch, batch_size, seed=epoch)
                d_metrics_batch = np.empty((0, 2))
                # Train for d_times
                for i in range(d_times):
                    # Could reseed with (epoch + i) for reproducibility
                    X_mal = next(minibatches, None)  # Use these batches first
                    if X_mal is None:  # Then generate randomly
                        X_mal = rand_batch(X_mal_train, batch_size)

                    # One-sided label smoothing for the malware labels
                    Y_mal = smooth_alpha * mal_label * ones(X_mal.shape[0])

                    noise = np.random.uniform(zmin, zmax,
                                              size=[batch_size, self.z_dim])

                    # Generate adversarial examples
                    X_adv = self.generator.predict([X_mal, noise])
                    X_adv = binarise(X_adv, self.bin_threshold)
                    Y_adv = target_model.predict(X_adv)
                    # Smooth only the malware labels
                    Y_adv[Y_adv == mal_label] = smooth_alpha * mal_label

                    X_good = rand_batch(X_good_train, g_batch_size)
                    Y_good = good_label * ones(X_good.shape[0])     # Good_Label

                    # Train the discriminator
                    self.discriminator.trainable = True

                    if combined_d_batch:
                        # *** Train once on a combined batch ****
                        X = X_good
                        Y = Y_good
                        if d_train_mal:
                            X = np.concatenate((X, X_mal))
                            Y = np.concatenate((Y, Y_mal))
                        if d_train_adv:
                            X = np.concatenate((X, X_adv))
                            Y = np.concatenate((Y, Y_adv))
                        metrics = self.discriminator.train_on_batch(X, Y)
                    else:
                        # ** Train on separate batches & combine metrics **
                        metrics_good = self.discriminator.train_on_batch(X_good,
                                                                         Y_good)
                        metrics_mal = self.discriminator.train_on_batch(X_mal,
                                                                        Y_mal) \
                            if d_train_mal else [np.nan, np.nan]
                        metrics_adv = self.discriminator.train_on_batch(X_adv,
                                                                        Y_adv) \
                            if d_train_adv else [np.nan, np.nan]
                        # Avg metrics
                        metrics = np.nanmean(np.array([metrics_mal,
                                                       metrics_good,
                                                       metrics_adv]), axis=0)

                    # Accumulate metrics for d_times iterations
                    d_metrics_batch = np.vstack((d_metrics_batch, metrics))

                # Average the metrics of all d_times iterations
                d_metrics_batch = np.mean(d_metrics_batch, axis=0)
                # Add the averaged batch metrics to this epoch's accumulator
                d_metrics_epoch = np.vstack((d_metrics_epoch, d_metrics_batch))

                ################################################################
                # Train the Generator
                ################################################################
                # Generate minibatches of size batch_size
                minibatches = batch(X_mal_batch, batch_size, seed=epoch)
                gan_metrics_batch = np.empty((0, 2))
                # Train for gan_times
                for i in range(gan_times):
                    # Number of minibatches should be exactly gan_times
                    X_mal = next(minibatches, None)
                    if X_mal is None:  # Just in case, generate randomly
                        X_mal = rand_batch(X_mal_train, batch_size)

                    noise = np.random.uniform(zmin, zmax, size=[batch_size,
                                                                self.z_dim])
                    self.discriminator.trainable = False

                    # Train with target label = good_label
                    metrics = self.GAN.train_on_batch(
                        [X_mal, noise], good_label * ones(X_mal.shape[0]))

                    # Accumulate metrics for gan_times iterations
                    gan_metrics_batch = np.vstack((gan_metrics_batch, metrics))

                # Average the metrics of all gan_times iterations
                gan_metrics_batch = np.mean(gan_metrics_batch, axis=0)
                # Add the averaged batch metrics to this epoch's accumulator
                gan_metrics_epoch = np.vstack((gan_metrics_epoch,
                                               gan_metrics_batch))

            # Average metrics of each epoch
            d_metrics.append(np.mean(d_metrics_epoch, axis=0).tolist())
            gan_metrics.append(np.mean(gan_metrics_epoch, axis=0).tolist())
            gan_loss = gan_metrics[-1][0]

            # TPR on adversarial training set
            noise = np.random.uniform(zmin, zmax, (X_mal_train.shape[0],
                                                   self.z_dim))
            X_adv_train = binarise(self.generator.predict([X_mal_train, noise]),
                                   self.bin_threshold)
            # Score with target label = MAL_LABEL
            Y_adv_train = mal_label * ones(X_adv_train.shape[0])  # MAL_LABEL
            TPR = target_model.score(X_adv_train, Y_adv_train)
            TPR_train.append(TPR)

            # Changes (L1 norms) in the adversarial training set
            diff_train = norm((X_adv_train - X_mal_train), ord=1, axis=1)
            avg_diff_train_current = np.mean(diff_train)
            max_diff_train_current = np.max(diff_train)
            avg_diff_train.append(avg_diff_train_current)

            # TPR on adversarial test set
            noise = np.random.uniform(zmin, zmax, (X_mal_test.shape[0],
                                                   self.z_dim))

            X_adv_test = binarise(self.generator.predict([X_mal_test, noise]),
                                  self.bin_threshold)
            Y_adv_test = mal_label * ones(X_adv_test.shape[0])  # MAL_LABEL
            TPR = target_model.score(X_adv_test, Y_adv_test)
            TPR_test.append(TPR)

            # Changes (L1 norms) in the adversarial test set
            diff_test = norm((X_adv_test - X_mal_test), ord=1, axis=1)
            avg_diff_test_current = np.mean(diff_test)
            max_diff_test_current = np.max(diff_test)
            avg_diff_test.append(avg_diff_test_current)

            # Output progress if TPR has decreased (improved evasion)
            # ... or if TPR is the same but avg changes have decreased
            if (TPR < minTPR) or \
                (TPR == minTPR and avg_diff_test_current < minTPR_avg_changes):  # check avg or max
                print("\n>>>> New Best Results: "
                      f"Previous minTPR: [{minTPR:.8f}] ==> "
                      f"New minTPR: [{TPR:0.8f}] "
                      f"GAN Loss: [{gan_loss:.8f}]  <<<<")
                output_progress(epoch, TPR_train, TPR_test,
                                diff_train, diff_test)
                minTPR = TPR
                min_epoch = output_epoch = epoch
                minTPR_avg_changes = avg_diff_test_current
                minTPR_max_changes = max_diff_test_current
                minTPR_std = np.std(diff_test)
                minTPR_quantiles = np.quantile(diff_test, [0.25, 0.5, 0.75])

                # Save weights
                minTPR_weights_path = \
                    (gan_dir + self.save_dir + 'weights/' +
                     f'GAN_minTPR_weights_epoch_{epoch}_'
                     f'TPR_{minTPR:.2f}_dtimes_{d_times}_changes_'
                     f'{avg_diff_test_current:.0f}_actReg_{self.reg[0]}_'
                     + time.strftime("%m-%d_%H-%M-%S") + '.h5')
                self.GAN.save_weights(minTPR_weights_path)

                # Generate and plot a sample of AEs
                sample_sz = 10
                sample_noise = np.random.uniform(zmin, zmax, size=[sample_sz,
                                                                   self.z_dim])

                if sample_train:  # Sample from training
                    sample_mal = rand_batch(X_mal_batch, sample_sz)
                else:  # Sample from test set
                    sample_mal = np.asarray(rand_batch(X_mal_test, sample_sz))

                plot_sample(sample_mal, sample_noise, self.generator,
                            target_model, epoch, TPR_train=TPR_train,
                            TPR_test=TPR_test, params=self.log_params,
                            avg_changes=avg_diff_test_current,
                            m_label=mal_label, g_label=good_label,
                            annotate=False, out_dir=ADV_DIR, plot_id=plot_id)
                plot_id = plot_id + 1

                if minTPR <= minTPR_threshold:
                    print(
                        "\n" + "#" * 150 + "\n"
                        f"# Target Evasion Rate {100 * (1 - TPR):.2f}% "
                        f"achieved at epoch [{epoch}], "
                        f"with avg {avg_diff_test_current:.1f} "
                        f"& max {max_diff_test_current:.1f} changes per sample "
                        f"(on the test set) ... "
                        f"GAN Loss: [{gan_loss:.8f}]"
                        "\n" + "#" * 150 + "\n"
                    )

                    if minTPR_avg_changes <= max_changes:
                        print("Training CONVERGED. "
                            "Target Evasion Rate achieved within max changes..."
                            "TRAINING ENDS HERE #")
                        # Save generator
                        best_G_path = \
                            (gan_dir + self.save_dir + 'models/' +
                            f'G_Target_TPR_epoch_{epoch}_'
                            f'TPR_{minTPR:.2f}_dtimes_{d_times}_changes_'
                            f'{avg_diff_test_current:.0f}_actReg_{self.reg[0]}_'
                            + time.strftime("%m-%d_%H-%M-%S") + '.h5')
                        self.generator.save(best_G_path)

                        if earlystop:
                            break

            # If no better than minTPR, but still achieved target evasion, ...
            elif TPR <= minTPR_threshold:
                # output_epoch = epoch
                print(
                    "\n" + "#" * 150 + "\n"
                    f"# Target Evasion Rate {100 * (1 - TPR):.2f}% "
                    f"achieved at epoch [{epoch}] "
                    f"with avg {avg_diff_test_current:.1f} "
                    f"and max {max_diff_test_current:.1f} changes per sample "
                    f"(on the test set) ... "
                    f"GAN Loss: [{gan_loss:.8f}]"
                    "\n" + "#" * 150 + "\n"
                )

                # Save weights
                weights_path = \
                    (gan_dir + self.save_dir + 'weights/' +
                     f'GAN_minTPR_weights_epoch_{epoch}_'
                     f'TPR_{minTPR:.2f}_dtimes_{d_times}_changes_'
                     f'{avg_diff_test_current:.0f}_actReg_{self.reg[0]}_'
                     + time.strftime("%m-%d_%H-%M-%S") + '.h5')
                # self.GAN.save_weights(weights_path)

                # If within max changes
                if avg_diff_test_current <= max_changes:  # check avg or max?
                    print("Target Evasion Rate achieved within max changes...")
                    # Save model
                    model_path = \
                        (gan_dir + self.save_dir + 'models/' +
                        f'GAN_Target_TPR_epoch_{epoch}_'
                        f'TPR_{minTPR:.2f}_dtimes_{d_times}_changes_'
                        f'{avg_diff_test_current:.0f}_actReg_{self.reg[0]}_'
                        + time.strftime("%m-%d_%H-%M-%S") + '.h5')
                    # self.GAN.save(model_path)
                    if earlystop:
                        break
                else:
                    print()
                    # Maybe adjust weights
                    # print("Should we adjust regulizers?")
                    # generator.layers[-2].rate *= 0.1
                    # generator.layers[-3].activity_regularizer.l1 *= 0.1
                    # generator.layers[-3].activity_regularizer.l2 *= 0.1
                    # weights = generator.get_weights()
                    # generator = keras.models.clone_model(generator)
                    # generator.set_weights(weights)
                    # Adapt regularisation weights
                    # K.set_value(l1_factor, 0.1*l1_factor)
                    # K.set_value(l2_factor, 0.1*l2_factor)

            if (epoch + 1 - output_epoch) > n_progress:
                # If no new improvement for a while, output progress
                output_epoch = epoch
                print(f"\n*** Checking progress *** "
                      f"GAN Loss: [{gan_loss:.8f}] ***")
                output_progress(epoch, TPR_train, TPR_test,
                                diff_train, diff_test)

                # Generate and plot a sample of AEs
                sample_sz = 10
                sample_noise = np.random.uniform(zmin, zmax, size=[sample_sz,
                                                                   self.z_dim])

                sample_mal = rand_batch(X_mal_batch, sample_sz)

                plot_sample(sample_mal, sample_noise, self.generator,
                            target_model, epoch, TPR_train=TPR_train,
                            TPR_test=TPR_test, params=self.log_params,
                            avg_changes=avg_diff_test_current,
                            m_label=mal_label, g_label=good_label,
                            annotate=False, out_dir=ADV_DIR, plot_id=plot_id)
                plot_id = plot_id + 1

        t2 = time.perf_counter()
        print("\n\n" + "#" * 165 + "\n"
            f"# Finished {epoch + 1} epochs in {(t2 - t1) / 60:.2f} minutes\n"
            f"# Best Evastion Rate = {100 * (1 - minTPR):.4f}% "
            f"(lowest TPR = {100 * minTPR:.4f}%) "
            f"achieved after {min_epoch + 1} epochs, with avg "
            f"{minTPR_avg_changes:.1f} \u00b1 SD({minTPR_std:.1f}) | "
            f" Q1-3  {minTPR_quantiles.astype(int).tolist()} | "
            f" and max {minTPR_max_changes:.1f} "
            f"changes per sample.\n"
            + "#" * 165 + "\n\n")

        return TPR_train, TPR_test, \
               avg_diff_train, avg_diff_test, \
               d_metrics, gan_metrics, \
               best_G_path
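The class above relies on several helpers that this excerpt does not define: binarise, ones, batch and rand_batch. The sketches below are assumptions reconstructed from how they are called, not the project's actual code; note in particular that the in-graph use of binarise inside build_GAN would need a differentiable surrogate (a hard cast has zero gradient), so the hard threshold shown here is only safe for the NumPy call sites:

import numpy as np
import tensorflow as tf

def binarise(x, threshold=0.5):
    """Threshold features to {0, 1} (hard version; see note above)."""
    if tf.is_tensor(x):
        return tf.cast(x > threshold, tf.float32)
    return (np.asarray(x) > threshold).astype(np.float32)

def ones(n):
    """Label vector of n ones."""
    return np.ones(n, dtype=np.float32)

def rand_batch(X, size):
    """A random batch of `size` rows sampled from X without replacement."""
    idx = np.random.choice(X.shape[0], size=size, replace=False)
    return X[idx]

def batch(X, size, seed=None):
    """Generator over shuffled minibatches of `size` rows of X."""
    rng = np.random.default_rng(seed)
    idx = rng.permutation(X.shape[0])
    for start in range(0, len(idx), size):
        yield X[idx[start:start + size]]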
Code example #35
File: conv_vae_2.py Project: crvogt/CodeDebauchery
outputs = decoder(encoder(inputs)[2])
vae = Model(inputs, outputs, name='vae')


models = (encoder, decoder)
data = (x_test, y_test)

reconstruction_loss = binary_crossentropy(K.flatten(inputs),
                                          K.flatten(outputs))

reconstruction_loss *= image_size * image_size
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')

# train the autoencoder
vae.fit(x_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(x_test, None))

decoded_imgs = vae.predict(x_test)

# How many digits we will display
n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # display original
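The excerpt above assumes an encoder whose third output, encoder(inputs)[2], is the sampled latent vector z. A minimal sketch of the reparameterisation layer such an encoder typically ends with (the wiring lines are hypothetical and shown commented out):

from keras import backend as K
from keras.layers import Lambda

def sampling(args):
    """Reparameterisation trick: z = mu + sigma * eps with eps ~ N(0, I)."""
    z_mean, z_log_var = args
    eps = K.random_normal(shape=K.shape(z_mean))
    return z_mean + K.exp(0.5 * z_log_var) * eps

# Hypothetical wiring at the end of the encoder:
# z = Lambda(sampling, name='z')([z_mean, z_log_var])
# encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')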
Code example #36
def build_gan(h=128,
              w=128,
              c=3,
              latent_dim=2,
              epsilon_std=1.0,
              dropout_rate=0.1,
              GRADIENT_PENALTY_WEIGHT=10):

    optimizer_g = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)
    optimizer_d = AdamWithWeightnorm(lr=0.0001, beta_1=0.5)

    t_h, t_w = h // 16, w // 16
    generator = residual_decoder(t_h,
                                 t_w,
                                 c=c,
                                 latent_dim=latent_dim,
                                 dropout_rate=dropout_rate)

    discriminator = residual_discriminator(h=h,
                                           w=w,
                                           c=c,
                                           dropout_rate=dropout_rate)
    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False

    generator_input = Input(shape=(latent_dim, ))
    generator_layers = generator(generator_input)

    discriminator_layers_for_generator = discriminator(generator_layers)
    generator_model = Model(inputs=[generator_input],
                            outputs=[discriminator_layers_for_generator])
    generator_model.add_loss(K.mean(discriminator_layers_for_generator))
    generator_model.compile(optimizer=optimizer_g, loss=None)

    # Now that the generator_model is compiled, we can make the discriminator layers trainable.
    for layer in discriminator.layers:
        layer.trainable = True
    for layer in generator.layers:
        layer.trainable = False
    discriminator.trainable = True
    generator.trainable = False

    # The discriminator_model is more complex. It takes both real image samples and random noise seeds as input.
    # The noise seed is run through the generator model to get generated images. Both real and generated images
    # are then run through the discriminator. Although we could concatenate the real and generated images into a
    # single tensor, we don't (see model compilation for why).
    real_samples = Input(shape=(h, w, c))
    generator_input_for_discriminator = Input(shape=(latent_dim, ))
    generated_samples_for_discriminator = generator(
        generator_input_for_discriminator)
    discriminator_output_from_generator = discriminator(
        generated_samples_for_discriminator)
    discriminator_output_from_real_samples = discriminator(real_samples)

    averaged_samples = RandomWeightedAverage()(
        [real_samples, generated_samples_for_discriminator])
    averaged_samples_out = discriminator(averaged_samples)

    discriminator_model = Model(
        [real_samples, generator_input_for_discriminator], [
            discriminator_output_from_real_samples,
            discriminator_output_from_generator, averaged_samples_out
        ])
    discriminator_model.add_loss(
        K.mean(discriminator_output_from_real_samples) -
        K.mean(discriminator_output_from_generator) + gradient_penalty_loss(
            averaged_samples_out, averaged_samples, GRADIENT_PENALTY_WEIGHT))
    discriminator_model.compile(optimizer=optimizer_d, loss=None)

    return generator_model, discriminator_model, generator, discriminator
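build_gan assumes two WGAN-GP helpers that the excerpt does not include: RandomWeightedAverage and gradient_penalty_loss. The sketches below are assumptions consistent with the calls above and with the usual WGAN-GP formulation (graph-mode Keras, as in the snippet), not the project's own code. Because both models are compiled with loss=None, each would be trained with train_on_batch(inputs, None):

import numpy as np
from keras import backend as K
from keras.layers import Layer

class RandomWeightedAverage(Layer):
    """Random convex combination of real and generated samples."""
    def call(self, inputs):
        real, fake = inputs
        eps = K.random_uniform((K.shape(real)[0], 1, 1, 1))
        return eps * real + (1.0 - eps) * fake

def gradient_penalty_loss(y_pred, averaged_samples, weight):
    """WGAN-GP penalty: drives ||grad D(x_hat)|| towards 1."""
    gradients = K.gradients(y_pred, averaged_samples)[0]
    # Sum the squared gradient over every axis except the batch axis
    sq_sum = K.sum(K.square(gradients),
                   axis=list(range(1, len(gradients.shape))))
    grad_l2 = K.sqrt(sq_sum)
    return weight * K.mean(K.square(1.0 - grad_l2))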