Example #1
    def __init__(self, params):
        self.name = 'Cramer GAN'
        self.params = params
        self.z_dim = params['z_dim']

        data_sampler = sample_mixture_of_gaussians(**params['data'])
        z_sampler = tf.contrib.distributions.Normal(tf.zeros(self.z_dim),
                                                    tf.ones(self.z_dim))
        self.batch_size = tf.placeholder(tf.int32, shape=())

        self.data = data_sampler.sample(self.batch_size)
        data_h = discriminator(self.data, **params['discriminator'])

        self.z = z_sampler.sample(self.batch_size)
        self.samples = generator(self.z, **params['generator'])
        samples_h = discriminator(self.samples,
                                  **params['discriminator'],
                                  reuse=True)

        self.z2 = z_sampler.sample(self.batch_size)
        self.samples2 = generator(self.z2, **params['generator'])
        samples2_h = discriminator(self.samples2,
                                   **params['discriminator'],
                                   reuse=True)

        self.discriminator_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
        self.generator_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')

        # Cramer GAN critic: f(x) = ||h(x) - h(x_g')|| - ||h(x)||
        f = lambda h: tf.sqrt(tf.reduce_sum((h - samples2_h) ** 2, axis=1)) \
                        - tf.sqrt(tf.reduce_sum(h ** 2, axis=1))
        # The critic maximizes E[f(x_real)] - E[f(x_fake)], so minimize the negation.
        self.discriminator_loss = -tf.reduce_mean(f(data_h) - f(samples_h))

        # Gradient penalty evaluated at random interpolates between real and
        # generated samples, taken on the critic surrogate f.
        e = tf.contrib.distributions.Uniform().sample([self.batch_size, 1])
        x = e * self.data + (1 - e) * self.samples
        x_h = discriminator(x, **params['discriminator'], reuse=True)
        gradients = tf.gradients(f(x_h), [x])[0]
        gradients_l2 = tf.sqrt(tf.reduce_sum(gradients**2, axis=1))
        gradient_penalty = tf.reduce_mean((gradients_l2 - 1)**2)
        self.discriminator_loss += params['lambda'] * gradient_penalty

        # Energy-distance generator loss:
        # ||h(x_r)-h(x_g)|| + ||h(x_r)-h(x_g')|| - ||h(x_g)-h(x_g')||
        g = lambda h1, h2: tf.sqrt(tf.reduce_sum((h1 - h2)**2, axis=1))
        self.generator_loss = tf.reduce_mean(g(data_h, samples_h) \
                                + g(data_h, samples2_h) \
                                - g(samples_h, samples2_h))

        self._init_optimization()
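
This example and the WGAN example that follows call generator and discriminator helpers that are not shown; the tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator') lookups only work if those helpers build their variables under matching variable scopes. A minimal sketch of what they might look like, assuming plain MLPs built with tf.layers.dense (the layer widths and the h_dim feature size are assumptions, not taken from the original code):

import tensorflow as tf


def generator(z, n_hidden=128, out_dim=2, reuse=False):
    # Assumed MLP generator; all variables live under the 'generator' scope.
    with tf.variable_scope('generator', reuse=reuse):
        net = tf.layers.dense(z, n_hidden, activation=tf.nn.relu)
        net = tf.layers.dense(net, n_hidden, activation=tf.nn.relu)
        return tf.layers.dense(net, out_dim)


def discriminator(x, n_hidden=128, h_dim=256, reuse=False):
    # Assumed MLP critic under the 'discriminator' scope. For the Cramer GAN it
    # returns a feature vector h(x); a plain WGAN critic would use h_dim=1.
    with tf.variable_scope('discriminator', reuse=reuse):
        net = tf.layers.dense(x, n_hidden, activation=tf.nn.relu)
        net = tf.layers.dense(net, n_hidden, activation=tf.nn.relu)
        return tf.layers.dense(net, h_dim)
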
Example #2
    def __init__(self, params):
        self.params = params
        self.z_dim = params['z_dim']

        data_sampler = sample_mixture_of_gaussians(**params['data'])
        z_sampler = tf.contrib.distributions.Normal(tf.zeros(self.z_dim), tf.ones(self.z_dim))
        self.batch_size = tf.placeholder(tf.int32, shape=())

        self.data = data_sampler.sample(self.batch_size)
        data_score = discriminator(self.data, **params['discriminator'])

        self.z = z_sampler.sample(self.batch_size)
        self.samples = generator(self.z, **params['generator'])
        samples_score = discriminator(self.samples, **params['discriminator'], reuse=True)

        self.discriminator_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
        self.generator_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')

        self.discriminator_loss = -tf.reduce_mean(data_score - samples_score)
        if params['gradient_penalty']:
            self.gradient_penalty = True
            self.name = 'WGAN gradient penalty'
            # WGAN-GP: penalize the critic's gradient norm at random interpolates
            # between real and generated samples.
            e = tf.contrib.distributions.Uniform().sample([tf.shape(self.data)[0], 1])
            x = e * self.data + (1 - e) * self.samples
            x_score = discriminator(x, **params['discriminator'], reuse=True)
            gradients = tf.gradients(x_score, [x])[0]
            gradients_l2 = tf.sqrt(tf.reduce_sum(gradients ** 2, axis=1))
            gradient_penalty = tf.reduce_mean((gradients_l2 - 1) ** 2)
            self.discriminator_loss += params['lambda'] * gradient_penalty
        else:
            self.gradient_penalty = False
            self.name = 'WGAN'

        self.generator_loss = -tf.reduce_mean(samples_score)

        self._init_optimization()
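
Examples #1 and #2 both finish with a call to self._init_optimization(), which is not shown. A minimal sketch of what it could look like, assuming one Adam optimizer per player restricted to that player's variable list (the learning rate and beta1 values are assumptions):

    def _init_optimization(self, learning_rate=1e-4, beta1=0.5):
        # Separate optimizers so each training op only updates its own player's variables.
        self.discriminator_train_op = tf.train.AdamOptimizer(
            learning_rate, beta1=beta1).minimize(
                self.discriminator_loss, var_list=self.discriminator_vars)
        self.generator_train_op = tf.train.AdamOptimizer(
            learning_rate, beta1=beta1).minimize(
                self.generator_loss, var_list=self.generator_vars)
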
Example #3
##################
# end-to-end model
# maximum number of bytes we allow per file = 256KB; a power of two for faster evaluation on the endpoint
max_file_length = int(2**18)
file_chunks = 16  # break file into this many chunks
file_chunk_size = max_file_length // file_chunks
batch_size = 4

import malwaresnet
import common
import math
import numpy as np
from keras.callbacks import LearningRateScheduler

model_malwaresnet = malwaresnet.create_model(input_shape=(file_chunks,
                                                          file_chunk_size),
                                             byte_embedding_size=2)
train_generator = common.generator(list(zip(sha256_train, y_train)),
                                   batch_size, file_chunks, file_chunk_size)
test_generator = common.generator(list(zip(sha256_test, y_test)), 1,
                                  file_chunks, file_chunk_size)
model_malwaresnet.fit_generator(
    train_generator,
    steps_per_epoch=math.ceil(len(sha256_train) / batch_size),
    epochs=20,
    callbacks=[
        LearningRateScheduler(lambda epoch: common.schedule(
            epoch, start=0.1, decay=0.5, every=1))
    ],
    validation_data=test_generator,
    validation_steps=len(sha256_test))
y_pred = []
for sha256, lab in zip(sha256_test, y_test):
    y_pred.append(
        model_malwaresnet.predict_on_batch(
            np.asarray([get_file_data(sha256, lab)]).reshape(
                -1, file_chunks, file_chunk_size)))  # closing lines assumed, mirroring Example #4
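
Examples #3 and #4 both drive the learning rate through common.schedule(epoch, start, decay, every), which is not shown. A plausible sketch, assuming a standard step decay (the exact formula is an assumption):

def schedule(epoch, start=0.1, decay=0.5, every=1):
    # Step decay: multiply the starting rate by `decay` once per `every` epochs.
    return start * decay ** (epoch // every)

With the arguments used above (start=0.1, decay=0.5, every=1), this halves the learning rate after every epoch.
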
Example #4
##################
# end-to-end model
# maximum number of bytes we allow per file = 512KB
max_file_length = int(2**19)
file_chunks = 8  # break file into this many chunks
file_chunk_size = max_file_length // file_chunks
batch_size = 8

import endtoend
import common
import math
import numpy as np
from keras.callbacks import LearningRateScheduler

# create_model(input_shape, byte_embedding_size=2, input_dropout=0.2, kernel_size=16, n_filters_per_layer=[64, 256, 1024], n_mlp_layers=2)
model_e2e = endtoend.create_model(input_shape=(file_chunks, file_chunk_size))
train_generator = common.generator(list(zip(sha256_train, y_train)), batch_size, file_chunks, file_chunk_size)
test_generator = common.generator(list(zip(sha256_test, y_test)), 1, file_chunks, file_chunk_size)
model_e2e.fit_generator(train_generator,
                        steps_per_epoch=math.ceil(len(sha256_train) / batch_size),
                        epochs=20,
                        callbacks=[LearningRateScheduler(
                            lambda epoch: common.schedule(epoch, start=0.1, decay=0.5, every=1)
                            )
                        ],
                        validation_data=test_generator,
                        validation_steps=len(sha256_test))
y_pred = []
for sha256, lab in zip(sha256_test, y_test):
    y_pred.append(
        model_e2e.predict_on_batch(
            np.asarray([get_file_data(sha256, lab)]).reshape(
                -1, file_chunks, file_chunk_size)))  # closing lines assumed; the excerpt is cut off here
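
Both end-to-end examples read samples through get_file_data(sha256, label), which is not shown. A minimal sketch, assuming each sample is stored as a raw file named by its SHA-256 hash, truncated to max_file_length bytes and zero-padded (the data_dir layout is an assumption):

import os

import numpy as np


def get_file_data(sha256, label, data_dir='data'):
    # `label` is accepted only to match the call sites above; it is not used here.
    # Read the raw bytes of the sample and truncate to the maximum allowed length.
    with open(os.path.join(data_dir, sha256), 'rb') as f:
        raw = np.frombuffer(f.read(), dtype=np.uint8)[:max_file_length]
    # Zero-pad so every file reshapes cleanly to (file_chunks, file_chunk_size).
    padded = np.zeros(max_file_length, dtype=np.uint8)
    padded[:len(raw)] = raw
    return padded
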
Example #5
if modelConf["dropout_1"]:
    model.add(Dropout(0.5))
model.add(Dense(24 * modelConf["dense_1_factor"], activation='relu'))
if modelConf["dropout_2"]:
    model.add(Dropout(0.5))
model.add(Dense(12 * modelConf["dense_2_factor"], activation='relu'))
if modelConf["dense_3"]:
    if modelConf["dropout_3"]:
        model.add(Dropout(0.5))
    model.add(Dense(6 * modelConf["dense_3_factor"], activation='relu'))
model.add(Dense(1))

model.compile(loss='mse', optimizer='adam')

history = model.fit_generator(
    generator(partition['train']),
    TRAIN_STEPS_PER_EPOCH,
    epochs=15,
    verbose=2,
    validation_data=generator(partition['validation']),
    validation_steps=VAL_STEPS_PER_EPOCH,
)

model.save('model.h5')

# The following lines are uncommented when doing hyperparameter search; they produce logs of training runs

# modelConf["history"] = history.history
# import uuid
# unique_filename = str(uuid.uuid4())
# with open(os.path.join("history", unique_filename), "w") as history_file:
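#     # assumed completion of the truncated with-block: serialize the run's config as JSON
#     import json
#     json.dump(modelConf, history_file)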
Example #6
    # center_loss = tf.reduce_mean(tf.square(norm_diff))
    return center_loss, centers


def safe_norm(x, axis=None, keepdims=False, eps=1e-10):
    return tf.sqrt(tf.reduce_sum(x**2, axis=axis, keepdims=keepdims) + eps)


def safe_sigma(x, eps=1e-10):
    return tf.sqrt(tf.reduce_mean(tf.square(x) + eps))
    #return tf.reduce_mean(x + eps)


###################################################################################################################

fake_datas = generator(cfg.iBatchSize, cfg)
# The discriminator returns a feature vector fc and a second output rf for each sample.
fc, rf = discriminator(tf.concat([real_datas, fake_datas], axis=0), cfg)
labels = tf.zeros([cfg.iBatchSize * 2], tf.int32)
center_loss, centers = get_centers(fc, labels, 1)
fc = fc - tf.gather(centers, labels)  # center the features on their (single) class center

rf_norm = tf.nn.l2_normalize(rf, 1)
radius = safe_norm(fc, axis=1)
# Keep each sample's feature radius close to the batch RMS radius (Huber penalty).
pred_radius = tf.sqrt(tf.reduce_mean(tf.square(radius)))
dis_radius_loss = tf.losses.huber_loss(
    tf.ones_like(radius) * pred_radius, radius)

global_step = tf.Variable(0, trainable=False, name='global_step')

fc = tf.nn.l2_normalize(fc, 1)
# Project the normalized, centered features onto the rf_norm direction.
fc_proj = rf_norm * tf.reduce_sum(rf_norm * fc, 1, keepdims=True)
Example #7
import cv2
import numpy as np
from keras.models import load_model
from keras import backend as K
from common import generator, get_partition, fit_weight_normalizer, normalize_brightness

weight_normalizer = fit_weight_normalizer()

partition = get_partition(weight_normalizer)

model = load_model("model.h5")

inp = model.input
outputs = [layer.output for layer in model.layers]
# Build a backend function that returns every layer's output for a given input batch.
functor = K.function([inp] + [K.learning_phase()], outputs)

batch = next(generator(partition['train']))[0]

# Testing
shape = list(batch.shape)
shape[0] = 1
for j in range(batch.shape[0]):
    test = np.reshape(batch[j], shape)
    layer_outs = functor([test, 1.])
    for i, out in enumerate(layer_outs):
        if len(out.shape) == 4 and out.shape[-1] >= 3:
            out = out[:, :, :, :3]
            out = out - np.min(out, axis=(0, 1, 2))
            out = out / np.max(out, axis=(0, 1, 2))
            out *= 255
            out = np.reshape(out, out.shape[1:]).astype(np.uint8)
            out = normalize_brightness(out)
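            # The cv2 import above is otherwise unused in this excerpt; presumably the
            # normalized activation map is written out at this point (file name assumed):
            cv2.imwrite('layer_%02d_sample_%02d.png' % (i, j), out)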