Example #1
    def fit(self, eventlog_name):
        import tensorflow as tf
        from tensorflow.contrib.keras.python.keras.engine import Input, Model
        from tensorflow.contrib.keras.python.keras.layers import Dense, GaussianNoise, Dropout

        # load data
        features = self.dataset.load(eventlog_name)

        # parameters
        input_size = features.shape[1]
        hidden_size = int(input_size * 4)  # over-complete hidden layer, 4x the input size

        # input layer
        input_layer = Input(shape=(input_size,), name='input')

        # hidden layer
        hid = Dense(hidden_size, activation=tf.nn.relu)(GaussianNoise(0.1)(input_layer))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))

        # output layer
        output_layer = Dense(input_size, activation='linear')(Dropout(0.5)(hid))

        # build model
        self.model = Model(inputs=input_layer, outputs=output_layer)

        # compile model
        self.model.compile(
            optimizer=tf.train.AdamOptimizer(learning_rate=0.0001),
            loss=tf.losses.mean_squared_error
        )

        # train model
        self.model.fit(
            features,
            features,
            batch_size=100,
            epochs=100,
            validation_split=0.2,
        )
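
A hedged sketch of scoring by reconstruction error after training, mirroring the mean-squared-error idea in Example #8 below (the detector instance is an assumption, not part of the original snippet):

import numpy as np

# Assumed usage after fit(): a higher reconstruction error suggests a more anomalous case.
reconstructions = detector.model.predict(features)
errors = np.mean((reconstructions - features) ** 2, axis=1)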
Example #2

#Encoder
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_sigma = Dense(latent_dim)(h)
z = Lambda(sampling, output_shape=(latent_dim, ))([z_mean, z_log_sigma])

#Decoder
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

vae = Model(x, x_decoded_mean)

# encoder, from inputs to latent space
encoder = Model(x, z_mean)

# generator, from latent space to reconstructed inputs
decoder_input = Input(shape=(latent_dim, ))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)


def vae_loss(x, x_decoded_mean):
    xent_loss = binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.mean(
        1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
    return xent_loss + kl_loss  # total loss: reconstruction term + KL divergence
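
A hedged sketch of how this VAE would typically be compiled and trained with the loss above, following the standard Keras VAE example (x_train, x_test, and the epoch count are assumptions):

vae.compile(optimizer='rmsprop', loss=vae_loss)
vae.fit(x_train, x_train,
        shuffle=True,
        epochs=50,
        batch_size=batch_size,
        validation_data=(x_test, x_test))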
Example #3
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(256, 5, activation='relu')(embedded_sequences)
x = MaxPooling1D(2)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(35)(x)  # global max pooling
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
preds = Dense(len(labels_index), activation='softmax')(x)

model = Model(sequence_input, preds)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

embedding_matrix = None
embeddings_index = None

for epoch in range(NUM_EPOCHS):
    i = 0
    for j in range(NUM_ROWS_SAVE_TO_TRAIN - NUM_ROWS_SAVE_TO_VAL, lendata,
                   NUM_ROWS_SAVE_TO_TRAIN - NUM_ROWS_SAVE_TO_VAL):
        x_train = np.load(
            os.path.join(SAVE_DIR, 'data_' + str(i) + '_' + str(j) + '.npy'))
        y_train = np.load(
            os.path.join(SAVE_DIR, 'labels_' + str(i) + '_' + str(j) + '.npy'))
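        # The snippet is cut off here. A hedged sketch of how such a chunked loop
        # typically continues (the fit call and window update are assumptions, not
        # part of the original code):
        model.fit(x_train, y_train, batch_size=128, epochs=1, shuffle=True)
        i = j
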
Example #4
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# at this point the representation is (4, 4, 8) i.e. 128-dimensional

x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

conv_autoencoder = Model(input_img, decoded)
conv_autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')


batch_size = 128
steps_per_epoch = int(np.floor(x_train.shape[0] / batch_size))  # integer steps per epoch (not passed to fit() below)
conv_autoencoder.fit(x_train, x_train, epochs=50, batch_size=batch_size,
                     shuffle=True, validation_data=(x_test, x_test),
                     callbacks=[TensorBoard(log_dir='./tf_autoencoder_logs')])


decoded_imgs = conv_autoencoder.predict(x_test)

n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
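    # The loop body is cut off in the original snippet. A hedged sketch of the usual
    # side-by-side display (assumes 28x28 grayscale inputs such as MNIST):
    ax = plt.subplot(2, n, i + 1)                  # original image on the top row
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.axis('off')

    ax = plt.subplot(2, n, i + n + 1)              # reconstruction on the bottom row
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.axis('off')
plt.show()
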
Example #5
                            trainable=False)

sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
x = Conv1D(128, 15, activation='relu')(embedded_sequences)
x = MaxPooling1D(5)(x)

x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(5)(x)
x = Conv1D(128, 5, activation='relu')(x)
x = MaxPooling1D(35)(x)  # global max pooling
x = Dropout(0.2)(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)

model_text = Model(sequence_input, x)
model_gen = Model(sequence_input_gen, x_gen)
print(model_text.output.shape)

print(model_gen.output.shape)
merged = concatenate([model_text.output, model_gen.output])
preds = Dense(len(labels_index), activation='softmax')(merged)

model = Model(inputs=[sequence_input, sequence_input_gen], outputs=preds)


model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])
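
A hedged sketch of training the merged two-input model (the training arrays and label matrix are assumptions, not part of the original snippet):

# Assumed usage: one array per input branch, passed in the same order as `inputs`.
model.fit([x_text_train, x_gen_train], y_train,
          batch_size=128,
          epochs=10,
          validation_split=0.2)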

Example #6
    def fit(self, eventlog_name):

        import tensorflow as tf
        from tensorflow.contrib.keras.python.keras.engine import Input, Model
        from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, GRU, Embedding, merge, Masking
        import numpy as np  # used below for vocabulary and embedding size computations

        features, targets = self.dataset.load(eventlog_name, train=True)
        inputs = []
        layers = []

        with tf.device('/cpu:0'):
            # split attributes
            features = [features[:, :, i] for i in range(features.shape[2])]

            for i, t in enumerate(features):
                voc_size = np.array(self.dataset.attribute_dims[i]) + 1  # we start at 1, hence +1
                emb_size = np.floor(voc_size / 2.0).astype(int)

                inp = Input(shape=(None, *t.shape[2:]))  # avoid shadowing the loop index i
                x = Embedding(input_dim=voc_size, output_dim=emb_size, input_length=t.shape[1], mask_zero=True)(inp)
                inputs.append(inp)
                layers.append(x)

            # merge layers
            x = merge.concatenate(layers)

        x = GRU(64, implementation=2)(x)

        # shared hidden layer
        x = Dense(512, activation=tf.nn.relu)(x)
        x = Dense(512, activation=tf.nn.relu)(Dropout(0.5)(x))

        # hidden layers per attribute
        outputs = []
        for i, l in enumerate(targets):
            o = Dense(256, activation=tf.nn.relu)(Dropout(0.5)(x))
            o = Dense(256, activation=tf.nn.relu)(Dropout(0.5)(o))
            o = Dense(l.shape[1], activation=tf.nn.softmax)(Dropout(0.5)(o))
            outputs.append(o)

        self.model = Model(inputs=inputs, outputs=outputs)

        # compile model

        # old setting : optimizers from tensorflow

        # self.model.compile(
        # optimizer=tf.train.AdamOptimizer(learning_rate=0.0001),
        # loss='categorical_crossentropy'
        # )

        # new setting : optimizers from keras

        self.model.compile(
            optimizer='Adadelta',
            loss='categorical_crossentropy'
        )

        # train model
        self.model.fit(
            features,
            targets,
            batch_size=100,
            epochs=100,
            validation_split=0.2,
        )
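
A hedged usage sketch (the detector instance and the per-attribute feature list are assumptions): predict() on this multi-output model returns one softmax array per attribute, in the same order as `outputs` above.

# Assumed usage, not part of the original method.
per_attribute_distributions = detector.model.predict(per_attribute_features)
for idx, dist in enumerate(per_attribute_distributions):
    print('attribute %d -> distribution shape %s' % (idx, dist.shape))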
Example #7
class RNNGRUAnomalyDetector(NNAnomalyDetector):
    def __init__(self, model=None, embedding=True):
        self.dataset = Dataset()
        super().__init__(model, abbreviation='RNNGRU')
        self.embedding = embedding

    def load(self, model):
        super().load(model)

    def fit(self, eventlog_name):

        import tensorflow as tf
        from tensorflow.contrib.keras.python.keras.engine import Input, Model
        from tensorflow.contrib.keras.python.keras.layers import Dense, Dropout, GRU, Embedding, merge, Masking
        import numpy as np  # used below for vocabulary and embedding size computations

        features, targets = self.dataset.load(eventlog_name, train=True)
        inputs = []
        layers = []

        with tf.device('/cpu:0'):
            # split attributes
            features = [features[:, :, i] for i in range(features.shape[2])]

            for i, t in enumerate(features):
                voc_size = np.array(self.dataset.attribute_dims[i]) + 1  # we start at 1, hence +1
                emb_size = np.floor(voc_size / 2.0).astype(int)

                inp = Input(shape=(None, *t.shape[2:]))  # avoid shadowing the loop index i
                x = Embedding(input_dim=voc_size, output_dim=emb_size, input_length=t.shape[1], mask_zero=True)(inp)
                inputs.append(inp)
                layers.append(x)

            # merge layers
            x = merge.concatenate(layers)

        x = GRU(64, implementation=2)(x)

        # shared hidden layer
        x = Dense(512, activation=tf.nn.relu)(x)
        x = Dense(512, activation=tf.nn.relu)(Dropout(0.5)(x))

        # hidden layers per attribute
        outputs = []
        for i, l in enumerate(targets):
            o = Dense(256, activation=tf.nn.relu)(Dropout(0.5)(x))
            o = Dense(256, activation=tf.nn.relu)(Dropout(0.5)(o))
            o = Dense(l.shape[1], activation=tf.nn.softmax)(Dropout(0.5)(o))
            outputs.append(o)

        self.model = Model(inputs=inputs, outputs=outputs)

        # compile model

        # old setting : optimizers from tensorflow

        # self.model.compile(
        # optimizer=tf.train.AdamOptimizer(learning_rate=0.0001),
        # loss='categorical_crossentropy'
        # )

        # new setting : optimizers from keras

        self.model.compile(
            optimizer='Adadelta',
            loss='categorical_crossentropy'
        )

        # train model
        self.model.fit(
            features,
            targets,
            batch_size=100,
            epochs=100,
            validation_split=0.2,
        )

    def predict_proba(self, eventlog_name):
        """
        Calculate the anomaly score and the probability distribution for each event in each trace.
        Anomaly score here is the probability of that event occurring given all events before.

        :param traces: traces to predict
        :return:
            anomaly_scores: anomaly scores for each attribute;
                    shape is (#traces, max_trace_length - 1, #attributes)

            distributions: probability distributions for each event and attribute;
                   list of np.arrays with shape (#traces, max_trace_length - 1, #attribute_classes),
                   one np.array for each attribute, hence list len is #attributes
        """

        def _get_all_subsequences(sequence):
            """
            Calculate all subsequences for a given sequence after removing the padding (0s).

            :param sequence:
            :return:
            """

            num_subsequences = np.sum(np.any(sequence != 0, axis=1)) - 1  # remove padding and calculate num subseqs
            subsequences = np.zeros((num_subsequences, sequence.shape[0], sequence.shape[1]))  # init array
            next_events = sequence[1:num_subsequences + 1]  # get next event

            for i in np.arange(num_subsequences):
                length = num_subsequences - i
                subsequences[i, :length, :] = sequence[:length, :]

            return subsequences[::-1], next_events

        # load data
        features, _ = self.dataset.load(eventlog_name, train=False)

        # anomaly scores for attributes
        # shape is (#traces, max_len_trace - 1, #attributes)
        # we do not predict the BOS activity, hence the -1
        anomaly_scores = np.ones((features.shape[0], features.shape[1] - 1, len(self.dataset.attribute_dims)))

        # distributions for each attribute
        attr_dims = np.array([int(o.shape[1]) for o in self.model.output])
        self.distributions = [np.ones((features.shape[0], features.shape[1] - 1, attr_dim)) for attr_dim in attr_dims]

        sub_sequences = []
        next_events = []

        for i, trace in enumerate(features):
            s, n = _get_all_subsequences(trace)
            sub_sequences.append(s)
            next_events.append(n)

        sub_sequences = np.vstack(sub_sequences)
        next_events = np.vstack(next_events).astype(int)

        if self.embedding:
            sub_sequences = [sub_sequences[:, :, i] for i in range(sub_sequences.shape[2])]
            next_events = [next_events[:, i] - 1 for i in range(next_events.shape[1])]
        else:
            offset = np.concatenate([[0], np.cumsum(attr_dims)[:-1]])
            n = np.zeros((next_events.shape[0], attr_dims.shape[0]), dtype=int)
            for index, next_event in enumerate(next_events):
                n[index] = np.where(next_event == 1)[0] - offset
            next_events = [n[:, i] for i in range(n.shape[1])]  # split per attribute once n is filled

        cumsum = np.cumsum(self.dataset.trace_lens - 1)
        cumsum2 = np.concatenate(([0], cumsum[:-1]))
        offsets = np.dstack((cumsum2, cumsum))[0]
        dist = self.model.predict(sub_sequences)

        for i, _n in enumerate(next_events):
            scores = dist[i][range(dist[i].shape[0]), _n]
            for j, trace_len in enumerate(self.dataset.trace_lens - 1):
                start, end = offsets[j]
                anomaly_scores[j][:trace_len, i] = scores[start:end]
                self.distributions[i][j, :trace_len] = dist[i][start:end]

        return anomaly_scores
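
A hedged downstream sketch (the detector instance, log name, and threshold are assumptions): turning the returned per-attribute probabilities into binary anomaly flags.

# Assumed post-processing, not part of the original class: flag an event attribute
# as anomalous when its predicted probability falls below a chosen threshold.
scores = detector.predict_proba(eventlog_name)
threshold = 0.01
anomalous = scores < threshold  # boolean array, same shape as scores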
Example #8
class DAEAnomalyDetector(NNAnomalyDetector):
    def __init__(self, model=None):
        super().__init__(model=model, abbreviation='dae')

        self.dataset = FlatOneHotDataset()

    def fit(self, eventlog_name):
        import tensorflow as tf
        from tensorflow.contrib.keras.python.keras.engine import Input, Model
        from tensorflow.contrib.keras.python.keras.layers import Dense, GaussianNoise, Dropout

        # load data
        features = self.dataset.load(eventlog_name)

        # parameters
        input_size = features.shape[1]
        hidden_size = int(input_size * 4)  # over-complete hidden layer, 4x the input size

        # input layer
        input_layer = Input(shape=(input_size,), name='input')

        # hidden layer
        hid = Dense(hidden_size, activation=tf.nn.relu)(GaussianNoise(0.1)(input_layer))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))
        hid = Dense(hidden_size, activation=tf.nn.relu)(Dropout(0.5)(hid))

        # output layer
        output_layer = Dense(input_size, activation='linear')(Dropout(0.5)(hid))

        # build model
        self.model = Model(inputs=input_layer, outputs=output_layer)

        # compile model
        self.model.compile(
            optimizer=tf.train.AdamOptimizer(learning_rate=0.0001),
            loss=tf.losses.mean_squared_error
        )

        # train model
        self.model.fit(
            features,
            features,
            batch_size=100,
            epochs=100,
            validation_split=0.2,
        )

    def predict_proba(self, eventlog_name):
        """
        Calculate the anomaly score for each event attribute in each trace. 
        Anomaly score here is the mean squared error.

        :param traces: traces to predict 
        :return: 
            anomaly_scores: anomaly scores for each attribute; 
                            shape is (#traces, max_trace_length - 1, #attributes)

        """

        features = self.dataset.load(eventlog_name)

        # get event length
        event_len = np.sum(self.dataset.attribute_dims - 1).astype(int)

        # init anomaly scores array
        anomaly_scores = np.zeros((features.shape[0], self.dataset.max_len - 1, len(self.dataset.attribute_dims)))

        # get predictions
        predictions = self.model.predict(features)
        errors = (predictions - features) ** 2

        # remove the BOS event
        errors = errors[:, event_len:]

        # split the errors according to the attribute dims
        split = np.cumsum(np.tile(self.dataset.attribute_dims - 1, self.dataset.max_len - 1), dtype=int)[:-1]
        errors = np.split(errors, split, axis=1)
        errors = np.array([np.mean(a, axis=1) for a in errors])

        for i in range(len(self.dataset.attribute_dims)):
            error = errors[i::len(self.dataset.attribute_dims)]
            anomaly_scores[:, :, i] = error.T

        # TODO: Normalize the anomaly_scores to lie between 0 and 1
        return -anomaly_scores
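
One way the TODO above could be handled, shown as a hedged sketch (per-attribute min-max scaling is an assumption, not the author's chosen method):

# Assumed normalization: scale each attribute's scores into [0, 1] before negating.
mins = anomaly_scores.min(axis=(0, 1), keepdims=True)
maxs = anomaly_scores.max(axis=(0, 1), keepdims=True)
normalized = (anomaly_scores - mins) / np.maximum(maxs - mins, 1e-12)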
Example #9
                       padding='same',
                       strides=2,
                       kernel_initializer='he_normal',
                       kernel_regularizer=l2(1e-4))(x)
        x = keras.layers.add([x, y])
        x = Activation('relu')(x)

    num_filters = 2 * num_filters

x = AveragePooling2D()(x)
y = Flatten()(x)
outputs = Dense(num_classes,
                activation='softmax',
                kernel_initializer='he_normal')(y)

model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])
model.summary()

save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'cifar10_resnet_model.h5'
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

checkpoint = ModelCheckpoint(filepath=filepath, verbose=1, save_best_only=True)

# Learning rate decay on plateau.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),