コード例 #1
0
    def build(self):
        """Assemble, train and store the dense variational autoencoder.

        The encoder maps the input through one hidden layer to ``z_mean`` and
        ``z_log_var`` and samples ``z`` from them; the decoder maps a latent
        vector through one hidden layer to a sigmoid reconstruction. Both are
        chained into a single model that is trained on a train/validation
        split of ``self.x_train``.

        Side effects:
            ``self.threshold`` is set to the 0.9 quantile of the training
            reconstruction error (mean squared error over axis 1) and
            ``self.vae`` to the fitted model.
        """
        # ---- encoder ----
        encoder_in = Input(shape=self.input_shape, name='encoder_input')
        hidden = Dense(self.intermediate_dim,
                       activation=self.activation_fct)(encoder_in)
        mu = Dense(self.latent_dim, name='z_mean')(hidden)
        log_var = Dense(self.latent_dim, name='z_log_var')(hidden)

        # Reparameterization trick: the random draw happens inside `sampling`,
        # so the graph stays differentiable with respect to mu and log_var.
        # ("output_shape" is not required by the TensorFlow backend.)
        z_sample = Lambda(sampling, output_shape=(self.latent_dim, ),
                          name='z')([mu, log_var])
        encoder = Model(encoder_in, [mu, log_var, z_sample], name='encoder')

        # ---- decoder ----
        decoder_in = Input(shape=(self.latent_dim, ), name='z_sampling')
        decoder_hidden = Dense(self.intermediate_dim,
                               activation=self.activation_fct)(decoder_in)
        decoder_out = Dense(self.original_dim,
                            activation='sigmoid')(decoder_hidden)
        decoder = Model(decoder_in, decoder_out, name='decoder')

        # ---- end-to-end model: decode the sampled z (third encoder output) ----
        reconstruction = decoder(encoder(encoder_in)[2])
        model = Model(encoder_in, reconstruction, name='vae_mlp')

        # Total VAE loss = scaled reconstruction error + KL divergence term.
        recon_term = mse(encoder_in, reconstruction) * self.original_dim
        kl_term = K.sum(1 + log_var - K.square(mu) - K.exp(log_var),
                        axis=-1) * -0.5
        model.add_loss(K.mean(recon_term + kl_term))

        # NOTE(review): `self.loss` is passed to compile() in addition to the
        # loss attached above -- confirm that combining both is intended.
        model.compile(optimizer=self.optimizer,
                      loss=self.loss,
                      metrics=['accuracy'])

        train_part, valid_part = train_test_split(
            self.x_train,
            test_size=self.train_test_split,
            random_state=self.seed)

        # Autoencoder training: the input doubles as the target.
        fit_options = dict(batch_size=self.batch_size,
                           epochs=self.epochs,
                           verbose=self.verbosity,
                           shuffle=True,
                           validation_data=(valid_part, valid_part))
        model.fit(train_part, train_part, **fit_options)

        # Reconstruction error on the complete training set defines the cut-off.
        residual = self.x_train - model.predict(self.x_train)
        train_mse = np.mean(np.power(residual, 2), axis=1)
        self.threshold = np.quantile(train_mse, 0.9)
        self.vae = model
コード例 #2
0
ファイル: cnn.py プロジェクト: tweistror/ccfraud
    def build(self):
        """Build, train and store the convolutional autoencoder.

        The network expects 28x28 single-channel images, so ``self.x_train``
        is reshaped to ``(-1, 28, 28, 1)`` before training.

        Side effects:
            ``self.threshold`` is set to the 0.9 quantile of the per-sample
            reconstruction MSE on the training data and
            ``self.cnn_autoencoder`` to the fitted model.
        """
        input_img = Input(shape=(28, 28, 1))

        # Encoder: three conv + 2x2 max-pool stages.
        cnn = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
        cnn = MaxPooling2D((2, 2), padding='same')(cnn)
        cnn = Conv2D(32, (3, 3), activation='relu', padding='same')(cnn)
        cnn = MaxPooling2D((2, 2), padding='same')(cnn)
        cnn = Conv2D(32, (3, 3), activation='relu', padding='same')(cnn)
        encoded = MaxPooling2D((2, 2), padding='same')(cnn)

        # Decoder: three conv + 2x2 upsampling stages.
        cnn = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
        cnn = UpSampling2D((2, 2))(cnn)
        cnn = Conv2D(32, (3, 3), activation='relu', padding='same')(cnn)
        cnn = UpSampling2D((2, 2))(cnn)
        # Deliberately NOT padding='same': the un-padded 3x3 convolution
        # shrinks 16x16 to 14x14 so the last upsampling yields 28x28 again.
        cnn = Conv2D(32, (3, 3), activation='relu')(cnn)
        cnn = UpSampling2D((2, 2))(cnn)
        decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(cnn)

        cnn_autoencoder = Model(input_img, decoded)
        cnn_autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

        x_train = self.x_train.reshape(-1, 28, 28, 1)

        x_train_split, x_valid_split = train_test_split(
            x_train,
            test_size=self.train_test_split,
            random_state=self.seed)

        # Autoencoder training: the input doubles as the target.
        cnn_autoencoder.fit(x_train_split, x_train_split,
                            epochs=self.epochs,
                            batch_size=self.batch_size,
                            validation_data=(x_valid_split, x_valid_split),
                            verbose=self.verbosity)

        x_train_pred = cnn_autoencoder.predict(x_train)
        # The data is 4-D (sample, row, column, channel). Reduce over every
        # non-batch axis to get ONE error value per sample; axis=1 alone would
        # average over image rows only and leave a (N, 28, 1) array.
        squared_error = np.power(x_train - x_train_pred, 2)
        mse = np.mean(squared_error, axis=(1, 2, 3))

        # Semi-supervised due to given threshold
        self.threshold = np.quantile(mse, 0.9)
        self.cnn_autoencoder = cnn_autoencoder
コード例 #3
0
class Autoencoder(object):
    """Configurable Keras autoencoder (stacked LSTM or dense) with training helpers.

    Typical use: build a network with :meth:`model` or :meth:`modelMasking`,
    then call :meth:`compile`, :meth:`fit` and :meth:`predict`.

    Note:
        ``model`` names both the builder method and the instance attribute in
        which the builders store the network. The attribute is therefore NOT
        pre-set to ``None`` in ``__init__`` -- that would shadow the method
        and make ``instance.model(...)`` fail with "'NoneType' object is not
        callable". Once a builder has run, ``self.model`` is the built
        network (use ``del instance.model`` to reach the builder again).
    """

    def __init__(self, epochs, verbosity):
        """Store the training configuration.

        Args:
            epochs: number of training epochs handed to Keras ``fit``.
            verbosity: Keras ``verbose`` level handed to ``fit``.
        """
        self.epochs = epochs
        self.batch_size = 256
        self.shuffle = True
        self.validation_split = 0.05
        self.optimizer = 'adadelta'
        self.loss = 'mse'
        self.verbosity = verbosity

        self.code_layer_type = None
        # No `self.model = None` here on purpose, see the class docstring.
        self.sample_weight_mode = None
        self.sample_weights = None
        self.y_true = None
        self.y_pred = None

    def _built_model(self):
        """Return the network stored by a builder, or raise if there is none."""
        network = self.__dict__.get('model')
        if network is None:
            raise RuntimeError(
                "No network has been built yet; call model() or "
                "modelMasking() first.")
        return network

    def _lstm_decoded(self, encoder_input, input_dim, code_dim):
        """Stack the LSTM encoder and decoder on *encoder_input*.

        Every entry of ``code_dim`` becomes one encoder LSTM; the last one
        returns only its final state (the code). The code is repeated
        ``input_dim[0]`` times and decoded by LSTMs mirroring the encoder
        (bottleneck excluded), followed by an output LSTM with
        ``input_dim[-1]`` units.

        Returns:
            The output tensor of the final decoder LSTM.
        """
        if len(code_dim) == 0:
            # Only reachable when the callers' assertions are stripped (-O).
            raise ValueError("The codDim must be over 0.")

        encoded = encoder_input
        for units in code_dim[:-1]:
            encoded = LSTM(units, return_sequences=True)(encoded)
        encoded = LSTM(code_dim[-1])(encoded)

        decoded = RepeatVector(input_dim[0])(encoded)
        for units in reversed(code_dim[:-1]):
            decoded = LSTM(units, return_sequences=True)(decoded)
        return LSTM(input_dim[-1], return_sequences=True)(decoded)

    def model(self, code_layer_type, input_dim, code_dim):
        """Build the autoencoder network and store it in ``self.model``.

        Args:
            code_layer_type: ``'lstm'`` or ``'dense'``. ``'cov'`` is accepted
                but builds nothing; any other value is ignored as well.
            input_dim: two entries for ``'lstm'`` (used as the input shape),
                one entry (feature count) for ``'dense'``.
            code_dim: non-empty sequence of layer sizes for the encoder.

        Raises:
            AssertionError: if ``code_dim`` is empty or ``input_dim`` has the
                wrong length for the chosen layer type.
        """
        self.code_layer_type = code_layer_type
        assert len(code_dim) > 0

        if self.code_layer_type == 'lstm':
            assert len(input_dim) == 2
            input_data = Input(shape=(input_dim[0], input_dim[1]))
            decoded = self._lstm_decoded(input_data, input_dim, code_dim)
            self.model = Model(input_data, decoded)

        elif self.code_layer_type == 'dense':
            assert len(input_dim) == 1
            input_data = Input(shape=(input_dim[0], ))
            encoded = input_data
            for units in code_dim:
                encoded = Dense(units, activation='relu')(encoded)
            decoded = Dense(input_dim[-1], activation='sigmoid')(encoded)
            self.model = Model(input_data, decoded)

        elif self.code_layer_type == 'cov':
            # Placeholder: no network is built for this type.
            pass

    def modelMasking(self, code_layer_type, input_dim, code_dim):
        """Build a masking-aware autoencoder and store it in ``self.model``.

        For ``'lstm'`` the input first passes a ``Masking(mask_value=0.)``
        layer and then the same encoder/decoder stack as in :meth:`model`.
        For ``'dense'`` a fixed four-layer network is built from
        ``code_dim[0]`` only: tanh (L1 activity-regularised) -> relu with
        half the units -> tanh with half the units -> relu output.

        Args:
            code_layer_type: ``'lstm'`` or ``'dense'``. ``'cov'`` is accepted
                but builds nothing; any other value is ignored as well.
            input_dim: two entries for ``'lstm'``, one entry for ``'dense'``.
            code_dim: non-empty sequence of layer sizes.

        Raises:
            AssertionError: if ``code_dim`` is empty or ``input_dim`` has the
                wrong length for the chosen layer type.
        """
        self.code_layer_type = code_layer_type
        assert len(code_dim) > 0

        if self.code_layer_type == 'lstm':
            assert len(input_dim) == 2
            input_data = Input(shape=(input_dim[0], input_dim[1]))
            mask = Masking(mask_value=0.)(input_data)
            decoded = self._lstm_decoded(mask, input_dim, code_dim)
            self.model = Model(input_data, decoded)

        elif self.code_layer_type == 'cov':
            # Placeholder: no network is built for this type.
            pass

        elif self.code_layer_type == 'dense':
            assert len(input_dim) == 1
            input_data = Input(shape=(input_dim[0], ))
            half_units = int(code_dim[0] / 2)
            encoder = Dense(
                code_dim[0],
                activation="tanh",
                activity_regularizer=regularizers.l1(10e-5))(input_data)
            encoder = Dense(half_units, activation="relu")(encoder)
            decoder = Dense(half_units, activation='tanh')(encoder)
            decoder = Dense(input_dim[0], activation='relu')(decoder)
            self.model = Model(input_data, decoder)

    def compile(self, *args):
        """Compile the built network.

        Args:
            *args: nothing for a plain compile with ``self.optimizer`` and
                ``self.loss``; or one string: ``'temporal'`` (compile with
                ``sample_weight_mode='temporal'``) or ``'customFunction'``
                (use :meth:`weighted_vector_mse` as the loss).

        Raises:
            ValueError: for more than one argument or an unknown mode.
            RuntimeError: if no network has been built yet.
        """
        if len(args) > 1:
            raise ValueError("argument # must be 0 or 1.")

        compile_kwargs = {'optimizer': self.optimizer, 'loss': self.loss}
        if args:
            mode = args[0]
            if mode == 'temporal':
                self.sample_weight_mode = mode
                compile_kwargs['sample_weight_mode'] = self.sample_weight_mode
            elif mode == 'customFunction':
                compile_kwargs['loss'] = self.weighted_vector_mse
            else:
                raise ValueError(
                    "Invalid maskType, please input 'temporal' or 'customFunction'"
                )

        self._built_model().compile(**compile_kwargs)

    def fit(self, *args):
        """Train the network to reconstruct its input.

        Args:
            *args: ``(data, order)`` or ``(data, order, sample_weights)``.
                ``order`` is ``'nor'`` (target is ``data``) or ``'rev'``
                (target is ``data`` flipped along axis 1). When given,
                ``sample_weights`` is stored on the instance and passed to
                Keras as ``sample_weight``.

        Raises:
            ValueError: for an unsupported number of arguments or an unknown
                ``order``.
            RuntimeError: if no network has been built yet.
        """
        if len(args) not in (2, 3):
            raise ValueError(
                "fit expects ('data', 'nor' or 'rev') optionally followed "
                "by 'sample_weights'.")

        data, order = args[0], args[1]
        fit_kwargs = {
            'epochs': self.epochs,
            'batch_size': self.batch_size,
            'shuffle': self.shuffle,
            'validation_split': self.validation_split,
            'verbose': self.verbosity,
        }

        if len(args) == 3:
            self.sample_weights = args[2]
            fit_kwargs['sample_weight'] = self.sample_weights
            bad_order_message = (
                "Please input, 'data', 'nor' or 'rev', 'sample_weights'")
        else:
            bad_order_message = "decoding sequence type must be 'nor' or 'rev'."

        if order == 'nor':
            target = data
        elif order == 'rev':
            target = np.flip(data, 1)
        else:
            raise ValueError(bad_order_message)

        self._built_model().fit(data, target, **fit_kwargs)

    def predict(self, data):
        """Return the network's output for *data*.

        Raises:
            RuntimeError: if no network has been built yet.
        """
        return self._built_model().predict(data)

    def weighted_vector_mse(self, y_true, y_pred):
        """Squared-error loss weighted element-wise by ``ceil(y_true)``.

        Both tensors are also stored on the instance as ``self.y_true`` and
        ``self.y_pred``. ``T`` is the tensor-operations module imported
        elsewhere in this file (presumably ``theano.tensor`` -- confirm).

        Returns:
            Mean over samples of the weighted squared error summed over the
            last axis.
        """
        self.y_true = y_true
        self.y_pred = y_pred

        # For targets in [0, 1] the ceiling is 0 exactly where the target is 0
        # and 1 elsewhere, so all-zero target entries drop out of the loss.
        weight = T.ceil(self.y_true)
        loss = T.square(weight * (self.y_true - self.y_pred))
        # use appropriate relations for other objectives. E.g, for binary_crossentropy:
        # loss = weights * (y_true * T.log(y_pred) + (1.0 - y_true) * T.log(1.0 - y_pred))
        return T.mean(T.sum(loss, axis=-1))