Example #1
from keras.layers import Input, Dense, Dropout


def StackedAutoencoder(gene_exp_dim, layer_size, dropout_size, encoding_layer):
    input_exp = Input(shape=(gene_exp_dim, ))

    # Build the encoding layers
    encoded = Dense(layer_size, activation='relu')(input_exp)
    # Dropout is a layer applied to the tensor, not an .add() call
    encoded = Dropout(dropout_size)(encoded)
    last_layer = layer_size  # fallback size when encoding_layer == 1
    for enc_layer in range(1, encoding_layer):
        encoded = Dense((layer_size - enc_layer * 2),
                        activation='sigmoid')(encoded)
        encoded = Dropout(dropout_size)(encoded)
        last_layer = layer_size - enc_layer * 2

    # Build the decoding layers
    decoded = Dense(last_layer, activation='sigmoid')(encoded)
    for dec_layer in range(1, (encoding_layer - 1)):
        decoded = Dense((dec_layer + dec_layer * 2),
                        activation='sigmoid')(decoded)
        decoded = Dropout(dropout_size)(decoded)
    # Dense takes a unit count, not shape=, and the reconstruction layer
    # must cap the decoder stack rather than the raw encoder output
    decoded = Dense(gene_exp_dim, activation='sigmoid')(decoded)

    return decoded
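As written, the function builds its Input locally and returns only the output tensor, so the graph cannot be wrapped in a trainable Model from outside. A hedged usage sketch, assuming the last line is changed to `return input_exp, decoded` (all dimensions and data below are placeholders, not values from the source):

import numpy as np
from keras.models import Model

# Assumes StackedAutoencoder ends with `return input_exp, decoded`
input_exp, decoded = StackedAutoencoder(gene_exp_dim=100,
                                        layer_size=64,
                                        dropout_size=0.2,
                                        encoding_layer=3)

autoencoder = Model(inputs=input_exp, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

# Placeholder gene-expression matrix: 200 samples x 100 genes in [0, 1]
X = np.random.rand(200, 100)
autoencoder.fit(X, X, epochs=10, batch_size=32)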
Example #2
    def build(self):

        input_img = Input(shape=(self.input_size, ))

        # Encoder: chain Dense layers with the functional API
        # (a Keras tensor has no .add(); that is Sequential's interface)
        encoder = Dense(1000, activation='relu')(input_img)
        encoder = Dense(500, activation='relu')(encoder)
        encoder = Dense(250, activation='relu')(encoder)
        encoder = Dense(30, activation='relu')(encoder)

        # Decoder: reconstruct back up to the input dimension
        decoder = Dense(250, activation='relu')(encoder)
        decoder = Dense(500, activation='relu')(decoder)
        decoder = Dense(1000, activation='relu')(decoder)
        # self.input_size rather than a hard-coded 784, so the
        # reconstruction always matches the input layer
        decoder = Dense(self.input_size, activation='sigmoid')(decoder)

        # Keras 2 uses inputs=/outputs= (input=/output= is the old Keras 1 API)
        self.autoencoder = Model(inputs=input_img, outputs=decoder)
        self.autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

        self.encoder = Model(inputs=input_img, outputs=encoder)
        self.encoder.compile(optimizer='adam', loss='binary_crossentropy')
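The class around build is not shown in the snippet. A minimal host-class sketch follows; the class name, constructor, and training data are assumptions, and the build method from above must be pasted into the class body before this runs:

import numpy as np
from keras.layers import Input, Dense
from keras.models import Model

class Autoencoder:  # hypothetical host class for the build() method above
    def __init__(self, input_size=784):
        self.input_size = input_size

    # ... paste build(self) from above here ...

ae = Autoencoder(input_size=784)
ae.build()

# Placeholder data: 1000 flattened 28x28 images scaled to [0, 1]
x_train = np.random.rand(1000, 784)
ae.autoencoder.fit(x_train, x_train, epochs=5, batch_size=128)

# 30-dimensional bottleneck codes from the trained encoder
codes = ae.encoder.predict(x_train)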
Example #3
z_log_var_encoded = Activation('relu')(z_log_var_dense_batchnorm)

z = Lambda(sampling,
           output_shape=(z_shape, ))([z_mean_encoded, z_log_var_encoded])

# ~~~~~~~~~~~~~~~~~~~~~~
# DECODER
# ~~~~~~~~~~~~~~~~~~~~~~
if depth == 1:
    decoder_to_reconstruct = Dense(original_dim,
                                   kernel_initializer='glorot_uniform',
                                   activation='sigmoid')
elif depth == 2:
    decoder_to_reconstruct = Sequential()
    decoder_to_reconstruct.add(Dense(latent_dim,
                                     kernel_initializer='glorot_uniform',
                                     activation='relu',
                                     input_dim=latent_dim2))
    decoder_to_reconstruct.add(Dense(original_dim,
                                     kernel_initializer='glorot_uniform',
                                     activation='sigmoid'))

rnaseq_reconstruct = decoder_to_reconstruct(z)

# ~~~~~~~~~~~~~~~~~~~~~~
# CONNECTIONS
# ~~~~~~~~~~~~~~~~~~~~~~
adam = optimizers.Adam(lr=learning_rate)
vae_layer = CustomVariationalLayer()([rnaseq_input, rnaseq_reconstruct])
vae = Model(rnaseq_input, vae_layer)
vae.compile(optimizer=adam, loss=None, loss_weights=[beta])
vae.summary()
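This excerpt assumes that `sampling`, `CustomVariationalLayer`, and the encoder tensors are defined earlier in the script; Example #4 below shows the same VAE assembled end to end. For reference, the reparameterization-trick `sampling` function the excerpt relies on can be written as follows (a minimal sketch consistent with Example #4):

from keras import backend as K

def sampling(args):
    # Reparameterization trick: z = mu + sigma * epsilon keeps the stochastic
    # node differentiable with respect to z_mean and z_log_var
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=K.shape(z_mean), mean=0., stddev=1.0)
    return z_mean + K.exp(z_log_var / 2) * epsilon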
Example #4
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
from keras import metrics, optimizers
from keras.layers import (Input, Dense, Lambda, Layer, Activation,
                          BatchNormalization)
from keras.models import Model, Sequential
from keras.callbacks import Callback
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split


def run_vae(rnaseq_file, learning_rate, batch_size, epochs, kappa, depth,
            first_layer, output_filename, latent_dim, scale, subset_mad_genes,
            data_basename):

    # Random seed
    seed = int(np.random.randint(low=0, high=10000, size=1))
    np.random.seed(seed)

    # Load Data
    #file = 'train_{}_expression_matrix_processed.tsv.gz'.format(dataset.lower())
    #rnaseq_file = os.path.join('..', '0.expression-download', 'data', file)
    rnaseq_df = pd.read_table(rnaseq_file, index_col=0)

    # Determine most variably expressed genes and subset
    if subset_mad_genes is not None:
        mad_genes = rnaseq_df.mad(axis=0).sort_values(ascending=False)
        top_mad_genes = mad_genes.iloc[0:int(subset_mad_genes), ].index
        rnaseq_df = rnaseq_df.loc[:, top_mad_genes]

    # Zero-one normalize the input data
    if scale:
        scaler = MinMaxScaler()
        x = scaler.fit_transform(rnaseq_df)
        rnaseq_df = pd.DataFrame(x,
                                 index=rnaseq_df.index,
                                 columns=rnaseq_df.columns)

    # Set architecture dimensions
    original_dim = rnaseq_df.shape[1]
    epsilon_std = 1.0
    beta = K.variable(0)
    if depth == 2:
        latent_dim2 = int(first_layer)

    # Function for reparameterization trick to make model differentiable
    def sampling(args):

        # Function with args required for Keras Lambda function
        z_mean, z_log_var = args

        # Draw epsilon of the same shape from a standard normal distribution
        epsilon = K.random_normal(shape=tf.shape(z_mean),
                                  mean=0.,
                                  stddev=epsilon_std)

        # The latent vector is non-deterministic and differentiable
        # in respect to z_mean and z_log_var
        z = z_mean + K.exp(z_log_var / 2) * epsilon
        return z

    class CustomVariationalLayer(Layer):
        """
        Define a custom layer that learns and performs the training

        """
        def __init__(self, **kwargs):
            # https://keras.io/layers/writing-your-own-keras-layers/
            self.is_placeholder = True
            super(CustomVariationalLayer, self).__init__(**kwargs)

        def vae_loss(self, x_input, x_decoded):
            reconstruction_loss = original_dim * \
                                  metrics.binary_crossentropy(x_input, x_decoded)
            kl_loss = -0.5 * K.sum(
                1 + z_log_var_encoded - K.square(z_mean_encoded) -
                K.exp(z_log_var_encoded),
                axis=-1)
            return K.mean(reconstruction_loss + (K.get_value(beta) * kl_loss))

        def call(self, inputs):
            x = inputs[0]
            x_decoded = inputs[1]
            loss = self.vae_loss(x, x_decoded)
            self.add_loss(loss, inputs=inputs)
            # We won't actually use the output.
            return x

    class WarmUpCallback(Callback):
        def __init__(self, beta, kappa):
            self.beta = beta
            self.kappa = kappa

        # KL warm-up: increase beta by kappa at the end of each epoch
        # until it reaches 1
        def on_epoch_end(self, epoch, logs=None):
            if K.get_value(self.beta) <= 1:
                K.set_value(self.beta, K.get_value(self.beta) + self.kappa)

    # Process data

    # Split off a random 10% test set
    test_set_percent = 0.1
    rnaseq_train_df, rnaseq_test_df = train_test_split(
        rnaseq_df, test_size=test_set_percent, random_state=123)

    # Input placeholder for RNAseq data with a specific input size
    rnaseq_input = Input(shape=(original_dim, ))

    # ~~~~~~~~~~~~~~~~~~~~~~
    # ENCODER
    # ~~~~~~~~~~~~~~~~~~~~~~
    # Depending on the depth of the model, the input is eventually compressed
    # into a mean and a log-variance vector of prespecified size. Each layer is
    # initialized with Glorot uniform weights, and each step (dense connection,
    # batch norm, and relu activation) is funneled separately.
    #
    # Each vector of length `latent_dim` is connected to the rnaseq input tensor.
    # For a depth 2 architecture: input_dim -> latent_dim -> latent_dim2

    if depth == 1:
        z_shape = latent_dim
        z_mean_dense = Dense(latent_dim,
                             kernel_initializer='glorot_uniform')(rnaseq_input)
        z_log_var_dense = Dense(
            latent_dim, kernel_initializer='glorot_uniform')(rnaseq_input)
    elif depth == 2:
        z_shape = latent_dim2
        hidden_dense = Dense(latent_dim,
                             kernel_initializer='glorot_uniform')(rnaseq_input)
        hidden_dense_batchnorm = BatchNormalization()(hidden_dense)
        hidden_enc = Activation('relu')(hidden_dense_batchnorm)

        z_mean_dense = Dense(latent_dim2,
                             kernel_initializer='glorot_uniform')(hidden_enc)
        z_log_var_dense = Dense(
            latent_dim2, kernel_initializer='glorot_uniform')(hidden_enc)

    z_mean_dense_batchnorm = BatchNormalization()(z_mean_dense)
    z_mean_encoded = Activation('relu')(z_mean_dense_batchnorm)

    z_log_var_dense_batchnorm = BatchNormalization()(z_log_var_dense)
    z_log_var_encoded = Activation('relu')(z_log_var_dense_batchnorm)

    # Return the encoded and randomly sampled z vector.
    # Takes the two encoded tensors as input to the custom sampling function
    # layer with a `latent_dim`-sized output
    z = Lambda(sampling,
               output_shape=(z_shape, ))([z_mean_encoded, z_log_var_encoded])

    # ~~~~~~~~~~~~~~~~~~~~~~
    # DECODER
    # ~~~~~~~~~~~~~~~~~~~~~~
    # The layers are different depending on the prespecified depth.
    #
    # Single layer: glorot uniform initialized and sigmoid activation.
    # Double layer: relu activated hidden layer followed by sigmoid reconstruction
    if depth == 1:
        decoder_to_reconstruct = Dense(original_dim,
                                       kernel_initializer='glorot_uniform',
                                       activation='sigmoid')
    elif depth == 2:
        decoder_to_reconstruct = Sequential()
        decoder_to_reconstruct.add(
            Dense(latent_dim,
                  kernel_initializer='glorot_uniform',
                  activation='relu',
                  input_dim=latent_dim2))
        decoder_to_reconstruct.add(
            Dense(original_dim,
                  kernel_initializer='glorot_uniform',
                  activation='sigmoid'))

    rnaseq_reconstruct = decoder_to_reconstruct(z)

    # ~~~~~~~~~~~~~~~~~~~~~~
    # CONNECTIONS
    # ~~~~~~~~~~~~~~~~~~~~~~
    adam = optimizers.Adam(lr=learning_rate)
    vae_layer = CustomVariationalLayer()([rnaseq_input, rnaseq_reconstruct])
    vae = Model(rnaseq_input, vae_layer)
    vae.compile(optimizer=adam, loss=None, loss_weights=[beta])

    # Fit the model
    hist = vae.fit(np.array(rnaseq_train_df),
                   shuffle=True,
                   epochs=epochs,
                   batch_size=batch_size,
                   validation_data=(np.array(rnaseq_test_df), None),
                   callbacks=[WarmUpCallback(beta, kappa)])

    # Save training performance
    history_df = pd.DataFrame(hist.history)
    history_df = history_df.assign(num_components=latent_dim,
                                   learning_rate=learning_rate,
                                   batch_size=batch_size,
                                   epochs=epochs,
                                   kappa=kappa,
                                   seed=seed,
                                   depth=depth,
                                   first_layer=first_layer,
                                   dataset=data_basename)
    history_df.to_csv(output_filename, sep='\t')
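A hedged invocation sketch; every argument value below is a placeholder chosen for illustration, not a setting taken from the source:

run_vae(rnaseq_file='data/rnaseq_matrix.tsv.gz',   # placeholder path
        learning_rate=0.0005,
        batch_size=50,
        epochs=50,
        kappa=1.0,
        depth=2,
        first_layer=100,
        output_filename='training_history.tsv',    # placeholder output
        latent_dim=25,
        scale=True,
        subset_mad_genes=5000,
        data_basename='example-dataset')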
Example #5
#       1. DNN          #
if model_num == 1:
    inputs = Input(shape=(28, 5))  # avoid shadowing the builtin `input`
    model = Dense(64)(inputs)
    model = Dense(128)(model)
    # model = Dense(32)(model)
    drop_out = Dropout(0.3)(model)
    flatten = Flatten(name='flatten')(drop_out)
    output = Dense(1)(flatten)
    model = Model(inputs=inputs, outputs=output)

#       2. LSTM         #
elif model_num == 2:

    model = Sequential()
    model.add(LSTM(32, activation='relu', input_shape=(input_data_length, 5)))
    model.add(Dense(64))
    model.add(Dense(1))

#       3. DNN ENSEMBLE     #
elif model_num == 3:
    inputs = Input(shape=(28, 5))
    model = Dense(64)(inputs)
    model = Dense(128)(model)
    # model = Dense(32)(model)
    drop_out = Dropout(0.3)(model)
    flatten = Flatten(name='flatten')(drop_out)
    output = Dense(1)(flatten)

    inputs2 = Input(shape=(28, 5))
    model2 = Dense(32)(inputs2)
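The third branch (model_num == 3) is cut off after the second model's first layer, and the missing code is left as-is. Purely as a hedged sketch (not the author's continuation), two functional branches are commonly merged with concatenate before a shared head:

from keras.layers import Input, Dense, Flatten, concatenate
from keras.models import Model

# Two hypothetical branches over the same input shape
inputs_a = Input(shape=(28, 5))
branch_a = Flatten()(Dense(64)(inputs_a))

inputs_b = Input(shape=(28, 5))
branch_b = Flatten()(Dense(32)(inputs_b))

# Merge the branches and attach a single regression head
merged = concatenate([branch_a, branch_b])
output = Dense(1)(merged)
ensemble = Model(inputs=[inputs_a, inputs_b], outputs=output)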
Example #6
def autoencoder(X, layers, mode, act=VAR.ae_act, opt=VAR.ae_opt,
                loss=VAR.ae_loss, dropout=VAR.ae_dropout,
                epochs=VAR.ae_epoch, verbose=VAR.ae_verbose,
                summary_display=VAR.ae_summary_display):

    from keras.models import Sequential, Model
    from keras.layers import Dense, Input, Dropout
    import warnings

    #ignore warnings
    warnings.filterwarnings("ignore")

    #create an input holder
    num_features = len(X[0])
    input_holder = Input(shape=(num_features, ))

    #initialise the encoder variables
    layers_copy = [num_features]
    for l in layers:
        layers_copy.append(l)

    # --------------------------------------------------------------------
    # basic autoencoder
    # --------------------------------------------------------------------
    if mode == 0:
        #initialise the decoder variables
        n = len(layers_copy)
        layers_dec = []

        for i in range(n - 1, 0, -1):
            layers_dec.append(layers_copy[i])
        layers_dec.append(num_features)

        #build an encoder
        encoder = input_holder
        for i in range(0, n - 1):
            encoder = Dense(layers_copy[i + 1], input_dim=layers_copy[i],
                            activation=act)(encoder)

            # add dropout
            if dropout != 0: encoder = Dropout(dropout)(encoder)

        decoder = encoder
        for i in range(0, len(layers_dec) - 1):
            decoder = Dense(layers_dec[i + 1], input_dim=layers_dec[i],
                            activation=act)(decoder)

            # add dropout, except after the final reconstruction layer
            if dropout != 0:
                if i < len(layers_dec) - 2: decoder = Dropout(dropout)(decoder)

        # Keras 2 uses inputs=/outputs= (input=/output= is the old Keras 1 API)
        autoencoder = Model(inputs=input_holder, outputs=decoder)
        autoencoder.compile(optimizer=opt, loss=loss)

        #train the autoencoder
        loss_hist = [autoencoder.fit(X, X, epochs=epochs, verbose=verbose)]

        # extract the encoder from the trained autoencoder
        encoder = Model(inputs=input_holder, outputs=encoder)

        if summary_display: print(autoencoder.summary())

        return encoder, loss_hist

    # --------------------------------------------------------------------
    # stacked autoencoder
    # --------------------------------------------------------------------
    elif mode == 1:
        tmp_holder = input_holder
        x = X
        stk_encoders, loss_hist = [], []

        #train encoder layers
        for i in range(len(layers_copy) - 1):
            print("Training Layer %d/%d ..." % (i + 1, len(layers_copy) - 1))

            encoder = Dense(layers_copy[i + 1], input_dim=layers_copy[i],
                            activation=act)(tmp_holder)
            if dropout != 0: encoder = Dropout(dropout)(encoder)

            decoder = Dense(layers_copy[i], input_dim=layers_copy[i + 1],
                            activation=act)(encoder)

            autoencoder = Model(inputs=tmp_holder, outputs=decoder)
            autoencoder.compile(optimizer=opt, loss=loss)

            # train a layer
            loss_his = autoencoder.fit(x, x, epochs=epochs, verbose=verbose)
            loss_hist.append(loss_his)

            # use output of the layer as next training input
            encoder = Model(inputs=tmp_holder, outputs=encoder)
            x = encoder.predict(x)

            # update the input_holder
            tmp_holder = Input(shape=(layers_copy[i + 1], ))

            # store the trained layer in a list
            stk_encoders.append(encoder)

        # connect the trained encoder layers into a single encoder
        encoder = Sequential()
        for e in stk_encoders:
            encoder.add(e)

        if summary_display: print(encoder.summary())

        return encoder, loss_hist
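A minimal usage sketch for this function, assuming `VAR` (the module-level defaults object referenced in the signature) is importable; the data and layer sizes below are placeholders:

import numpy as np

# Placeholder data: 500 samples with 64 features scaled to [0, 1]
X = np.random.rand(500, 64)

# mode=0 trains one end-to-end autoencoder; mode=1 trains layer-wise
encoder, history = autoencoder(X, layers=[32, 16, 8], mode=0)

# Project the data into the learned 8-dimensional code
codes = encoder.predict(X)
print(codes.shape)  # (500, 8)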