Example #1
def build_lstm_lstm():
    global n_past, n_future, n_features, n_features_ori
    inputA = keras.Input(shape=(n_past, n_features_ori), name="cA")
    inputD = keras.Input(shape=(n_past, n_features), name="cD")
    # x is the LSTM for the approximation coefficients
    x = layers.CuDNNLSTM(200, return_sequences=False)(inputA)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(100, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(100, activation='relu')(x)

    # y is the LSTM for the detail coefficients
    y = layers.CuDNNLSTM(200, return_sequences=False)(inputD)
    y = layers.Dropout(0.2)(y)
    y = layers.Dense(100, activation='relu')(y)
    y = layers.Dropout(0.2)(y)
    y = layers.Dense(10, activation='sigmoid')(y)
    # combine the two LSTM branches
    com = layers.concatenate([x, y])
    # z = LSTM(200, activation='relu', return_sequences=False)(com)
    # z = Dense(100, activation="relu")
    z = layers.Dense(n_future)(com)
    model = keras.Model(inputs=[inputA, inputD], outputs=z)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
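These builders read several module-level names (n_past, n_future, n_features, n_features_ori, my_optimizer) that the snippets never define. A minimal setup sketch, with placeholder values that are not from the source:

# hypothetical setup for the globals the builders in these examples assume
from tensorflow import keras
from tensorflow.keras import layers

n_past = 48          # input window length (placeholder)
n_future = 12        # forecast horizon (placeholder)
n_features = 4       # channels of the detail input (placeholder)
n_features_ori = 4   # channels of the approximation input (placeholder)
my_optimizer = keras.optimizers.Adam(1e-3)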
Example #2
def build_cnn_autolstm():
    global n_past, n_future, n_features
    inputA = keras.Input(shape=(n_past, int(n_features)), name="cA")
    inputD = keras.Input(shape=(n_past, int(n_features)), name="cD")
    # x is the CNN for the approximation coefficients
    x = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(inputA)
    x = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2)(x)
    x = layers.Flatten()(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(100, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(50, activation='relu')(x)
    # x = layers.Dense(n_future)(x)
    # y is the encoder-decoder LSTM for the detail coefficients
    y = layers.CuDNNLSTM(200, return_sequences=False)(inputD)
    y = layers.RepeatVector(n_future)(y)
    y = layers.CuDNNLSTM(200, return_sequences=True)(y)
    y = layers.TimeDistributed(layers.Dense(100, activation='relu'))(y)
    y = layers.TimeDistributed(layers.Dense(50))(y)
    y = layers.CuDNNLSTM(50)(y)
    # y = layers.Reshape((-1,50))(y)
    # y = layers.Dense(50,activation='sigmoid')(y)

    # combine the CNN and LSTM branches
    com = layers.concatenate([x, y])
    # z = LSTM(200, activation='relu', return_sequences=False)(com)
    # z = Dense(100, activation="relu")
    z = layers.Dense(n_future)(com)

    model = keras.Model(inputs=[inputA, inputD], outputs=z)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
Example #3
def build_2d_model(args):
    l2r = 1e-9

    T, X = tfkl.Input((N_TOKS,)), tfkl.Input((H, W, 3 + N_OBJS))

    ti = tfkl.Embedding(N_VOCAB, N_EMBED, input_length=N_TOKS)(T)
    print(ti.shape)
    th = tfkm.Sequential([
        tfkl.Bidirectional(tfkl.CuDNNLSTM(128, return_sequences=True)),
        tfkl.Bidirectional(tfkl.CuDNNLSTM(128, return_sequences=True)),
        tfkl.Conv1D(256, (1,), activation='elu', kernel_regularizer=tfkr.l2(l2r)),
        tfkl.Conv1D(6, (1,), activation=None, kernel_regularizer=tfkr.l2(l2r)),
        tfkl.Softmax(axis=-2, name='lstm_attn'),
    ], name='lstm_layers')(ti)

    tia = tfkb.sum(tfkl.Reshape((N_TOKS, 1, -1))(th) * tfkl.Reshape((N_TOKS, N_EMBED, 1))(ti), axis=-3)

    Xi = tfkb.sum(X[:, :, :, 3:], axis=-1, keepdims=True)

    s1 = tfkl.Dense(N_OBJS, activation='softmax')(tia[:, :, 0])
    s1b = tfkm.Sequential([tfkl.RepeatVector(W * H), tfkl.Reshape((H, W, N_OBJS))])(s1)
    Xs1 = tfkb.sum(X[:, :, :, 3:] * s1b, axis=-1, keepdims=True)

    s2 = tfkl.Dense(3)(tia[:, :, 1])
    s2b = tfkm.Sequential([tfkl.RepeatVector(W * H), tfkl.Reshape((H, W, 3))])(s2)
    s2c = tfkb.sum(s2b * X[:, :, :, 2:3] - (1 - Xi) * 20, axis=-1, keepdims=True)
    Xs2 = tfkm.Sequential([tfkl.Reshape((-1, 1)), tfkl.Softmax(axis=-2), tfkl.Reshape((H, W, 1))])(s2c)
    Xs2 = Xs2 - tfkb.max(Xs2, axis=[1, 2], keepdims=True)

    s3 = tfkl.Dense(N_OBJS, activation='softmax')(tia[:, :, 2])
    s3b = tfkm.Sequential([tfkl.RepeatVector(W * H), tfkl.Reshape((H, W, N_OBJS))])(s3)
    Xs3 = tfkb.sum(X[:, :, :, 3:] * s3b, axis=-1, keepdims=True)

    s4 = tfkl.Dense(16, activation='softmax')(tia[:, :, 3])
    s4b = tfkm.Sequential([tfkl.RepeatVector(W * H), tfkl.Reshape((H, W, 16))])(s4)
    Xs4 = s4b * Xi

    s5 = tfkl.Dense(16, activation='softmax')(tia[:, :, 4])
    s5b = tfkm.Sequential([tfkl.RepeatVector(W * H), tfkl.Reshape((H, W, 16))])(s5)
    Xs5 = s5b * Xi

    s6 = tfkl.Dense(16, activation='softmax')(tia[:, :, 5])
    s6b = tfkm.Sequential([tfkl.RepeatVector(W * H), tfkl.Reshape((H, W, 16))])(s6)
    Xs6 = s6b * Xi

    xt = tfkl.concatenate([Xi, Xs1, Xs2, Xs3, Xs4, Xs5, Xs6], axis=-1)

    attn = unet(xt)
    Y = tfkb.sum(attn * X[:, :, :, :2], axis=[1, 2])

    model = tfkm.Model(inputs=[T, X], outputs=[Y])

    def acc(y_true, y_pred):
        # fraction of samples where every output is within args.tol of the target
        return tfkb.mean(tfkb.min(tfkb.cast((tfkb.abs(y_true - y_pred) < args.tol), 'float32'), axis=1))

    model.compile(tfk.optimizers.Adam(args.lr), 'mse', metrics=[acc])

    return model
Example #4
    def __init__(self, n_feat=22, n_lstm=1, lstm_sizes="[5]", fc_sizes="[80]", lstm_dropout=0.2, dropout=0.1, activation='sigmoid'):
        super(LSTM_one_to_one, self).__init__()

        lstm_sizes = ast.literal_eval(lstm_sizes)
        fc_sizes = ast.literal_eval(fc_sizes)

        shape = (None, n_feat)
        Input = keras.Input(shape)

        slices = layers.Lambda(
            lambda x, i: x[:, :, i: i + 1], name='slicer_lambda')
        y = layers.Masking(mask_value=0, name="masking")(Input)

        n_hidden = lstm_sizes[0]

        lstms = [layers.CuDNNLSTM(
            n_hidden, return_sequences=False, name="lstm1_feature_%d" % _) for _ in range(n_feat)]

        ys = []
        for i, lstm in enumerate(lstms):
            slices.arguments = {'i': i}
            ys.append(lstm(slices(y)))
        y = layers.concatenate(ys, axis=-1, name="merge")

        for i, fc in enumerate(fc_sizes):
            y = layers.Dense(fc, activation=activation, name="fc_%d" % i)(y)
            y = layers.Dropout(dropout, name="dropout_%i" % i)(y)
        y = layers.Dense(1, activation=activation)(y)

        self.model = keras.Model(Input, y)
Example #5
def build_ende_lstm():
    global n_past,n_future,n_features
    input = keras.Input(shape=(n_past, int(n_features)))
    # x = layers.LSTM(200, activation='relu', input_shape=(n_past, n_features),return_sequences=False)(input)
    x = layers.CuDNNLSTM(200,  input_shape=(n_past, n_features),return_sequences=False)(input)
    x = layers.RepeatVector(n_future)(x)
    # x = layers.LSTM(200, activation='relu',return_sequences=True)(x)
    x = layers.CuDNNLSTM(200, return_sequences=True)(x)
    x = layers.TimeDistributed(layers.Dense(100, activation='relu'))(x)
    x = layers.TimeDistributed(layers.Dense(1))(x)
    out = layers.Reshape((-1,n_future))(x)
    model = keras.Model(inputs=[input], outputs=out)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
       
    return model
Example #6
    def __init__(self, config):
        super(HBMP, self).__init__()
        self.config = config
        self.max_pool = layers.GlobalMaxPool1D()

        # self.cells = config.cells

        self.hidden_dim = config.hidden_dim
        self.rnn1 = layers.CuDNNLSTM(units=config.hidden_dim,
                                     return_sequences=True)
        self.rnn2 = layers.CuDNNLSTM(units=config.hidden_dim,
                                     return_sequences=True)
        self.rnn3 = layers.CuDNNLSTM(units=config.hidden_dim,
                                     return_sequences=True)
        self.bidirectional_1 = layers.Bidirectional(self.rnn1)
        self.bidirectional_2 = layers.Bidirectional(self.rnn2)
        self.bidirectional_3 = layers.Bidirectional(self.rnn3)
Example #7
def build_cnnlstm3():
    global n_past, n_future, n_features
    inputA3 = keras.Input(shape=(n_past, n_features), name="cA3")
    inputD3 = keras.Input(shape=(n_past, n_features), name="cD3")
    inputD2 = keras.Input(shape=(n_past, n_features), name="cD2")
    inputD1 = keras.Input(shape=(n_past, n_features), name="cD1")
    # x is the CNN for the level-3 approximation coefficients
    x = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(inputA3)
    x = layers.Conv1D(filters=64, kernel_size=2, activation='relu')(x)
    x = layers.MaxPooling1D(pool_size=2)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(200, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(144, activation='relu')(x)
    x = layers.Dense(n_future)(x)

    # b, c, d are the LSTMs for the detail coefficients at each level
    b = layers.CuDNNLSTM(200, return_sequences=False)(inputD3)
    b = layers.Dropout(0.2)(b)
    b = layers.Dense(100)(b)
    b = layers.Dropout(0.2)(b)
    b = layers.Dense(n_future, activation='tanh')(b)
    # b = layers.LeakyReLU()(b)

    c = layers.CuDNNLSTM(200, return_sequences=False)(inputD2)
    c = layers.Dropout(0.2)(c)
    c = layers.Dense(100)(c)
    c = layers.Dropout(0.2)(c)
    c = layers.Dense(n_future, activation='tanh')(c)
    # c = layers.LeakyReLU()(c)

    d = layers.CuDNNLSTM(200, return_sequences=False)(inputD1)
    d = layers.Dropout(0.2)(d)
    d = layers.Dense(100)(d)
    d = layers.Dropout(0.2)(d)
    d = layers.Dense(n_future, activation='tanh')(d)
    # d = layers.LeakyReLU()(d)
    # combine the CNN branch with the three LSTM branches
    com = layers.concatenate([x, b, c, d])
    # z = LSTM(200, activation='relu', return_sequences=False)(com)
    # z = Dense(100, activation="relu")
    z = layers.Dense(n_future)(com)
    model = keras.Model(inputs=[inputA3, inputD3, inputD2, inputD1], outputs=z)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
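The input names cA3, cD3, cD2 and cD1 follow the coefficient naming of a level-3 discrete wavelet transform; assuming that is where the inputs come from, PyWavelets could produce them like this (illustrative only, not from the source):

import numpy as np
import pywt

signal = np.random.randn(1024)  # placeholder series
cA3, cD3, cD2, cD1 = pywt.wavedec(signal, 'db4', level=3)
# the coefficient arrays shrink with each level, so they would still need
# windowing/resampling to share the (n_past, n_features) input shape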
Example #8
def build_lstm_v2():
    global n_past, n_future, n_features
    input = keras.Input(shape=(n_past, int(n_features)))
    # x = layers.LSTM(200, activation='relu', input_shape=(n_past, n_features),return_sequences=False)(input)
    x = layers.CuDNNLSTM(300, return_sequences=True)(input)
    x = layers.CuDNNLSTM(300, return_sequences=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(100, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(n_future)(x)
    model = keras.Model(inputs=[input], outputs=x)
    model.compile(loss='mse', optimizer='adam')
    model.summary()
    plot_model(model,
               to_file=save_path + 'model_{}.png'.format(syn),
               show_shapes=True)
    return model
Example #9
    def __init__(self, config):
        super(BiLSTMMaxPoolEncoder, self).__init__()
        self.config = config
        self.rnn = layers.CuDNNLSTM(units=config.hidden_dim,
                                    return_sequences=True)
        self.bidirectional = layers.Bidirectional(self.rnn)
        self.dropout = layers.Dropout(config.dropout)

        self.max_pool = layers.GlobalMaxPool1D()
Example #10
def RNNSpeechModel(nCategories, samplingrate=16000, inputLength=16000):
    # simple LSTM
    sr = samplingrate
    iLen = inputLength

    inputs = L.Input((iLen,))

    x = L.Reshape((1, -1))(inputs)

    x = Melspectrogram(n_dft=1024, n_hop=128, input_shape=(1, iLen),
                       padding='same', sr=sr, n_mels=80,
                       fmin=40.0, fmax=sr / 2, power_melgram=1.0,
                       return_decibel_melgram=True, trainable_fb=False,
                       trainable_kernel=False,
                       name='mel_stft')(x)

    x = Normalization2D(int_axis=0)(x)

    # note that Melspectrogram puts the sequence in shape (batch_size, melDim, timeSteps, 1)
    # we would rather have it the other way around for LSTMs

    x = L.Permute((2, 1, 3))(x)

    x = L.Conv2D(10, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)
    x = L.Conv2D(1, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)

    # x = Reshape((125, 80)) (x)
    # keras.backend.squeeze(x, axis)
    x = L.Lambda(lambda q: K.squeeze(q, -1), name='squeeze_last_dim')(x)

    x = L.Bidirectional(L.CuDNNLSTM(64, return_sequences=True))(
        x)  # [b_s, seq_len, vec_dim]
    x = L.Bidirectional(L.CuDNNLSTM(64))(x)

    x = L.Dense(64, activation='relu')(x)
    x = L.Dense(32, activation='relu')(x)

    output = L.Dense(nCategories, activation='softmax')(x)

    model = Model(inputs=[inputs], outputs=[output])

    return model
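Melspectrogram and Normalization2D are not imported in the snippet; their argument lists match the kapre 0.1.x API, so that origin is an assumption. A usage sketch:

# from kapre.time_frequency import Melspectrogram   (assumed import)
# from kapre.utils import Normalization2D           (assumed import)

model = RNNSpeechModel(nCategories=12)  # e.g. 12 keyword classes
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])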
Example #11
 def __init__(self,
              original_dim,
              intermediate_dim=1024,
              name='decoder',
              **kwargs):
     super(Decoder, self).__init__(name=name, **kwargs)
     self.repeat = layers.RepeatVector(original_dim)
     self.dense_proj = layers.CuDNNLSTM(intermediate_dim,
                                        return_sequences=True)
     self.dense_output = layers.TimeDistributed(layers.Dense(1))
Example #12
 def __init__(self,
              latent_dim=16,
              intermediate_dim=1024,
              name='encoder',
              **kwargs):
     super(Encoder, self).__init__(name=name, **kwargs)
     self.dense_proj = layers.CuDNNLSTM(intermediate_dim,
                                        return_sequences=False)
     self.dense_mean = layers.Dense(latent_dim)
     self.dense_log_var = layers.Dense(latent_dim)
     self.sampling = Sampling()
Example #13
def build_lstm():
    global n_past,n_future,n_features
    input = keras.Input(shape=(n_past, int(n_features)))
    # x = layers.LSTM(200, activation='relu', input_shape=(n_past, n_features),return_sequences=False)(input)
    x = layers.CuDNNLSTM(200,input_shape=(n_past, n_features),return_sequences=False)(input)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(100, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(n_future)(x)
    model = keras.Model(inputs=[input], outputs=x)
    model.compile(loss='mse', optimizer=my_optimizer)
    model.summary()
    return model
Example #14
def decoder_smate(encoder, timesteps, data_dim, pool_step):
    input_ = encoder.output  # input: (batch_size, timesteps, latent_dim)

    out = ll.UpSampling1D(size=pool_step)(input_)

    # 1D-CNN for reconstructing the spatial information
    #cells = [rnn_cell(module_name) for _ in range(num_layers)]
    #out = ll.RNN(cells, return_sequences=True)(out)

    # temporal axis
    if (module_name == 'gru'):
        if (tf.test.is_gpu_available()):
            out_t = ll.CuDNNGRU(hidden_dim, return_sequences=True)(out)
            for i in range(num_layers - 1):
                out_t = ll.CuDNNGRU(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)
        else:
            out_t = ll.GRU(hidden_dim, return_sequences=True)(out)
            for i in range(num_layers - 1):
                out_t = ll.GRU(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)

    elif (module_name == 'lstm'):
        if (tf.test.is_gpu_available()):
            out_t = ll.CuDNNLSTM(hidden_dim, return_sequences=True)(out)
            for i in range(num_layers - 1):
                out_t = ll.CuDNNLSTM(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)
        else:
            out_t = ll.LSTM(hidden_dim, return_sequences=True)(out)
            for i in range(num_layers - 1):
                out_t = ll.LSTM(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)

    out = ll.Dense(data_dim, activation='sigmoid')(out_t)

    model = Model(encoder.input, out)
    return model
Example #15
def build_tanh_lstm():
    global n_past, n_future, n_features
    input = keras.Input(shape=(n_past, int(n_features)))
    # x = layers.LSTM(200, activation='relu', input_shape=(n_past, n_features),return_sequences=False)(input)
    x = layers.CuDNNLSTM(1000)(input)
    # x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(500, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(n_future, activation='tanh')(x)
    model = keras.Model(inputs=[input], outputs=x)
    model.compile(loss='mse', optimizer='adam')
    model.summary()
    return model
Example #16
def pLSTM(input, width, bn=False, cudnn=tf_cuda_available(), **kwargs):
    # NOTE: the cudnn default is evaluated once, at import time
    if bn:
        print('WARNING: Batch normalisation can be unstable with LSTM layers')
    # TODO Test batch normalisation: does not always work
    if cudnn:
        output = kl.CuDNNLSTM(width, return_sequences=True, **kwargs)(input)
    else:
        output = kl.LSTM(width,
                         activation='tanh',
                         recurrent_activation='sigmoid',
                         return_sequences=True,
                         **kwargs)(input)  # For compatibility with CuDNNLSTM
    # l_out = kl.LeakyReLU(alpha=0.3)(l_out) # TODO Makes it unstable. tanh works though
    return output
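A minimal usage sketch, assuming kl is this snippet's alias for tensorflow.keras.layers and tf_cuda_available() is the author's own helper:

inp = kl.Input(shape=(100, 8))  # (timesteps, features), placeholder shape
h = pLSTM(inp, width=64)
h = pLSTM(h, width=32)          # stacks cleanly because return_sequences=True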
Example #17
def PretrainedLSTM(save_path,
                   input_layer=None,
                   return_sequences=False,
                   load_weights=True):
    """

        :params:
            - save_path : Folder where pretrained files have been saved
            - input_layer (optional): Should be keras Input layer with tf.string input

        :output:
            - keras model

        NOTE: you have to call tf.tables_initializer().run() somewhere before fitting
        the data.

    """
    assert os.path.exists(save_path), "{} doesn't exist".format(save_path)
    config = json.load(open(os.path.join(save_path, "config.json")))
    weights = pickle.load(open(os.path.join(save_path, "weights.pkl"), "rb"))
    vocab_file = os.path.join(save_path, "vocab.txt")

    #config
    max_vocab_size = config["max_vocab_size"]
    embed_size = config["embed_size"]
    hidden_size = config["hidden_size"]
    num_layers = config["num_layers"]

    if input_layer is None:
        input_layer = layers.Input(shape=(None, ), dtype="string")

    lookup_vocab = layers.Lambda(lambda x: lookup_layer(x, vocab_file))
    input_layer_idx = lookup_vocab(input_layer)

    embeded_input = layers.Embedding(max_vocab_size, embed_size,
                                     weights=weights["embedding"] if load_weights else None)\
                                     (input_layer_idx)
    embeded_input = layers.Dropout(0.3)(embeded_input)
    rnn_input = embeded_input
    for i in range(num_layers):
        # stack the layers: intermediate layers must return sequences so the
        # next LSTM gets 3-D input; only the last layer honours the caller's flag
        rnn_output = layers.CuDNNLSTM(
            units=hidden_size,
            return_sequences=True if i < num_layers - 1 else return_sequences,
            weights=weights["lstm"][i] if load_weights else None)(rnn_input)
        rnn_input = rnn_output

    model = Model(inputs=[input_layer], outputs=[rnn_output])
    return model
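A TF1-era usage sketch of the table initialisation the docstring asks for (the folder name is a placeholder):

import tensorflow as tf
from tensorflow.keras import backend as K

model = PretrainedLSTM("lm_checkpoint/")      # hypothetical save_path
K.get_session().run(tf.tables_initializer())  # required before fit/predict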
Example #18
def keras_model_fn(model_config, vocab_size, embedding_size, embeddings):
    """GPU version of Stacked Bi-LSTM and Bi-GRU with Two Fasttext
    """
    ## hyperparams
    model_name = model_config['model_name']
    num_class = model_config['num_class']
    lstm_hs = model_config['lstm_hs']
    gru_hs = model_config['gru_hs']
    learning_rate = model_config['learning_rate']

    ## build model - , weights=[embeddings[1]]
    inputs = ks.Input(shape=(None, ), dtype='int32', name='inputs')
    embedded_sequences_ft1 = layers.Embedding(vocab_size,
                                              embedding_size,
                                              trainable=True,
                                              mask_zero=False)(inputs)
    embedded_sequences_ft2 = layers.Embedding(vocab_size,
                                              embedding_size,
                                              trainable=True,
                                              mask_zero=False)(inputs)
    concat_embed = layers.concatenate(
        [embedded_sequences_ft1, embedded_sequences_ft2])
    concat_embed = layers.SpatialDropout1D(0.5)(concat_embed)
    x = layers.Bidirectional(layers.CuDNNLSTM(
        lstm_hs, return_sequences=True))(concat_embed)
    x, x_h, x_c = layers.Bidirectional(
        layers.CuDNNGRU(gru_hs, return_sequences=True, return_state=True))(x)
    x_1 = layers.GlobalMaxPool1D()(x)
    x_2 = layers.GlobalAvgPool1D()(x)
    x_out = layers.concatenate([x_1, x_2, x_h])
    x_out = layers.BatchNormalization()(x_out)
    outputs = layers.Dense(num_class, activation='softmax',
                           name='outputs')(x_out)  # outputs
    model = ks.Model(inputs, outputs, name=model_name)

    ## compile
    model.compile(loss='categorical_crossentropy',
                  optimizer=ks.optimizers.Adam(lr=learning_rate,
                                               clipnorm=.25,
                                               beta_1=0.7,
                                               beta_2=0.99),
                  metrics=[
                      'categorical_accuracy',
                      ks.metrics.TopKCategoricalAccuracy(k=3)
                  ])  # metric what?
    return model
Example #19
def load(input_shape, output_shape, cfg):
    nb_lstm_states = int(cfg['nb_lstm_states'])

    inputs = KL.Input(shape=input_shape)
    x = KL.CuDNNLSTM(units=nb_lstm_states, unit_forget_bias=True)(inputs)

    x = KL.Dense(512)(x)
    x = KL.Activation('relu')(x)
    x = KL.Dropout(0.2)(x)

    x = KL.Dense(256)(x)
    x = KL.Activation('relu')(x)
    x = KL.Dropout(0.3)(x)

    mu = KL.Dense(1)(x)
    std = KL.Dense(1)(x)
    activation_fn = get_activation_function_by_name(cfg['activation_function'])
    std = KL.Activation(activation_fn, name="exponential_activation")(std)

    output = KL.Concatenate(axis=-1)([std, mu])
    model = KM.Model(inputs=[inputs], outputs=[output])

    return model
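The [std, mu] output suggests heteroscedastic regression; a minimal sketch of a matching Gaussian negative log-likelihood loss (an assumption, the snippet does not show how it is trained):

import tensorflow as tf

def gaussian_nll(y_true, y_pred):
    # y_pred packs [std, mu] on the last axis, matching the Concatenate above
    std, mu = y_pred[:, 0:1], y_pred[:, 1:2]
    var = tf.square(std) + 1e-6  # floor the variance for numerical stability
    return tf.reduce_mean(0.5 * tf.math.log(var)
                          + 0.5 * tf.square(y_true - mu) / var)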
Example #20
    def __init__(self,
                 dim_z,
                 seq_len,
                 dim_y=1,
                 dim_u=0,
                 rnn_units=32,
                 no_use_cudnn_rnn=True,
                 **kwargs):
        super(DeepState, self).__init__()

        self.seq_len = seq_len
        self.dim_z = dim_z
        self.dim_y = dim_y

        # Create model
        if no_use_cudnn_rnn:
            self.rnn = layers.LSTM(rnn_units, return_sequences=True)
        else:
            self.rnn = layers.CuDNNLSTM(rnn_units, return_sequences=True)

        self.A = layers.Dense(dim_z * dim_z)
        self.C = layers.Dense(dim_z)
        self.Q = layers.Dense(dim_z * dim_z)
        self.R = layers.Dense(dim_y * dim_y)
        self.mu = layers.Dense(dim_z)
        self.sigma = layers.Dense(dim_z * dim_z)

        self._alpha_sq = tf.constant(1.,
                                     dtype=tf.float32)  # fading memory control
        self.M = 0  # process-measurement cross correlation

        # identity matrix
        self._I = tf.eye(dim_z, name='I')

        self.state = kwargs.pop('state', None)
        self.log_likelihood = None
Example #21
def language_model_graph(input_tokens, output_tokens,
                         initial_state, num_layers,
                         max_vocab_size, vocab_freqs,
                         batch_size, embed_size,
                         hidden_size, dropout,
                         optimizer,
                         num_candidate_samples,
                         maxlen, clip, 
                         type_="rnn"):
    """
        This creates language model tensorflow graph. It takes placeholder
        for input tokens, output_tokens (target), initial state for LSTM layers.
        Lanugage model graph has Embedding Layer followed by LSTM layers. Loss
        is calculated using sampled softmax layer of tensorflow.

        :params:
            - input_tokens: Placeholder for input tokens  [shape:(batch_size, None)]
            - output_tokens: Placeholder for output tokens (used as target)
                                [shape:(batch_size, None)]
            - initial_state: Initial states placeholder for feeding state in LSTM
                                Layers [shape:(num_layers, batch_size, hidden_size)]
            - num_layers: Number of LSTM Layers
            - max_vocab_size: Maximum Vocabulary size
            - vocab_freqs: Frequency of words
            - batch_size: Batch Size (should not be none)
            - embed_size: Embedding Dimensions
            - hidden_size: Hidden size of LSTM layers
            - dropout: Dropout to keep between Layers, same dropout is applied after
                        Embedding as well as between and after LSTM Layers
            - num_candidate_samples: Candidate Samples to consider for Sampled softmax
                            -1 to calculate complete softmax
            - maxlen: Sequence length of examples (bptt)
            - clip: clip gradients by `clip`

        :returns:
            - train_op: Training Op Tensorflow
            - training_flag: Var for training flag
            - sampled_loss: Sampled Loss Variable
            - loss: Complete Loss Variable
            - final_state: Output State of LSTMs
            - weights: Dictionay containing weights of Embedding and LSTM layers
            - learning_rate: Learning Rate Variable
    """

    bptt = tf.shape(input_tokens)[1]
    training_flag = tf.Variable(True)
    learning_rate = tf.Variable(20.0)
    embedding_layer = layers.Embedding(max_vocab_size, embed_size)
    rnn_layers = []

    for i in range(num_layers):
        rnn_layers.append(layers.CuDNNLSTM(units=hidden_size,
                                           return_sequences=True,
                                           return_state=True))

    embedded_input = embedding_layer(input_tokens)
    embedded_input = tf.layers.dropout(
                                    embedded_input ,
                                    rate=dropout,
                                    training=training_flag,
                                )

    states = []
    rnn_input = embedded_input
    input_state_cs =  initial_state[0]
    input_state_hs = initial_state[1]
    
    if type_ == "rnn":
        final_state_cs = []
        final_state_hs = []
        for i in range(num_layers):
            state_c, state_h = input_state_cs[i], input_state_hs[i]
            rnn_outputs = rnn_layers[i](rnn_input, initial_state=(state_c, state_h))
            rnn_output, final_state_c, final_state_h = rnn_outputs
            rnn_output = tf.layers.dropout(
                                            rnn_output,
                                            rate=dropout,
                                            training=training_flag,
                                            noise_shape=[batch_size, 1, hidden_size]
                                        )
            # feed this layer's output into the next layer
            rnn_input = rnn_output

            final_state_cs.append(final_state_c)
            final_state_hs.append(final_state_h)

        final_state_c = tf.stack(final_state_cs, 0)
        final_state_h = tf.stack(final_state_hs, 0)

        final_state = (final_state_c, final_state_h)
    elif type_ == "gcnn":
        rnn_output = _gcnn_block(rnn_input)
        final_state = (input_state_cs, input_state_hs)
        rnn_output = layers.Dense(hidden_size, activation='relu')(rnn_output)
    # rnn_output = tf.layers.dropout(
    #                                 rnn_output ,
    #                                 rate=dropout,
    #                                 training=training_flag,
    #                                 noise_shape=[batch_size, 1, embed_size]
    #                             )

    weight = embedding_layer.weights[0]
    weight = tf.transpose(weight, [1, 0])
    # weight = None
    with tf.variable_scope("loss"):
        sampled_loss = _sampled_lm_loss(rnn_output, output_tokens,
                             max_vocab_size,
                             vocab_freqs=vocab_freqs,
                             num_candidate_samples=num_candidate_samples,
                             weight=weight)

    with tf.variable_scope("loss", reuse=True):
        loss = _sampled_lm_loss(rnn_output, output_tokens,
                             max_vocab_size,
                             vocab_freqs=vocab_freqs,
                             num_candidate_samples=-1,
                             weight=weight)

    # softmax = AdaptiveSoftmax(hidden_size, cutoff=[2800, 20000, 76000])
    # loss, _ = sampled_loss, _ = softmax.loss(rnn_output, output_tokens)
    with tf.variable_scope("optimizer"):
        # sampled_loss = loss
        t_vars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(sampled_loss*maxlen, t_vars),
                                                        clip)
        if optimizer == "adam":
            train_op = tf.compat.v1.train.AdamOptimizer(learning_rate).apply_gradients(zip(grads, t_vars))
        elif optimizer == "sgd":
            train_op = tf.compat.v1.train.GradientDescentOptimizer(learning_rate).apply_gradients(zip(grads, t_vars))
        else:
            train_op = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=0.9).apply_gradients(zip(grads, t_vars))


    # Extract Weights
    weights = {}
    weights["embedding"] = embedding_layer.weights
    weights["lstm"] = [rnn_layer.weights for rnn_layer in rnn_layers]

    return train_op, training_flag, sampled_loss, loss,  final_state, weights,\
                learning_rate
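A hypothetical placeholder setup matching how the graph indexes initial_state ([0] holds the cell states, [1] the hidden states):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()  # TF1-style graph mode

batch_size, num_layers, hidden_size = 32, 2, 512  # placeholder sizes
input_tokens = tf.placeholder(tf.int32, [batch_size, None])
output_tokens = tf.placeholder(tf.int32, [batch_size, None])
initial_state = tf.placeholder(
    tf.float32, [2, num_layers, batch_size, hidden_size])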
Example #22
def main(embedding_dim, n_neuron, lrate, remove_stop_words, n_rec_layer):

    nltk.download('stopwords')
    nltk.download('punkt')
    start = time.time()
    seed = 30
    random.seed(seed)
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)
    train_percent = 0.7
    val_percent = 0.15

    stemming = False

    if remove_stop_words == "F":
        words_to_remove = ["\n", "\s"]
    else:
        words_to_remove = stopwords.words("english") + ["\n", "\s"]

    path = "poems_reordered_further_cleaned.csv"

    df = pd.read_csv(path)

    #shuffle dataset
    df = df.sample(frac=1, random_state=seed).reset_index(drop=True)

    all_words = []

    count = 0

    max_len = 0  # longest poem length in words (renamed: 'max' shadowed a builtin)

    for i in range(0, df.shape[0]):
        line = df.iloc[i, 1]

        words = StemmingUtil.parseTokens(line)
        words = [w for w in words if w not in words_to_remove]

        if stemming:
            words = StemmingUtil.createStems(words)

        newline = " ".join(words)
        df.iloc[i, 1] = newline
        count += len(words)

        if len(words) > max_len:
            max_len = len(words)

        #print(len(words))

        all_words = all_words + words

    average_len = int(count / df.shape[0])
    all_words = set(all_words)
    vocab_size = len(all_words)

    #print(max_length)

    max_length = 1000

    train_set, val_set, test_set = split_data(df, train_percent, val_percent,
                                              seed)

    print("train_set shape:", train_set.shape)
    print("test_set shape:", test_set.shape)
    print("val_set shape:", val_set.shape)

    train_set = train_set.to_numpy()
    test_set = test_set.to_numpy()
    val_set = val_set.to_numpy()
    dataset = df.to_numpy()

    labels = list(np.unique(dataset[:, 0]))
    num_label = len(labels)

    X_train = train_set[:, 1:]
    Y_train = train_set[:, 0]

    dummy_Y_train = np.zeros((Y_train.shape[0], num_label))
    for i in range(0, Y_train.shape[0]):
        idx = labels.index(Y_train[i])
        dummy_Y_train[i, idx] = 1

    X_test = test_set[:, 1:]
    Y_test = test_set[:, 0]

    dummy_Y_test = np.zeros((Y_test.shape[0], num_label))
    for i in range(0, Y_test.shape[0]):
        idx = labels.index(Y_test[i])
        dummy_Y_test[i, idx] = 1

    X_val = val_set[:, 1:]
    Y_val = val_set[:, 0]

    dummy_Y_val = np.zeros((Y_val.shape[0], num_label))
    for i in range(0, Y_val.shape[0]):
        idx = labels.index(Y_val[i])
        dummy_Y_val[i, idx] = 1

    #one hot encode the words

    X_train = [one_hot(line[0], vocab_size) for line in X_train]

    X_test = [one_hot(line[0], vocab_size) for line in X_test]

    X_val = [one_hot(line[0], vocab_size) for line in X_val]

    #padding

    X_train = pad_sequences(X_train, maxlen=max_length, padding='post')

    X_test = pad_sequences(X_test, maxlen=max_length, padding='post')

    X_val = pad_sequences(X_val, maxlen=max_length, padding='post')

    if n_rec_layer == 1:
        model = tf.keras.Sequential()
        #Typical nnlm models on TF Hub use an embedding size of 128.
        #The embedding layer comes first; its output dimension is embedding_dim,
        #and input_length is the number of tokens embedded per example.
        model.add(
            layers.Embedding(vocab_size,
                             embedding_dim,
                             input_length=max_length))
        model.add(
            layers.Bidirectional(layers.CuDNNLSTM(n_neuron))
        )  # n_neuron sets the number of units in the recurrent layer
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(num_label, activation='softmax'))
        model.compile(optimizer=tf.keras.optimizers.Adam(lrate),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    elif n_rec_layer == 2:
        model = tf.keras.Sequential()
        #Typical nnlm models on TF Hub use an embedding size of 128.
        #The embedding layer comes first; its output dimension is embedding_dim,
        #and input_length is the number of tokens embedded per example.
        model.add(
            layers.Embedding(vocab_size,
                             embedding_dim,
                             input_length=max_length))
        model.add(
            layers.Bidirectional(
                layers.CuDNNLSTM(n_neuron, return_sequences=True))
        )  # n_neuron sets the number of units in the recurrent layer
        model.add(layers.Dropout(0.5))
        model.add(layers.Bidirectional(layers.CuDNNLSTM(n_neuron)))
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(num_label, activation='softmax'))
        model.compile(optimizer=tf.keras.optimizers.Adam(lrate),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    elif n_rec_layer == 3:
        model = tf.keras.Sequential()
        #Typical nnlm models on TF Hub use an embedding size of 128.
        #The embedding layer comes first; its output dimension is embedding_dim,
        #and input_length is the number of tokens embedded per example.
        model.add(
            layers.Embedding(vocab_size,
                             embedding_dim,
                             input_length=max_length))
        model.add(
            layers.Bidirectional(
                layers.CuDNNLSTM(n_neuron, return_sequences=True))
        )  # n_neuron sets the number of units in the recurrent layer
        model.add(layers.Dropout(0.5))
        model.add(
            layers.Bidirectional(
                layers.CuDNNLSTM(n_neuron, return_sequences=True)))
        model.add(layers.Dropout(0.5))
        model.add(layers.Bidirectional(layers.CuDNNLSTM(n_neuron)))
        model.add(layers.Dropout(0.5))
        model.add(layers.Dense(num_label, activation='softmax'))
        model.compile(optimizer=tf.keras.optimizers.Adam(lrate),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

    callbacks = [
        EarlyStopping(monitor='val_loss', patience=25),
        ModelCheckpoint(filepath='best_bidirect_LSTM_keras.h5',
                        monitor='val_loss',
                        save_best_only=True)
    ]

    history = model.fit(X_train,
                        dummy_Y_train,
                        epochs=500,
                        callbacks=callbacks,
                        batch_size=128,
                        validation_data=(X_val, dummy_Y_val))

    #run test set
    confusion_matrix = [[0] * num_label for m in range(0, num_label)]

    for instance in range(0, test_set.shape[0]):  #iterate over test cases
        inputs = X_test[instance, :]

        target = dummy_Y_test[instance, :]

        prediction = model.predict_classes(inputs.reshape(1, max_length),
                                           batch_size=None,
                                           verbose=0)

        iactual = np.where(target == 1)[0][0]  #row index
        ipredict = prediction[0]  #column index
        confusion_matrix[iactual][ipredict] += 1

    n_accurate_test = sum(
        [confusion_matrix[idx][idx] for idx in range(len(confusion_matrix))])

    test_accuracy = n_accurate_test / test_set.shape[0]

    print("setting:", "embed =", embedding_dim, "n_neuron =", n_neuron,
          "lrate =", lrate, "remove =", remove_stop_words, "n_rec_layer =",
          n_rec_layer)

    print("test accuracy", test_accuracy)

    setting = "embed_" + str(embedding_dim) + "_neuron_" + str(
        n_neuron) + "_lrate_" + str(lrate) + "_remove_" + str(
            remove_stop_words) + "_layer_" + str(n_rec_layer)

    print_matrix_CI(path, labels, confusion_matrix, test_accuracy, test_set,
                    setting)

    recall(confusion_matrix, labels)

    precision(confusion_matrix, labels)

    #setting = "embed_"+str(embedding_dim)+"_neuron_"+str(n_neuron)+"_lrate_"+str(lrate)+"_remove_"+str(remove_stop_words)+"_layer_"+str(n_rec_layer)

    # Plot training and validation accuracy over time
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('AccuracyPlot_' + setting)
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

    # Plot training and validation loss overtime
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title("ErrorPlot" + setting)
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

    end = time.time()

    print("run time", (end - start) / 60, "min")

    #setting = "embed_"+str(embedding_dim)+"neuron_"+str(n_neuron)+"lrate_"+str(lrate)+"remove_"+str(remove_stop_words)+"layer_"+str(n_rec_layer)

    return test_accuracy
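one_hot and pad_sequences match the keras.preprocessing text/sequence helpers (the imports are not shown, so this is an assumption). one_hot hashes words rather than building a dictionary, so collisions are possible unless the index space comfortably exceeds the vocabulary:

from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences

encoded = one_hot("the rain in spain", 50)  # 50 = size of the hashing space
padded = pad_sequences([encoded], maxlen=1000, padding='post')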
Example #23
def encoder_smate_rdp(in_shape, pool_step):
    input_ = Input(shape=in_shape)  # input: (samples, L, input_dims)
    L = in_shape[0]
    D = in_shape[1]
    # temporal axis
    if (module_name == 'gru'):
        if (tf.test.is_gpu_available()):
            out_t = ll.CuDNNGRU(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.CuDNNGRU(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)
        else:
            out_t = ll.GRU(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.GRU(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)

    elif (module_name == 'lstm'):
        if (tf.test.is_gpu_available()):
            out_t = ll.CuDNNLSTM(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.CuDNNLSTM(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)
        else:
            out_t = ll.LSTM(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.LSTM(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)

    # 1D-CNN
    group_size = int(D * 1.5 / 3)
    in_s = []
    for i in range(3):
        # NOTE: the random index sample below is computed but never used;
        # the slice takes a contiguous block of dimensions instead
        idx_list = rd.sample(range(0, D), group_size)
        idx_array = np.array(idx_list)
        in_s_i = input_[:, :, i:i + group_size]
        in_s.append(in_s_i)

    out_s_11 = Conv1D(128, 8, padding='same',
                      kernel_initializer='he_uniform')(in_s[0])
    out_s_11 = BatchNormalization()(out_s_11)
    out_s_11 = Activation('relu')(out_s_11)

    out_s_12 = Conv1D(128, 8, padding='same',
                      kernel_initializer='he_uniform')(in_s[1])
    out_s_12 = BatchNormalization()(out_s_12)
    out_s_12 = Activation('relu')(out_s_12)

    out_s_13 = Conv1D(128, 8, padding='same',
                      kernel_initializer='he_uniform')(in_s[2])
    out_s_13 = BatchNormalization()(out_s_13)
    out_s_13 = Activation('relu')(out_s_13)

    out_s_1 = [out_s_11, out_s_12, out_s_13]
    #out_s = K.concatenate((out_s_11, out_s_12, out_s_13), axis=0)

    conv1D_2 = Conv1D(256, 5, padding='same', kernel_initializer='he_uniform')
    conv1D_3 = Conv1D(128, 3, padding='same', kernel_initializer='he_uniform')
    batch_norm1 = BatchNormalization()
    batch_norm2 = BatchNormalization()
    activ_relu1 = Activation('relu')
    activ_relu2 = Activation('relu')

    s_outs = []
    for s in out_s_1:
        s_out = conv1D_2(s)
        s_out = batch_norm1(s_out)
        s_out = activ_relu1(s_out)

        s_out = conv1D_3(s_out)
        s_out = batch_norm2(s_out)
        s_out = activ_relu2(s_out)

        s_out = AveragePooling1D(pool_size=pool_step,
                                 strides=None,
                                 padding='same')(s_out)  # L * D
        s_outs.append(s_out)
    out_s = ll.Concatenate(axis=-1)(s_outs)  # L * 3D

    #reduce latent space dimension (t & s axis)
    out_t = AveragePooling1D(pool_size=pool_step, strides=None,
                             padding='same')(out_t)

    out = ll.Concatenate(axis=-1)([out_t, out_s])  # 1 * 4D
    out = Dense(128)(out)
    out = BatchNormalization()(out)
    out = ll.LeakyReLU()(out)
    out = Dense(128)(out)
    out = BatchNormalization()(out)  # (samples, L', 128)

    model = Model(inputs=input_, outputs=out)

    return model
Example #24
def encoder_smate(in_shape, pool_step, d_prime, kernels=[8, 5, 3]):
    input_ = Input(shape=in_shape)  # input: (samples, L, input_dims)
    L = in_shape[0]

    # temporal axis
    if (module_name == 'gru'):
        if (tf.test.is_gpu_available()):
            out_t = ll.CuDNNGRU(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.CuDNNGRU(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)
        else:
            out_t = ll.GRU(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.GRU(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)

    elif (module_name == 'lstm'):
        if (tf.test.is_gpu_available()):
            out_t = ll.CuDNNLSTM(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.CuDNNLSTM(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)
        else:
            out_t = ll.LSTM(hidden_dim, return_sequences=True)(input_)
            for i in range(num_layers - 1):
                out_t = ll.LSTM(hidden_dim, return_sequences=True)(
                    out_t)  # output: (batch_size, timesteps, hidden_dim)

    # 1D-CNN
    out_s = spatial_dynamic_block(input_, kernels[0], d_prime)
    out_s = Conv1D(128,
                   kernels[0],
                   padding='same',
                   kernel_initializer='he_uniform')(out_s)  # consume the SDB output, matching the later blocks
    out_s = BatchNormalization()(out_s)
    out_s = Activation('relu')(out_s)

    out_s = spatial_dynamic_block(out_s, kernels[1], 8)
    out_s = Conv1D(256,
                   kernels[1],
                   padding='same',
                   kernel_initializer='he_uniform')(out_s)
    out_s = BatchNormalization()(out_s)
    out_s = Activation('relu')(out_s)

    out_s = spatial_dynamic_block(out_s, kernels[2], 16)
    out_s = Conv1D(128,
                   kernels[2],
                   padding='same',
                   kernel_initializer='he_uniform')(out_s)
    out_s = BatchNormalization()(out_s)
    out_s = Activation('relu')(out_s)  # L * D

    #reduce latent space dimension (t & s axis)
    out_t = AveragePooling1D(pool_size=pool_step, strides=None,
                             padding='same')(out_t)

    out_s = AveragePooling1D(pool_size=pool_step, strides=None,
                             padding='same')(out_s)  # L' * D

    out = ll.Concatenate(axis=-1)([out_t, out_s])  # (samples, L', 128*4 + 128)
    out = Dense(128)(out)
    #out = Dense(128)(out_s)
    out = BatchNormalization()(out)
    out = ll.LeakyReLU()(out)
    out = Dense(128)(out)
    out = BatchNormalization()(out)  # (samples, L', 128)

    model = Model(inputs=input_, outputs=out)

    return model
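tf.test.is_gpu_available(), which these SMATE encoders use to pick between CuDNN and plain RNN layers, is deprecated in TF2; the same dispatch can be written with the current device API:

import tensorflow as tf

use_gpu = len(tf.config.list_physical_devices('GPU')) > 0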
Example #25
def build_model(
        input_shape, 
        num_classes,
        activation_function, 
        dropout_rate,
        use_batchnorm,
        l2_regularization,
        cnn_layers,
        lstm_units,
        combine_mode,
        fcn_layers):
    ''' Builds a CNN-RNN-FCN classification model
    
    # Parameters
        input_shape (tuple) -- expected input shape
        num_classes (int) -- number of classes
        activation_function (str) -- non-linearity to apply between layers
        dropout_rate (float) -- must be between 0 and 1
        use_batchnorm (bool) -- if True, batchnorm layers are added between convolutions
        l2_regularization (float)
        cnn_layers (list) -- list specifying CNN layers. 
                             Each element must be of the form 
                             {filters: 32,  kernel_size: 3, use_maxpool: true}
        lstm_units (int) -- number of hidden units of the lstm
                            if lstm_units is None or 0 the LSTM layer is skipped
        combine_mode (str) -- specifies how the encoding of each image in the sequence 
                              is to be combined. Supports:
                                  concat : outputs are stacked on top of one another
                                  last : only last hidden state is returned
                                  attention : an attention mechanism is used to combine the hidden states
                              
        fcn_layers (list) -- list specifying Dense layers
                             example element: {units: 1024}
    # Returns
        model -- an uncompiled Keras model
    '''
    # Regularizer
    l2_reg = l2(l2_regularization)
    
    # Build a model with the functional API
    inputs = ll.Input(input_shape)
    x = inputs
    
    # Reshape entry if needed
    if len(input_shape) == 3:
        x = ll.Reshape([1] + list(input_shape))(x)  # tuple -> list so the concatenation works
    elif len(input_shape) < 3:
        raise ValueError(f"Input shape {input_shape} not supported")

    # CNN feature extractor    
    for i, cnn_layer in enumerate(cnn_layers):
        # Extract layer params
        filters = cnn_layer['filters']
        kernel_size = cnn_layer['kernel_size']
        use_maxpool = cnn_layer['use_maxpool']

        # build cnn_layer
        x = ll.TimeDistributed(ll.Conv2D(
                filters, 
                kernel_size, 
                strides=(1, 1), 
                padding='same', 
                data_format=None, 
                dilation_rate=(1, 1), 
                activation=activation_function, 
                use_bias=True, 
                kernel_initializer='glorot_uniform', 
                bias_initializer='zeros', 
                kernel_regularizer=l2_reg, 
                bias_regularizer=l2_reg, 
                activity_regularizer=None, 
                kernel_constraint=None, 
                bias_constraint=None
            ), name=f'conv2D_{i}')(x)
        
        # add maxpool if needed
        if use_maxpool:
            x = ll.TimeDistributed(ll.MaxPooling2D(
                    pool_size=(2, 2), 
                    strides=None, 
                    padding='valid', 
                    data_format=None
                ), name=f'maxpool_{i}')(x)
        
        if use_batchnorm:
            x = ll.TimeDistributed(ll.BatchNormalization(
                    axis=-1, 
                    momentum=0.99, 
                    epsilon=0.001, 
                    center=True, 
                    scale=True, 
                    beta_initializer='zeros', 
                    gamma_initializer='ones', 
                    moving_mean_initializer='zeros', 
                    moving_variance_initializer='ones', 
                    beta_regularizer=None, 
                    gamma_regularizer=None, 
                    beta_constraint=None, 
                    gamma_constraint=None
                ), name=f'batchnorm_{i}')(x)

    
    x = ll.TimeDistributed(ll.Flatten(), name='flatten')(x)
    x = ll.TimeDistributed(ll.Dropout(dropout_rate), name='dropout')(x)

    # LSTM feature combinator
    if lstm_units is not None and lstm_units > 0:
        x = ll.CuDNNLSTM(
                lstm_units,
                kernel_initializer='glorot_uniform',
                recurrent_initializer='orthogonal',
                bias_initializer='zeros',
                unit_forget_bias=True,
                kernel_regularizer=l2_reg,
                recurrent_regularizer=l2_reg,
                bias_regularizer=l2_reg,
                activity_regularizer=None,
                kernel_constraint=None,
                recurrent_constraint=None,
                bias_constraint=None,
                return_sequences=(combine_mode!='last'),
                return_state=False,
                go_backwards=False,
                stateful=False
                )(x)
    
    # Combine output of each sequence
    if combine_mode == 'concat':
        x = ll.Flatten()(x)
    elif combine_mode == 'last':
        if lstm_units is None or lstm_units == 0:    # if no LSTM was used
            x = ll.Lambda(lambda x : x[:,-1,...])(x) # we extract the last element
    elif combine_mode == 'attention':
        attention = ll.TimeDistributed(ll.Dense(1), name='attention_score')(x)
        attention = ll.Flatten()(attention)
        attention = ll.Softmax()(attention)
        x = ll.dot([x, attention], axes=[-2, -1])
    else: raise ValueError(f"Combine mode {combine_mode} not supported")
    
    # FCN classifier    
    for fcn_layer in fcn_layers:
        # extract layer params
        units = fcn_layer['units']
        
        # build layer
        x = ll.Dense(
                units, 
                activation=activation_function, 
                use_bias=True, 
                kernel_initializer='glorot_uniform', 
                bias_initializer='zeros', 
                kernel_regularizer=l2_reg, 
                bias_regularizer=l2_reg, 
                activity_regularizer=None, 
                kernel_constraint=None, 
                bias_constraint=None
            )(x)
        
        x = ll.Dropout(dropout_rate)(x)

    
    prediction = ll.Dense(num_classes, activation='softmax')(x)
    
    # Build model
    model = Model(inputs=inputs, outputs=prediction)
    
    return model
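A usage sketch assembled from the docstring's parameter formats (all values are illustrative placeholders):

model = build_model(
    input_shape=(8, 64, 64, 3),  # sequence of 8 RGB frames
    num_classes=10,
    activation_function='relu',
    dropout_rate=0.3,
    use_batchnorm=True,
    l2_regularization=1e-4,
    cnn_layers=[{'filters': 32, 'kernel_size': 3, 'use_maxpool': True},
                {'filters': 64, 'kernel_size': 3, 'use_maxpool': True}],
    lstm_units=128,
    combine_mode='attention',
    fcn_layers=[{'units': 256}],
)
model.compile(optimizer='adam', loss='categorical_crossentropy')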
Example #26
 def __init__(self, config):
     super(LSTMEncoder, self).__init__()
     self.config = config
     self.rnn = layers.CuDNNLSTM(units=config.hidden_dim)
     self.dropout = layers.Dropout(config.dropout)
     self.batch_norm = layers.BatchNormalization()
Example #27
        ne_labels = mat['testIsNamedEntity'][indices]
        if single_track:
            x_features = x_features[:, :, :5]

print(str(num_hidden) + ' hidden units')

from tensorflow.keras import layers
from tensorflow.keras import regularizers

if train_mode:
    model = tensorflow.keras.Sequential()
    model.add(
        layers.Bidirectional(
            layers.CuDNNLSTM(16,
                             kernel_regularizer=regularizers.l2(beta),
                             recurrent_regularizer=regularizers.l2(beta),
                             bias_regularizer=regularizers.l2(beta),
                             return_sequences=True),
            input_shape=(timesteps, num_input)))  #+ num samples
    model.add(
        layers.Bidirectional(
            layers.CuDNNLSTM(8,
                             kernel_regularizer=regularizers.l2(beta),
                             recurrent_regularizer=regularizers.l2(beta),
                             bias_regularizer=regularizers.l2(beta),
                             return_sequences=True)))
    model.add(
        layers.Bidirectional(
            layers.CuDNNLSTM(8,
                             kernel_regularizer=regularizers.l2(beta),
                             recurrent_regularizer=regularizers.l2(beta),
Example #28
    def retain(ARGS):
        """Create the model"""

        # Define the constant for model saving
        reshape_size = ARGS.emb_size + ARGS.numeric_size
        if ARGS.allow_negative:
            embeddings_constraint = FreezePadding()
            beta_activation = 'tanh'
            output_constraint = None
        else:
            embeddings_constraint = FreezePadding_Non_Negative()
            beta_activation = 'sigmoid'
            output_constraint = non_neg()

        def reshape(data):
            """Reshape the context vectors to 3D vector"""
            return K.reshape(x=data, shape=(K.shape(data)[0], 1, reshape_size))

        # Code Input
        codes = L.Input((None, None), name='codes_input')
        inputs_list = [codes]
        # Calculate embedding for each code and sum them to a visit level
        codes_embs_total = L.Embedding(
            ARGS.num_codes + 1,
            ARGS.emb_size,
            name='embedding'
            # BUG: embeddings_constraint not supported
            # https://github.com/tensorflow/tensorflow/issues/33755
            # ,embeddings_constraint=embeddings_constraint
        )(codes)
        codes_embs = L.Lambda(lambda x: K.sum(x, axis=2))(codes_embs_total)
        # Numeric input if needed
        if ARGS.numeric_size > 0:
            numerics = L.Input((None, ARGS.numeric_size), name='numeric_input')
            inputs_list.append(numerics)
            full_embs = L.concatenate([codes_embs, numerics], name='catInp')
        else:
            full_embs = codes_embs

        # Apply dropout on inputs
        full_embs = L.Dropout(ARGS.dropout_input)(full_embs)

        # Time input if needed
        if ARGS.use_time:
            time = L.Input((None, 1), name='time_input')
            inputs_list.append(time)
            time_embs = L.concatenate([full_embs, time], name='catInp2')
        else:
            time_embs = full_embs

        # Setup Layers
        # This implementation uses Bidirectional LSTM instead of reverse order
        #    (see https://github.com/mp2893/retain/issues/3 for more details)

        # If training on GPU and Tensorflow use CuDNNLSTM for much faster training
        if glist:
            alpha = L.Bidirectional(L.CuDNNLSTM(ARGS.recurrent_size,
                                                return_sequences=True),
                                    name='alpha')
            beta = L.Bidirectional(L.CuDNNLSTM(ARGS.recurrent_size,
                                               return_sequences=True),
                                   name='beta')
        else:
            alpha = L.Bidirectional(L.LSTM(ARGS.recurrent_size,
                                           return_sequences=True,
                                           implementation=2),
                                    name='alpha')
            beta = L.Bidirectional(L.LSTM(ARGS.recurrent_size,
                                          return_sequences=True,
                                          implementation=2),
                                   name='beta')

        alpha_dense = L.Dense(1, kernel_regularizer=l2(ARGS.l2))
        beta_dense = L.Dense(ARGS.emb_size + ARGS.numeric_size,
                             activation=beta_activation,
                             kernel_regularizer=l2(ARGS.l2))

        # Compute alpha, visit attention
        alpha_out = alpha(time_embs)
        alpha_out = L.TimeDistributed(alpha_dense,
                                      name='alpha_dense_0')(alpha_out)
        alpha_out = L.Softmax(axis=1)(alpha_out)
        # Compute beta, codes attention
        beta_out = beta(time_embs)
        beta_out = L.TimeDistributed(beta_dense, name='beta_dense_0')(beta_out)
        # Compute context vector based on attentions and embeddings
        c_t = L.Multiply()([alpha_out, beta_out, full_embs])
        c_t = L.Lambda(lambda x: K.sum(x, axis=1))(c_t)
        # Reshape to 3d vector for consistency between Many to Many and Many to One implementations
        contexts = L.Lambda(reshape)(c_t)

        # Make a prediction
        contexts = L.Dropout(ARGS.dropout_context)(contexts)
        output_layer = L.Dense(1,
                               activation='sigmoid',
                               name='dOut',
                               kernel_regularizer=l2(ARGS.l2),
                               kernel_constraint=output_constraint)

        # TimeDistributed is used for consistency
        # between Many to Many and Many to One implementations
        output = L.TimeDistributed(output_layer,
                                   name='time_distributed_out')(contexts)
        # Define the model with appropriate inputs
        model = Model(inputs=inputs_list, outputs=[output])

        return model
Example #29
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon


original_dim = 256
intermediate_dim = 512
latent_dim = 8

# Define encoder model.
original_inputs = tf.keras.Input(shape=(original_dim,1), name='encoder_input')
input_err = Input(shape=(256,1))  # auxiliary error input; declared as a vae input below but not connected to the graph
x = layers.CuDNNLSTM(intermediate_dim, return_sequences=False)(original_inputs)
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)
z = Sampling()((z_mean, z_log_var))
encoder = tf.keras.Model(inputs=original_inputs, outputs=z, name='encoder')

# Define decoder model.
latent_inputs = tf.keras.Input(shape=(latent_dim,), name='z_sampling')
x = layers.RepeatVector(original_dim)(latent_inputs)
x = layers.CuDNNLSTM(intermediate_dim, return_sequences=True)(x)
outputs = layers.TimeDistributed(layers.Dense(1))(x)
decoder = tf.keras.Model(inputs=latent_inputs, outputs=outputs, name='decoder')

# Define VAE model.
outputs = decoder(z)
vae = tf.keras.Model(inputs=[original_inputs, input_err], outputs=outputs, name='vae')
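The assembled vae has no KL term yet; a minimal sketch of adding one (an assumption, the training objective is not shown in the source):

kl_loss = -0.5 * tf.reduce_mean(
    z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
vae.add_loss(kl_loss)
vae.compile(optimizer='adam', loss='mse')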
Example #30
#print X_test

y_train = load_y(y_train_path)
y_test = load_y(y_test_path)
###

model = tf.keras.Sequential([
   # relu activation
   layers.Dense(n_hidden, activation='relu', 
       kernel_initializer='random_normal', 
       bias_initializer='random_normal',
       batch_input_shape=(batch_size, n_steps, n_input)
   ),
   
   # cuDNN
   layers.CuDNNLSTM(n_hidden, return_sequences=True,  unit_forget_bias=1.0),
   layers.CuDNNLSTM(n_hidden,  unit_forget_bias=1.0),
   
   # layers.LSTM(n_hidden, return_sequences=True,  unit_forget_bias=1.0),
   # layers.LSTM(n_hidden,  unit_forget_bias=1.0),

   layers.Dense(n_classes, kernel_initializer='random_normal', 
       bias_initializer='random_normal',
       kernel_regularizer=tf.keras.regularizers.l2(lambda_loss_amount),
       bias_regularizer=tf.keras.regularizers.l2(lambda_loss_amount),
       activation='softmax'
   )
])

model.compile(
   optimizer=tf.keras.optimizers.Adam(lr=learning_rate, decay=decay_rate),