def _test_save_load(self, attention):
    inputs = keras.layers.Input(shape=(None,), name='Input')
    embd = keras.layers.Embedding(input_dim=3,
                                  output_dim=5,
                                  mask_zero=True,
                                  name='Embedding')(inputs)
    lstm = keras.layers.Bidirectional(keras.layers.LSTM(units=7,
                                                        return_sequences=True),
                                      name='Bi-LSTM')(embd)
    if attention.return_attention:
        layer, weights = attention(lstm)
    else:
        layer = attention(lstm)
    dense = keras.layers.Dense(units=2, activation='softmax', name='Softmax')(layer)
    loss = {'Softmax': 'sparse_categorical_crossentropy'}
    if attention.return_attention:
        outputs = [dense, weights]
        loss[attention.name] = 'mse'
    else:
        outputs = dense
    model = keras.models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss=loss)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_weighted_att_test_sl_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path,
                                    custom_objects=Attention.get_custom_objects())
    model.summary(line_length=100)
    if attention.return_attention:
        self.assertEqual(2, len(model.outputs))
    else:
        self.assertEqual(1, len(model.outputs))
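A minimal sketch of how this helper might be driven from a test case, assuming the alias `from keras_self_attention import SeqWeightedAttention as Attention` suggested by the `Attention.get_custom_objects()` call above; the layer name is illustrative:

    def test_save_load(self):
        # exercise both the single-output and the (output, attention-weights) paths
        self._test_save_load(Attention(return_attention=False, name='Attention'))
        self._test_save_load(Attention(return_attention=True, name='Attention'))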
Example #2
def lstm(x_dim) -> Model:
    model = Sequential()
    model.add(InputLayer(input_shape=(x_dim, 1)))
    model.add(LSTM(256, return_sequences=True))
    model.add(SeqWeightedAttention())

    return model
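A quick sanity check of this builder (assuming SeqWeightedAttention comes from keras_self_attention): the attention layer collapses the (x_dim, 256) LSTM output into a single 256-dimensional vector per sequence.

m = lstm(10)
m.summary()  # final output shape: (None, 256)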
Example #3
def SARNNKerasCPU(embeddingMatrix=None,
                  embed_size=400,
                  max_features=20000,
                  maxlen=100):
    inp = Input(shape=(maxlen, ))
    x = Embedding(input_dim=max_features,
                  output_dim=embed_size,
                  weights=[embeddingMatrix])(inp)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = SeqSelfAttention(
        attention_type=SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_regularizer_weight=1e-4,
    )(x)
    x = Dropout(0.5)(x)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = SeqWeightedAttention()(x)
    x = Dropout(0.5)(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])
    return model
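A hypothetical call, assuming a pre-trained embedding matrix of shape (max_features, embed_size) and the custom f1 metric defined elsewhere in this module:

import numpy as np

embedding_matrix = np.random.normal(size=(20000, 400))  # placeholder weights for illustration
model = SARNNKerasCPU(embeddingMatrix=embedding_matrix)
model.summary()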
Example #4
def HARNN(embeddingMatrix=None,
          embed_size=400,
          max_features=20000,
          max_nb_sent=3,
          max_sent_len=40,
          use_fasttext=False,
          trainable=True,
          use_additive_emb=False):
    if use_fasttext:
        sent_inp = Input(shape=(max_sent_len, embed_size))
        embed = sent_inp
    else:
        sent_inp = Input(shape=(max_sent_len, ))
        embed = Embedding(input_dim=max_features,
                          output_dim=embed_size,
                          weights=[embeddingMatrix],
                          trainable=trainable)(sent_inp)

    if use_additive_emb:
        embed = AdditiveLayer()(embed)
        embed = Dropout(0.5)(embed)

    word_lstm = Bidirectional(CuDNNLSTM(128, return_sequences=True))(embed)
    word_att = SeqWeightedAttention()(word_lstm)
    word_att = Dropout(0.5)(word_att)
    sent_encoder = Model(sent_inp, word_att)
    plot_model(sent_encoder,
               to_file='{}.png'.format("HARNN1"),
               show_shapes=True,
               show_layer_names=True)

    doc_input = Input(shape=(max_nb_sent, max_sent_len))
    doc_encoder = TimeDistributed(sent_encoder)(doc_input)
    sent_lstm = Bidirectional(CuDNNLSTM(128,
                                        return_sequences=True))(doc_encoder)
    sent_att = SeqWeightedAttention()(sent_lstm)
    sent_att = Dropout(0.5)(sent_att)
    preds = Dense(1, activation="sigmoid")(sent_att)
    model = Model(inputs=doc_input, outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])
    return model
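The document-level input is a grid of token ids, one row per sentence. A hedged sketch of the expected shapes with the default arguments (this assumes a GPU build for CuDNNLSTM, graphviz installed for plot_model, and the custom f1 metric defined in this module):

import numpy as np

emb = np.random.normal(size=(20000, 400))             # placeholder embedding weights
docs = np.random.randint(1, 20000, size=(8, 3, 40))   # (batch, max_nb_sent, max_sent_len)
labels = np.random.randint(0, 2, size=(8, 1))
model = HARNN(embeddingMatrix=emb)
model.fit(docs, labels, epochs=1, batch_size=8)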
def new_architecture(x_len, vocab_size, layers, bi, att, cells):
    inputs = keras.layers.Input(shape=(
        x_len,
        vocab_size,
    ), name='Input')

    prev = inputs
    for i in range(layers):
        ret_seq = True
        if i == layers - 1 and not att:
            ret_seq = False

        this_layer = keras.layers.LSTM(cells,
                                       dropout=0.4,
                                       name='LSTM_%s' % i,
                                       return_sequences=ret_seq)
        if bi:
            this_layer = Bidirectional(this_layer, name='bi_%s' % i)
        prev = this_layer(prev)

    if att:
        attention = SeqWeightedAttention(return_attention=False,
                                         name='Attention')
        prev = attention(prev)

    dense = keras.layers.Dense(vocab_size,
                               activation='softmax',
                               name="dense_outputs")(prev)

    model = keras.Model(inputs=inputs, outputs=[dense])

    # lstm = Bidirectional(
    #     keras.layers.LSTM(
    #         cells,
    #         dropout=0.4,
    #         name="LSTM",
    #         return_sequences=True),
    #     name="bi1"
    # )(inputs)

    # lstm2 = Bidirectional(
    #     keras.layers.LSTM(
    #         cells,
    #         dropout=0.4,
    #         name="LSTM2",
    #         return_sequences=True),
    #     name="bi2"
    # )(lstm)
    return model
def build_model_attention():
    model = Sequential()
    model.add(
        Bidirectional(
            LSTM(units=128,
                 dropout=0.5,
                 recurrent_dropout=0.5,
                 return_sequences=True)))
    #model.add(Bidirectional(LSTM(128,recurrent_dropout=0.5)))
    model.add(SeqWeightedAttention())
    model.add(Dense(1, activation=sigmoid))
    model.compile(loss=losses.binary_crossentropy,
                  optimizer=Adam(1e-5),
                  metrics=['accuracy'])
    return model
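Because no input_shape is passed to the first layer, this Sequential model is built lazily. A hedged sketch of forcing a build for inspection, with illustrative dimensions (200 timesteps of 300-dimensional features), assuming the module's imports (losses, Adam, sigmoid) are in place:

model = build_model_attention()
model.build(input_shape=(None, 200, 300))  # illustrative dimensions
model.summary()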
def get_model(args):
    model = None
    modelname = args.model_id
    # workaround for getting vis for attention
    if modelname == "model_folk100k_melody_2lstm32_attention":
        # (100000, 64, 58)
        model = att_model(32, False, 2, True)
    elif modelname == "model_folk100k_melody_bi3lstm64_attention":
        model = att_model(64, True, 3, True)
    else:
        json_model = open(os.path.join(modelname, "model.json"), "r").read()
        model = keras.models.model_from_json(
            json_model,
            custom_objects=SeqWeightedAttention.get_custom_objects())
    model.load_weights(os.path.join(modelname, "model.h5"))
    print(model.summary(line_length=100))
    return model
def att_model(cells, bi, layers, att):
    """
    hardcoded model for vis. attention
    """
    # cells = 64
    vocab_size = 58
    # bi = True
    # layers=3
    # att=True
    inputs = keras.layers.Input(shape=(
        63,
        58,
    ), name='Input')

    prev = inputs
    for i in range(layers):
        ret_seq = True
        if i == layers - 1 and not att:
            ret_seq = False

        this_layer = keras.layers.LSTM(cells,
                                       dropout=0.4,
                                       name='LSTM_%s' % i,
                                       return_sequences=ret_seq)
        if bi:
            this_layer = keras.layers.Bidirectional(this_layer,
                                                    name='bi_%s' % i)
        prev = this_layer(prev)

    attention = SeqWeightedAttention(return_attention=True, name='Attention')
    attention_layer, attention_weights = attention(prev)

    dense = keras.layers.Dense(vocab_size,
                               activation='softmax',
                               name="dense_outputs")(attention_layer)

    outputs = [dense, attention_weights]
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam',
                  loss={
                      'dense_outputs': 'categorical_crossentropy',
                      'Attention': attention_loss(1e-4)
                  })
    return model
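With return_attention=True the model has two outputs, so predict returns both the class probabilities and the per-timestep attention weights. A hedged sketch of pulling them apart for visualisation (attention_loss is a custom helper defined elsewhere in this module):

import numpy as np

model = att_model(cells=64, bi=True, layers=3, att=True)
x = np.zeros((1, 63, 58), dtype='float32')   # one dummy one-hot encoded sequence
probs, att_weights = model.predict(x)        # probs: (1, 58), att_weights: (1, 63)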
Example #9
def SARNNKeras(embeddingMatrix=None,
               embed_size=400,
               max_features=20000,
               maxlen=100,
               rnn_type=CuDNNLSTM,
               use_fasttext=False,
               trainable=True,
               use_additive_emb=False):
    if use_fasttext:
        inp = Input(shape=(maxlen, embed_size))
        x = inp
    else:
        inp = Input(shape=(maxlen, ))
        x = Embedding(input_dim=max_features,
                      output_dim=embed_size,
                      weights=[embeddingMatrix],
                      trainable=trainable)(inp)

    if use_additive_emb:
        x = AdditiveLayer()(x)
        x = Dropout(0.5)(x)

    x = Bidirectional(rnn_type(128, return_sequences=True))(x)
    x = SeqSelfAttention(
        # attention_type = SeqSelfAttention.ATTENTION_TYPE_MUL,
        attention_regularizer_weight=1e-4, )(x)
    # x = LayerNormalization()(x)
    x = Dropout(0.5)(x)

    x = Bidirectional(rnn_type(128, return_sequences=True))(x)
    x = SeqWeightedAttention()(x)
    # x = LayerNormalization()(x)
    x = Dropout(0.5)(x)

    x = Dense(64, activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', f1])
    return model
def get_model(args, dshape):
    model_dir = get_model_dir(args)

    model = None
    loss = 'categorical_crossentropy'
    optimizer = keras.optimizers.Adam(lr=0.005)

    if args.new:
        print('generating NEW model...')
        model = new_architecture(dshape[1] - 1, dshape[2], args.layers,
                                 args.bi, args.att, args.cells)
        # copy arch to folder
        shutil.copy('architecture.py', model_dir)
        model_json = model.to_json()
        model_json_path = os.path.join(model_dir, "model.json")
        print('storing model json in %s' % model_json_path)
        with open(model_json_path, "w") as json_file:
            json_file.write(model_json)
        # delete epoch counters
        delete_epoch_counters(model_dir)
        model.compile(loss=loss, optimizer=optimizer)

    else:
        print('using existing model...')
        model_json_path = os.path.join(model_dir, "model.json")
        model = keras.models.model_from_json(
            open(model_json_path, "r").read(),
            custom_objects=SeqWeightedAttention.get_custom_objects())

        model_weights_path = os.path.join(model_dir, "model.h5")
        print('loading existing weights from %s...' % model_weights_path)
        model.load_weights(model_weights_path)
        model.compile(loss=loss, optimizer=optimizer)

    print(model.summary())

    return model, model_dir
def build_model(embeddings_size):
    # Inputs
    q1_embeddings_input = Input(shape=(
        None,
        embeddings_size,
    ),
                                name='q1_word_embeddings')
    q2_embeddings_input = Input(shape=(
        None,
        embeddings_size,
    ),
                                name='q2_word_embeddings')

    # RNN
    word_lstm1 = Bidirectional(
        ONLSTM(units=256,
               chunk_size=8,
               dropout=args.dropout_rate,
               return_sequences=True,
               kernel_initializer='glorot_normal'))
    q1_word_lstm1 = word_lstm1(q1_embeddings_input)
    q2_word_lstm1 = word_lstm1(q2_embeddings_input)

    word_lstm2 = Bidirectional(
        ONLSTM(units=256,
               chunk_size=8,
               dropout=args.dropout_rate,
               return_sequences=True,
               kernel_initializer='glorot_normal'))
    q1_word_lstm2 = word_lstm2(q1_word_lstm1)
    q2_word_lstm2 = word_lstm2(q2_word_lstm1)

    word_attention = SeqWeightedAttention()
    q1_word_attention = word_attention(q1_word_lstm2)
    q2_word_attention = word_attention(q2_word_lstm2)

    # Squared difference between the two question representations
    subtract = Subtract()([q1_word_attention, q2_word_attention])
    multiply_subtract = Multiply()([subtract, subtract])

    # Fully Connected
    dense1 = Dropout(args.dropout_rate)(
        Dense(units=1024,
              activation='relu',
              kernel_initializer='glorot_normal')(multiply_subtract))
    dense2 = Dropout(
        args.dropout_rate)(Dense(units=512,
                                 activation='relu',
                                 kernel_initializer='glorot_normal')(dense1))
    dense3 = Dropout(
        args.dropout_rate)(Dense(units=256,
                                 activation='relu',
                                 kernel_initializer='glorot_normal')(dense2))
    dense4 = Dropout(
        args.dropout_rate)(Dense(units=128,
                                 activation='relu',
                                 kernel_initializer='glorot_normal')(dense3))

    # Predict
    output = Dense(units=1,
                   activation='sigmoid',
                   kernel_initializer='glorot_normal')(dense4)

    model = Model([q1_embeddings_input, q2_embeddings_input], output)

    model.compile(optimizer=Adam(lr=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy', f1])
    model.summary()

    return model
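Both questions pass through the same shared LSTM and attention layers, so the learned similarity is symmetric by construction. A hedged usage sketch, assuming ONLSTM, the f1 metric, and an args namespace with dropout_rate are available in this module:

model = build_model(embeddings_size=300)
# q1, q2: float arrays of shape (batch, seq_len, 300); labels: (batch, 1)
# model.fit([q1, q2], labels, batch_size=64, epochs=3)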
Example #12
for category in categories:
    x = []
    for sentence in brown.sents(categories=[category]):
        _tokens = tokenizer.texts_to_sequences(sentence[:10])
        if len(_tokens) < 10:
            _tokens = _tokens + [[0]] * (10 - len(_tokens))
        x.append(np.array(_tokens))
        if len(x) == 3:
            X.append(np.array(x))
            x = []
            Y.append(to_categorical(categories.index(category), num_classes=3))

print(np.array(X).shape)

input = Input(shape=(3, 10, 1))
layer = TimeDistributed(lstm(10))(input)
layer = LSTM(256, return_sequences=True)(layer)
layer = SeqWeightedAttention()(layer)
layer = Dense(3)(layer)
layer = Activation('softmax')(layer)

model = Model(input, layer)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['categorical_accuracy'])

model.fit(np.array(X),
          np.array(Y),
          epochs=10)
Example #13
def create_model_template(layers: int,
                          units,
                          shape,
                          use_attention_first=False,
                          use_attention_middle=False,
                          lr=3e-4,
                          optimizer='Adam',
                          dropout=0.2,
                          dropout_last_only=False):
    model_metrics = [
        metrics.BinaryAccuracy(name='acc'),
        metrics.Precision(name='precision'),
        metrics.Recall(name='recall'),
        metrics.AUC(name='auc')
    ]

    model = Sequential()

    if not isinstance(units, list):
        units = [units] * layers
    elif len(units) < layers:
        units = [units[0]] * layers

    model.add(
        Bidirectional(LSTM(units[0],
                           return_sequences=layers > 1 or use_attention_first),
                      input_shape=shape))
    # model.add(Bidirectional(tfa.rnn.cell.LayerNormLSTMCell(units[0], return_sequences=layers > 1), input_shape=shape))

    if use_attention_first:
        if layers > 1:
            model.add(SeqSelfAttention())
        else:
            model.add(SeqWeightedAttention())

    for i in range(1, layers):
        if use_attention_middle:
            model.add(SeqSelfAttention())

        if dropout_last_only is False:
            model.add(Dropout(dropout))

        model.add(
            Bidirectional(
                LSTM(units[i],
                     return_sequences=layers > i + 1 or use_attention_middle)))

    if use_attention_middle:
        model.add(SeqWeightedAttention())

    model.add(Dropout(dropout))

    model.add(Dense(1, activation='sigmoid'))

    # use elif so an earlier match is not overwritten by the final else
    if optimizer == 'SGD':
        optimizer = SGD(lr=lr)
    elif optimizer == 'RMSprop':
        optimizer = RMSprop(lr=lr)
    elif optimizer == 'Adadelta':
        optimizer = Adadelta(lr=lr)
    elif optimizer == 'Adagrad':
        optimizer = Adagrad(lr=lr)
    elif optimizer == 'Nadam':
        optimizer = Nadam(lr=lr)
    elif optimizer == 'Adamax':
        optimizer = Adamax(lr=lr)
    else:
        optimizer = Adam(lr=lr)

    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=model_metrics)

    return model
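A hypothetical call: two Bi-LSTM layers with self-attention between them and weighted attention on top, for inputs of 100 timesteps by 16 features (assuming the module's layer, metric, and optimizer imports):

model = create_model_template(layers=2, units=64, shape=(100, 16),
                              use_attention_middle=True)
model.summary()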
Example #14
    :param text_list:
    :param token_dict:
    :return:
    """
    X1 = []
    X2 = []
    tokenizer = Tokenizer(token_dict)
    for line in text_list:
        x1, x2 = tokenizer.encode(first=line)
        X1.append(x1)
        X2.append(x2)
    X1 = sequence.pad_sequences(X1, maxlen=maxlen, padding='post', truncating='post')
    X2 = sequence.pad_sequences(X2, maxlen=maxlen, padding="post", truncating='post')
    return [X1, X2]
if __name__ == "__main__":
    maxlen = 100
    text_list = ["TW 0:02 / 41:54 Mind Your Language Season 3 Episode 2 Who Loves Ya Baby? | Funny TV Show (GM)",
                 "I have a dream"]
    token_dict = get_token_dict(dict_path)
    [X1, X2] = get_encode(text_list, token_dict)
    print(X1)
    wordvec = build_bert_model(X1, X2)
    print(wordvec)
    # with open("test_keras_bert2.yml", "r") as f:
    #     yaml_string = yaml.load(f)
    # model = keras.models.load_model(yaml_string, custom_objects=SeqSelfAttention.get_custom_objects())
    print("loading weights")
    model = keras.models.load_model("test_keras_bert4.h5",
                                    custom_objects=SeqWeightedAttention.get_custom_objects())
    result = model.predict(wordvec)
    print(result)
    del model