Example no. 1
def build_model(embeddings_path, output_dim, stored_model=None):
    model = None
    index_dict, vector_dict = read_embeddings(embeddings_path, EMBEDDING_DIM)
    if stored_model is None:
        model = Sequential()

        embedding_weights = np.zeros((len(index_dict), EMBEDDING_DIM))
        for word, index in index_dict.items():
            embedding_weights[index, :] = vector_dict[word]

        # define inputs here
        embedding = Embedding(
            output_dim=EMBEDDING_DIM, input_dim=len(index_dict),
            input_length=MAX_SEQUENCE_LENGTH, trainable=False
            )
        embedding.build((None,))
        embedding.set_weights([embedding_weights])

        # add layers
        model.add(embedding)
        model.add(Dropout(0.4))
        model.add(Conv1D(filters=10, kernel_size=5, padding="same"))
        model.add(keras.layers.PReLU())
        model.add(MaxPooling1D(pool_size=3))
        model.add(Bidirectional(LSTM(50, recurrent_dropout=0.4, return_sequences=True)))
        model.add(Bidirectional(LSTM(50, recurrent_dropout=0.4, return_sequences=False)))
        model.add(Dense(output_dim, activation="softmax"))

        model.compile(loss="binary_crossentropy", optimizer="nadam", metrics=[f1])

    else:
        # load model state
        model = keras.models.load_model(stored_model)

    return model, index_dict
Example no. 2
def pretrained_embedding_layer(emb_matrix, input_length):
    """
    Creates a Keras Embedding() layer and loads in a pre-trained embedding matrix.

    Arguments:
    emb_matrix -- pre-trained embedding matrix of shape (vocab_len, emb_dim)
    input_length -- length of the input sequences

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    vocab_len = emb_matrix.shape[0]
    # vocab_len = vocab_len + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = emb_matrix.shape[1]  # define dimensionality of your word vectors
    print("Found word2vec dimensions: ", emb_dim)

    # Use Embedding(...). Make sure to set trainable=False.
    embedding_layer = Embedding(vocab_len,
                                emb_dim,
                                input_length=input_length,
                                mask_zero=False,
                                trainable=False)

    # Build the embedding layer, it is required before setting the weights of the embedding layer.
    # Do not modify the "None".
    embedding_layer.build((None, ))

    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
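The build((None,)) followed by set_weights([...]) pattern in the function above recurs throughout the examples below. Here is a minimal self-contained sketch of the same idea, using a tiny random matrix purely for illustration (the sizes and the matrix are hypothetical, and the Keras 2-style Embedding API used in these examples is assumed):

import numpy as np
from keras.layers import Embedding

vocab_len, emb_dim = 10, 4                                         # toy sizes, illustration only
emb_matrix = np.random.rand(vocab_len, emb_dim).astype("float32")  # stand-in for pre-trained vectors

layer = Embedding(vocab_len, emb_dim, trainable=False)
layer.build((None,))             # the layer must be built before its weights can be set
layer.set_weights([emb_matrix])

# the frozen layer now maps integer ids to the preloaded rows
assert np.allclose(layer.get_weights()[0], emb_matrix)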
Example no. 3
def make_embedding_layer(train_descriptions, embedding_dim=50, glove=True):
    # vocab_size is needed in both branches, so compute the indexing up front
    vocab_size, ixtoword, wordtoix, vocab = get_vocab_size_and_indexing(train_descriptions)
    # max_length = max_length(desc, 90)

    if glove == False:
        print('Just a zero matrix loaded')
        embedding_matrix = np.zeros((vocab_size, embedding_dim))  # just a zero matrix
    else:
        glove_dir = './glove.6B/'
        embeddings_index = {}
        f = open(os.path.join(glove_dir, 'glove.6B.'+str(embedding_dim)+'d.txt'), encoding="utf-8")
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
        f.close()

        # Get an embedding_dim-dimensional dense vector for each word in the vocabulary
        embedding_matrix = np.zeros((vocab_size, embedding_dim))  # to import as weights for the Keras Embedding layer
        for word, i in wordtoix.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                # Words not found in the embedding index will be all zeros
                embedding_matrix[i] = embedding_vector
        print('GloVe loaded!')

    embedding_layer = Embedding(vocab_size, embedding_dim, mask_zero=True, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([embedding_matrix])

    return embedding_layer
Example no. 4
def pretrained_embedding_layer(word_to_vec_map, source_vocab_to_int):
    """
    构造Embedding层并加载预训练好的词向量(这里我使用的是100维)

    @param word_to_vec_map: 单词到向量的映射
    @param word_to_index: 单词到数字编码的映射
    """

    vocab_len = len(source_vocab_to_int) + 1  # +1 as required by the Keras Embedding API
    emb_dim = word_to_vec_map["the"].shape[0]

    # initialize the embedding matrix
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # fill the embedding matrix with the word vectors
    for word, index in source_vocab_to_int.items():
        word_vector = word_to_vec_map.get(word, np.zeros(emb_dim))
        emb_matrix[index, :] = word_vector

    # define the Embedding layer and specify that its weights should not be trained
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # build
    embedding_layer.build((None,))

    # set weights
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
Example no. 5
def get_model(num_users,num_items,ratings,layers,l2_param):
	input_a = Input(shape=(1,),name='input-a',dtype='int32')
	input_b = Input(shape=(1,),name='input-b',dtype='int32')
	user_embedding_layer = Embedding(input_dim=num_users,output_dim=num_items,trainable=False,input_length=1,name='user-table')
	user_embedding_layer.build((None,))
	user_embedding_layer.set_weights([ratings])
	user_embedding = user_embedding_layer(input_a)
	user_embedding = Flatten()(user_embedding)
	item_embedding_layer = Embedding(input_dim=num_items,output_dim=num_users,trainable=False,input_length=1,name='item-table')
	item_embedding_layer.build((None,))
	item_embedding_layer.set_weights([ratings.T])
	item_embedding = item_embedding_layer(input_b)
	item_embedding = Flatten()(item_embedding)
	user_hidden = Dense(layers[0],activation='relu',kernel_regularizer=regularizers.l2(l2_param),name='user-encoding-layer-{}'.format(0))(user_embedding)
	item_hidden = Dense(layers[0],activation='relu',kernel_regularizer=regularizers.l2(l2_param),name='item-encoding-layer-{}'.format(0))(item_embedding)
	encoded_a = Dense(layers[1],activation='relu',kernel_regularizer=regularizers.l1(l2_param),name='user-encoding-layer-{}'.format(1))(user_hidden)
	encoded_b = Dense(layers[1],activation='relu',kernel_regularizer=regularizers.l1(l2_param),name='item-encoding-layer-{}'.format(1))(item_hidden)
	user_out_hidden = Dense(layers[0],activation='relu',kernel_regularizer=regularizers.l2(l2_param),name='user-decoding-layer-{}'.format(0))(encoded_a)
	item_out_hidden = Dense(layers[0],activation='relu',kernel_regularizer=regularizers.l2(l2_param),name='item-decoding-layer-{}'.format(0))(encoded_b)
	decoded_a = Dense(num_items,activation='sigmoid',kernel_regularizer=regularizers.l2(l2_param),name='user-decoding-layer-{}'.format(1))(user_out_hidden)
	decoded_b = Dense(num_users,activation='sigmoid',kernel_regularizer=regularizers.l2(l2_param),name='item-decoding-layer-{}'.format(1))(item_out_hidden)
	embedding_diff = Concatenate()([encoded_a,encoded_b])
	result_hidden = Dense(layers[1],activation='relu',kernel_regularizer=regularizers.l2(l2_param),name='predict-layer-{}'.format(0))(embedding_diff)
	result =  Dense(1,activation='sigmoid',kernel_regularizer=regularizers.l2(l2_param),name='predict-layer-{}'.format(1))(result_hidden)
	model = Model(inputs=[input_a,input_b],outputs=[decoded_a,decoded_b,result])
	# opt = Adam(lr=1e-4)
	# model.compile(optimizer=opt,loss=[recostruction_loss,recostruction_loss,edge_wise_loss],loss_weights=[1,1,alpha])
	predictor = Model(inputs=[input_a,input_b],outputs=[result])
	# predictor.compile(optimizer=opt,loss=[edge_wise_loss],metrics=[edge_wise_loss])
	# model.summary()
	encoder = Model(input_a,encoded_a)
	decoder = Model(input_a,decoded_a)
	# decoder.compile(optimizer='adadelta',loss=recostruction_loss)
	return model,encoder,decoder,predictor
Example no. 6
def pretrained_embedding_layer(word_to_vec_map, word_to_index, length,
                               embedding_dim):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """

    vocab_len = len(
        word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)

    emb_matrix = np.zeros((vocab_len, embedding_dim))

    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(input_dim=vocab_len,
                                output_dim=embedding_dim,
                                trainable=False,
                                input_length=length)

    embedding_layer.build((None, ))

    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
Example no. 7
def createEmbeddingLayer(word2index: dict, word2vec: dict) -> Embedding:
    """ Set up pretrained Embedding layer
        Credits to: https://keras.io/examples/pretrained_word_embeddings/

        Args:
            word2index:                         dictionary mapping words to indices
            word2vec:                           dictionary mapping words to embedding vectors

        Returns:
            embedding_layer                     layer with pretrained GloVe word embeddings
    """

    num_words = min(MAX_WORDS, len(word2index)) + 1
    emb_dim = word2vec["cucumber"].shape[0]
    emb_matrix = np.zeros((num_words, emb_dim))

    for word, index in word2index.items():
        if index > MAX_WORDS:
            break
        embedding_vector = word2vec.get(word)
        if embedding_vector is not None:
            emb_matrix[index, :] = embedding_vector

    embedding_layer = Embedding(
        num_words, emb_dim, input_length=MAX_SEQUENCE_LEN,
        trainable=False)  # Do not update word embeddings
    embedding_layer.build((None, ))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
Example no. 8
def pretrained_embedding_layer(g_vectors, words_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.
    
    Arguments:
    g_vectors -- dictionary mapping words to their GloVe vector representation.
    words_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    
    vocab_len = len(words_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = g_vectors["hello"].shape[0]  # define dimensionality of your GloVe word vectors (= 50)
    
    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, dimensions of word vectors = emb_dim)
    emb_matrix = np.zeros((vocab_len, emb_dim))
    
    # Set each row "index" of the embedding matrix to be the word vector representation of the "index"th word of the vocabulary
    for word, index in words_to_index.items():
        emb_matrix[index, :] = g_vectors[word]

    # Define the Keras embedding layer with the correct output/input sizes. Use Embedding(...) and make sure to set trainable=False.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable = False)

    # Build the embedding layer, it is required before setting the weights of the embedding layer. Do not modify the "None".
    embedding_layer.build((None,))
    
    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])
    
    return embedding_layer    
Example no. 9
def make_embedding_layer(vocab_size, wordtoix, embedding_dim=50, glove=True):
    '''
    create a vector matrix for the descriptions, make an embedding layer
    INPUT : vocabulary size (int), wordtoix (dict), embeddings dimension (int), glove (T/F)
    OUTPUT : embedding layer
    '''
    if glove == False:
        embedding_matrix = np.zeros((vocab_size, embedding_dim))
        print('Just a zero matrix loaded')
    else:
        embeddings_index = load_glove(embedding_dim)
        print('GloVe loaded!')

        # to import as weights for Keras Embedding layer
        embedding_matrix = np.zeros((vocab_size, embedding_dim))
        for word, i in wordtoix.items():
            # Get an embedding_dim-dimensional dense vector for each word in the vocabulary
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                # Words not found in the embedding index will be all zeros
                embedding_matrix[i] = embedding_vector

    # create an embedding layer
    embedding_layer = Embedding(vocab_size,
                                embedding_dim,
                                mask_zero=True,
                                trainable=False)
    embedding_layer.build((None, ))
    embedding_layer.set_weights([embedding_matrix])

    return embedding_layer
Example no. 10
def create_nn_model(X, embeddings_dim, embeddings_matrix=None):
    num_asts = np.amax(X)
    max_timestep = np.shape(X)[1]

    model = Sequential()
    if embeddings_matrix is None:
        embedding_layer = Embedding(num_asts + 1, embeddings_dim, 
                                    input_length=max_timestep)
    else:
        embeddings_dim = np.shape(embeddings_matrix)[1]
        embedding_layer = Embedding(num_asts + 1, embeddings_dim, 
                                    input_length=max_timestep, trainable=False)
        embedding_layer.build((None,))
        embedding_layer.set_weights([embeddings_matrix])

    model.add(embedding_layer)
    
    hidden_dim = 32
    model.add(LSTM(hidden_dim, return_sequences=True, 
                   input_shape=(max_timestep, embeddings_dim)))
    model.add(TimeDistributed(Dense(1)))
    model.add(Activation("sigmoid"))
    model.add(Reshape((max_timestep,)))

    model.compile(loss="binary_crossentropy", optimizer="adam",
                  metrics=["accuracy", precision, recall, f1])
    model.summary()
    return model
Example no. 11
def get_model(max_len, embedding_size, vocab_size, embedding, hidden_size,
              distance_metric):
    seq_1 = Input(shape=(max_len, ), dtype='int32', name='sequence1')
    seq_2 = Input(shape=(max_len, ), dtype='int32', name='sequence2')

    embed_layer = Embedding(output_dim=embedding_size,
                            input_dim=vocab_size + 1,
                            input_length=max_len,
                            trainable=False)
    embed_layer.build((None, ))
    embed_layer.set_weights([embedding.embedding_matrix])

    input_1 = embed_layer(seq_1)
    input_2 = embed_layer(seq_2)

    l1 = LSTM(units=hidden_size)

    l1_out = l1(input_1)
    l2_out = l1(input_2)

    concats = concatenate([l1_out, l2_out], axis=-1)

    if distance_metric == 'cosine':
        main_output = Lambda(exponent_neg_cosine_distance,
                             output_shape=(1, ))(concats)
    else:
        main_output = Lambda(exponent_neg_manhattan_distance,
                             output_shape=(1, ))(concats)

    model = Model(inputs=[seq_1, seq_2], outputs=[main_output])

    return model
Example no. 12
def pretrained_embedding_layer(word_to_vec_map, source_vocab_to_int, emb_dim, trainable=False):
    """
    Build the Embedding layer and load pre-trained word vectors.

    @param word_to_vec_map: mapping from words to their vectors
    @param source_vocab_to_int: mapping from words to integer ids
    @param emb_dim: embedding dimension
    @param trainable: whether the layer's weights should be trained
    """

    vocab_len = len(source_vocab_to_int)

    # initialize the embedding matrix
    emb_matrix = np.zeros((vocab_len, emb_dim))

    # fill the embedding matrix with the word vectors
    for word, index in source_vocab_to_int.items():
        word_vector = word_to_vec_map.get(word, np.zeros(emb_dim))
        emb_matrix[index, :] = word_vector

    # define the Embedding layer and specify whether its weights should be trained
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=trainable)

    # build
    embedding_layer.build((None,))

    # set weights
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
Example no. 13
def pretrained_embedding_layer( language ):
    """Creates a Keras Embedding() layer and loads in pre-trained word2vec 300-dimensional vectors.
    Args:
        language: language identifier used to locate the saved word2vec embedding matrix under word2vec_path.
    Returns:
        embedding_layer: pretrained layer Keras instance
    """
    emb_matrix = np.load( word2vec_path + language + "/" + language )
    vocab_len = emb_matrix.shape[0] + 1
    emb_dim   = emb_matrix.shape[1]
    emb_matrix = np.append( emb_matrix, np.zeros( ( 1, emb_dim ) ), axis = 0 )
    print( "embedding: ", language, emb_matrix.shape )

    # Define the Keras embedding layer with the correct output/input sizes.
    # Use Embedding(...) and make sure to set trainable=False.
    embedding_layer = Embedding( vocab_len, emb_dim, trainable = False, mask_zero = True )

    # Build the embedding layer, it is required before setting the weights of the embedding layer.
    # Do not modify the "None".
    embedding_layer.build( ( None, ) )
    
    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights( [emb_matrix] )
    
    return embedding_layer
Example no. 14
def make_embedding_layer_without_glove(train_descriptions,
                                       embedding_dim=50,
                                       glove=True):

    embeddings_index = {}
    vocab_size, ixtoword, wordtoix, vocab = get_vocab_size_and_indexing(
        train_descriptions)
    embedding_matrix = np.zeros(
        (vocab_size,
         embedding_dim))  # to import as weights for Keras Embedding layer

    for word, i in wordtoix.items():
        # no pre-trained vectors here: fill each row with the word's integer index as a dummy value
        embedding_matrix[i] = i

    embedding_layer = Embedding(vocab_size,
                                embedding_dim,
                                mask_zero=True,
                                trainable=False)
    embedding_layer.build((None, ))
    embedding_layer.set_weights([embedding_matrix])

    return embedding_layer
Example no. 15
    def keras_embeddings_layer(self):
        vocab_len = len(self.model.wv.vocab)
        emb_dim = self.model.wv.vector_size
        emb_matrix = self.model.wv.syn0
        embedding_layer = Embedding(vocab_len, emb_dim, trainable=True)
        embedding_layer.build((None, ))
        embedding_layer.set_weights([emb_matrix])
        return embedding_layer
Example no. 16
def build_word_sequence_lstm_model(word_indices, word_vectors, output_dim):
    lstm_unit_size = args.lstm_unit_size
    learning_rate = args.learning_rate
    dropout = 0.6
    # from word_indices of all word vocabulary to word_embedded for one sentence of text
    word_symbols = len(word_indices) + 1
    word_embedding_weights = np.zeros((word_symbols, word_embeddings_size))
    for word, index in word_indices.items():
        try:
            word_embedding_weights[index, :] = word_vectors[word]
        except KeyError:
            word_embedding_weights[
                index, :] = np.ones(word_embeddings_size) * -1

    doc_word_input = Input(shape=(max_num_of_sentences,
                                  max_num_of_tokens_per_sentence),
                           dtype='int64',
                           name="doc_word_input")
    sent_word_input = Input(shape=(max_num_of_tokens_per_sentence, ),
                            dtype='int64',
                            name="sent_word_input")

    word_embedding_layer = Embedding(output_dim=word_embeddings_size,
                                     input_dim=word_symbols,
                                     mask_zero=True)
    word_embedding_layer.build(
        (None, ))  # if you don't do this, the next step won't work
    word_embedding_layer.set_weights([word_embedding_weights])
    word_embedded = word_embedding_layer(sent_word_input)

    bi_lstm_word_sent = Bidirectional(
        GRU(lstm_unit_size, return_sequences=True))(word_embedded)

    #attention_word = AttentionWithContext()(bi_lstm_word_sent)

    word_sent_encode = Dropout(dropout)(bi_lstm_word_sent)  #(attention_word)
    word_encoder = Model(inputs=sent_word_input, outputs=word_sent_encode)
    word_encoded = TimeDistributed(word_encoder)(doc_word_input)

    b_lstm_word_doc = Bidirectional(GRU(lstm_unit_size,
                                        return_sequences=False))(word_encoded)

    word_output = Dropout(dropout, name='final_layer_word')(
        b_lstm_word_doc)  #(attention_doc)
    word_output = Dense(output_dim,
                        activation='softmax')(word_output)  #(word_output)

    model = Model(inputs=doc_word_input, outputs=word_output)
    rmsprop = RMSprop(lr=learning_rate)
    model.compile(optimizer=rmsprop,
                  loss='categorical_crossentropy',
                  metrics=['categorical_accuracy'])

    final_layer = Model(inputs=model.input,
                        outputs=model.get_layer('final_layer_word').output)
    # final_layer = Model(inputs=model.input, outputs=model.get_layer('final_layer_word').output)

    return model, final_layer
Example no. 17
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    vocab_len = len(word_to_index) + 1            
    emb_dim = word_to_vec_map["cucumber"].shape[0]  
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
Example no. 18
def get_model(num_genes, num_diseases):
    # Input variables
    gene_input = Input(shape=(1, ), dtype='int32', name='gene_input')
    disease_input = Input(shape=(1, ), dtype='int32', name='disease_input')

    # using gene feature matrix for initialization
    gene_feas = h5py.File('gene_features.mat', 'r')
    humannet_features = gene_feas['features'][:, :].T
    gene_feature_size = humannet_features.shape
    assert gene_feature_size[0] + 1 == num_genes

    humannet_features = np.insert(humannet_features, 0,
                                  [0] * gene_feature_size[1], 0)
    humannet_embedding_layer = Embedding(input_dim=gene_feature_size[0] + 1,
                                         output_dim=gene_feature_size[1],
                                         trainable=True)
    humannet_embedding_layer.build((None, ))
    humannet_embedding_layer.set_weights([humannet_features])

    # using disease feature matrix for initializationmerge
    feas = h5py.File('disease_features.mat', 'r')
    omim_features = feas['col_features'][:, :].T
    disease_feature_size = omim_features.shape
    assert disease_feature_size[0] + 1 == num_diseases

    omim_features = np.insert(omim_features, 0, [0] * disease_feature_size[1],
                              0)
    omim_embedding_layer = Embedding(input_dim=disease_feature_size[0] + 1,
                                     output_dim=disease_feature_size[1],
                                     trainable=True)
    omim_embedding_layer.build((None, ))
    omim_embedding_layer.set_weights([omim_features])

    # get specific features
    gene_feature = Flatten(name='')(humannet_embedding_layer(gene_input))
    disease_feature = Flatten()(omim_embedding_layer(disease_input))
    # disease_feature = K.transpose(disease_feature)
    # projection matrix, using W * H' as initialization

    project_disease = Dense(gene_feature_size[1],
                            trainable=True,
                            name='disease_gene_projection',
                            activation='relu',
                            kernel_initializer=init_weights)(disease_feature)

    score = dot([gene_feature, project_disease], 1, name='inner_product')
    # calculate score
    # score = merge([gene_feature, project_disease], mode='mul', name='score')

    # prediction = Dense(1, activation='sigmoid', init='he_uniform', name='prediction')(score)

    model = Model(inputs=[gene_input, disease_input], outputs=score)

    return model
Example no. 19
def build_resnet_model(word_indices, word_vectors,
                       output_dim):  # Currently does not work
    learning_rate = args.learning_rate
    dropout = 0.5

    word_symbols = len(word_indices) + 1
    word_embedding_weights = np.zeros((word_symbols, word_embeddings_size))
    for word, index in word_indices.items():
        word_embedding_weights[index, :] = word_vectors[word]

    doc_word_input = Input(shape=(max_num_of_sentences,
                                  max_num_of_tokens_per_sentence),
                           dtype='int64',
                           name="doc_word_input")
    sent_word_input = Input(shape=(max_num_of_tokens_per_sentence, ),
                            dtype='int64',
                            name="sent_word_input")

    word_embedding_layer = Embedding(output_dim=word_embeddings_size,
                                     input_dim=word_symbols,
                                     mask_zero=True)
    word_embedding_layer.build(
        (None, ))  # if you don't do this, the next step won't work
    word_embedding_layer.set_weights([word_embedding_weights])
    word_embedded = word_embedding_layer(sent_word_input)

    blocks = [2, 2, 2, 2]
    block = keras_resnet.blocks.basic_1d

    resnet_word_sent = keras_resnet.models.ResNet50(word_embedded,
                                                    blocks,
                                                    block,
                                                    classes=output_dim)

    word_sent_encode = Dropout(dropout)(resnet_word_sent)
    word_encoder = Model(inputs=sent_word_input, outputs=word_sent_encode)
    word_encoded = TimeDistributed(word_encoder)(doc_word_input)

    resnet_word_doc = keras_resnet.models.ResNet(word_encoded,
                                                 blocks,
                                                 block,
                                                 classes=output_dim)

    word_output = Dropout(dropout)(resnet_word_doc)
    word_output = Dense(output_dim, activation='softmax')(word_output)

    model = Model(inputs=doc_word_input, outputs=word_output)
    rmsprop = RMSprop(lr=learning_rate)
    model.compile(optimizer=rmsprop,
                  loss='categorical_crossentropy',
                  metrics=['categorical_accuracy'])
    return model
Example no. 20
def build_char_sequence_lstm_model(char_indices, char_vectors, output_dim):
    lstm_unit_size = args.lstm_unit_size
    learning_rate = args.learning_rate
    lstm_dropout = 0.15
    dropout = 0.3

    doc_char_input = Input(shape=(max_num_of_sentences, max_num_of_chars), dtype='int64', name="doc_char_input")
    sent_char_input = Input(shape=(max_num_of_chars,), dtype='int64', name="sent_char_input")

    char_symbols = len(char_indices) + 1
    char_embedding_weights = np.zeros((char_symbols, char_embeddings_size))
    for char, index in char_indices.items():
        char_embedding_weights[index, :] = char_vectors[char]
    char_embedding_layer = Embedding(output_dim=char_embeddings_size, input_dim=char_symbols, mask_zero=True)
    char_embedding_layer.build((None,))  # if you don't do this, the next step won't work
    char_embedding_layer.set_weights([char_embedding_weights])
    char_embedded = char_embedding_layer(sent_char_input)

    # filter_length = [5, 3, 3]
    # nb_filter = [196, 196, 256]
    # pool_length = 2
    # char_embedded = Lambda(binarize_char, output_shape=binarize_char_outshape)(sent_char_input)
    # for i in range(len(nb_filter)):
    #     char_embedded = Conv1D(filters=nb_filter[i], kernel_size=filter_length[i], padding='valid', activation='relu',
    #                            kernel_initializer='glorot_normal', strides=1)(char_embedded)
    #
    #     char_embedded = Dropout(0.1)(char_embedded)
    #     char_embedded = MaxPooling1D(pool_size=pool_length)(char_embedded)

    bi_lstm_char_sent = Bidirectional(
        LSTM(lstm_unit_size, return_sequences=False, dropout=lstm_dropout, recurrent_dropout=lstm_dropout))(
        char_embedded)

    char_sent_encode = Dropout(dropout)(bi_lstm_char_sent)
    char_encoder = Model(inputs=sent_char_input, outputs=char_sent_encode)
    char_encoded = TimeDistributed(char_encoder)(doc_char_input)

    b_lstm_char_doc = Bidirectional(
        LSTM(lstm_unit_size, return_sequences=False, dropout=lstm_dropout, recurrent_dropout=lstm_dropout))(
        char_encoded)

    char_output = Dropout(dropout, name='final_layer_char')(b_lstm_char_doc)
    char_output = Dense(output_dim, activation='softmax')(char_output)

    model = Model(inputs=doc_char_input, outputs=char_output)
    rmsprop = RMSprop(lr=learning_rate)
    model.compile(optimizer=rmsprop, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

    final_layer = Model(inputs=model.input, outputs=model.get_layer('final_layer_char').output)

    return model, final_layer
Example no. 21
class PositionEmbedding(Layer):

    def __init__(self, max_time=1000, n_waves=16, d_model=64, name='PositionEmbedding', **kwargs):
        """
        Position embedding via sin and cos functions
        For incoming ``position`` produces embedding of dimension ``n_waves * 2``
        ``embedding[2*i] = sin(positions / 10. ** (2. * i / d_model))``
        ``embedding[2*i+1] = cos(positions / 10. ** (2. * i / d_model))``
        :param max_time: maximum time dimension of input sequence
        """
        self.max_time = max_time
        self.n_waves = n_waves
        self.d_model = d_model
        emb_weights = pos_encoding(max_time, n_waves,d_model)
        self.embedding_layer = Embedding(max_time, n_waves * 2,
                                         weights=[emb_weights],
                                         trainable=False)

        super(PositionEmbedding, self).__init__(**kwargs)
        self.name = name

    def build(self, input_shapes):
        self.embedding_layer.build((None, None))
        self.built = True

    def call(self, x):
        samples = K.shape(x)[0]
        time = K.shape(x)[1]
        pos_enc = self.embedding_layer(
            K.reshape(K.arange(time, dtype='int32'), (1, -1)))
        return K.tile(pos_enc, (samples, 1, 1))

    def compute_output_shape(self, input_shape):
        """
            For Keras internal compatibility checking
        """
        return (None, None, self.n_waves * 2)

    def get_config(self):
        """
            For rebuilding models on load time.
        """
        # TODO: teacher forcing
        config = {
            'n_waves': self.n_waves,
            'max_time': self.max_time,
            'd_model': self.d_model
        }
        base_config = super(PositionEmbedding, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
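The pos_encoding helper used in the constructor above is not shown here; the following is a minimal sketch of what it could compute, derived only from the sin/cos formula in the class docstring (a hypothetical reconstruction, not the author's implementation):

import numpy as np

def pos_encoding(max_time, n_waves, d_model):
    positions = np.arange(max_time)[:, None]         # shape (max_time, 1)
    i = np.arange(n_waves)[None, :]                   # wave index, shape (1, n_waves)
    angles = positions / 10. ** (2. * i / d_model)    # one frequency per wave
    emb = np.zeros((max_time, n_waves * 2))
    emb[:, 0::2] = np.sin(angles)                     # embedding[2*i]   = sin(...)
    emb[:, 1::2] = np.cos(angles)                     # embedding[2*i+1] = cos(...)
    return emb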
Example no. 22
def create_model(**kwargs):
    """ Function creating the model's graph in Keras.

    Arguments:
    input_shape -- shape of the model's input data (using Keras conventions)
    embedding_matrix -- matrix to map word index to word embedding vector
    vocab_len -- the size of vocabulary
    n_d1 -- output dimension for 1st GRU layer
    n_d2 -- output dimension for 2nd GRU layer
    n_c -- output dimension for output layer

    Returns:
    model -- Keras model instance
    """

    embedding_matrix = kwargs.get('embedding_matrix')
    vocab_len = kwargs.get('vocab_len')
    n_d1 = kwargs.get('n_d1')
    n_d2 = kwargs.get('n_d2')
    n_c = kwargs.get('n_c')

    # define input
    X_input = Input(shape=kwargs.get('input_shape'))

    # define and create embedding layer
    embedding_layer = Embedding(vocab_len + 1,
                                embedding_matrix.shape[1],
                                trainable=False)
    embedding_layer.build((None, ))
    embedding_layer.set_weights([embedding_matrix])

    # add embedding layer
    X = embedding_layer(X_input)

    # add bidirectional GRU layer
    X = Bidirectional(GRU(n_d1, return_sequences=True))(X)
    X = Dropout(0.5)(X)
    X = BatchNormalization()(X)

    # add another GRU layer (unidirectional)
    X = GRU(n_d2, return_sequences=True)(X)
    X = Dropout(0.5)(X)
    X = BatchNormalization()(X)

    # get output for each time slot
    outputs = TimeDistributed(Dense(n_c, activation=softmax2))(X)

    # create and return keras model instance
    return Model(inputs=[X_input], outputs=outputs)
Example no. 23
def create_embedding_layer(embeddings_index,
                           words_to_index,
                           vocab_length=100,
                           output_dim=300):
    emb_matrix = np.zeros((vocab_length + 1, output_dim))

    for word, index in words_to_index.items():
        if word in embeddings_index:
            emb_matrix[index, :] = embeddings_index[word]

    embedding_layer = Embedding(vocab_length + 1, output_dim, trainable=True)
    embedding_layer.build((None, ))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
Example no. 24
def pretrained_embedding_layer(word_vec, word_index):
    vocab_len = cf['MAX_WORDS'] + 1
    emb_dim = 300
    emb_matrix = np.zeros((vocab_len, emb_dim))

    for word, index in word_index.items():
        vec = word_vec.get(word, np.zeros(emb_dim))
        if (index > cf['MAX_WORDS']):
            break
        emb_matrix[index, :] = vec
    # define the Embedding layer and specify that its weights should not be trained
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)
    embedding_layer.build((None, ))  # build
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
Example no. 25
def _create_binary_embedding():
    """
    Creates an embedding matrix based on the binary representation
    Keras will transform each one byte to a one-hot vector of size 256
    This is required for RNN networks
    :return: An embedding layer
    """
    emb_matrix = np.zeros((256, 256))
    for i in range(0, 256):
        emb_matrix[i, i] = 1

    embedding_layer = Embedding(256, 256, trainable=False)
    embedding_layer.build((None, ))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
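As a quick sanity check of the one-hot matrix built above (assuming numpy and the Keras 2-style Embedding API used throughout these examples), each byte value should map to the matching one-hot row:

layer = _create_binary_embedding()
row = layer.get_weights()[0][65]      # row for byte value 65
assert row[65] == 1.0 and row.sum() == 1.0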
Example no. 26
def word_sequence_GRU_model(word_indices, word_model, unique_labels_count):
    data_dim = word_embeddings_size
    timesteps = 50
    num_classes = unique_labels_count
    batch_size = 25

    #word_embedding_weights is the (index, vector) of words vocabulary
    word_symbols = len(word_indices) + 1
    word_embedding_weights = np.zeros((word_symbols, word_embeddings_size))

    for word, index in word_indices.items():
        try:
            word_embedding_weights[index, :] = word_model[word]
        except KeyError:
            word_embedding_weights[
                index, :] = np.ones(word_embeddings_size) * -1

    # word_model["some"] return the vector of data "some"
    # word_data[0] is the data for text0 with the index of 0.

    sent_word_input = Input(shape=(max_num_of_tokens_per_sentence, ),
                            dtype='int64',
                            name="sent_word_input")

    word_embedding_layer = Embedding(output_dim=word_embeddings_size,
                                     input_dim=word_symbols,
                                     mask_zero=True)
    word_embedding_layer.build(
        (None, ))  # if you don't do this, the next step won't work
    word_embedding_layer.set_weights([word_embedding_weights])
    word_embedded = word_embedding_layer(sent_word_input)

    # a Sequential model cannot be called on a tensor; build it by adding layers instead
    model = Sequential()
    model.add(word_embedding_layer)
    model.add(
        LSTM(lstm_unit_size,
             return_sequences=True,
             input_shape=(timesteps, word_embeddings_size)))
    model.add(
        LSTM(lstm_unit_size,
             return_sequences=True,
             stateful=True,
             batch_input_shape=(batch_size, timesteps, lstm_unit_size)))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    return model
Example no. 27
def pre_trained_emb_model(word_to_vec_map, word_to_idx):
    vocab_len = len(word_to_idx)
    emb_len = word_to_vec_map['the'].shape[0]

    embed_matrix = np.zeros((vocab_len, emb_len))

    for word, index in word_to_idx.items():
        embed_matrix[index, :] = word_to_vec_map[word]

    embedding_layer = Embedding(input_dim=vocab_len, output_dim=emb_len, trainable=False)

    embedding_layer.build((None,))

    embedding_layer.set_weights([embed_matrix])

    return embedding_layer
Example no. 28
def get_chebi_common_channel(chebi_input, id_to_index):
    e_ancestors = Embedding(len(id_to_index),
                            chebi_embbed_size,
                            input_length=max_ancestors_length * 2,
                            trainable=True)
    e_ancestors.build((None, ))
    e_ancestors = e_ancestors(chebi_input)

    e_ancestors = Dropout(0.5)(e_ancestors)
    ancestors_lstm = LSTM(LSTM_units,
                          input_shape=(max_ancestors_length * 2,
                                       chebi_embbed_size),
                          return_sequences=True)(e_ancestors)
    #kernel_regularizer=regularizers.l2(sigmoid_l2_reg))(e_ancestors)

    ancestors_pool = GlobalMaxPooling1D()(ancestors_lstm)
    return ancestors_pool
Example no. 29
def get_chebi_concat_channel(chebi_input, id_to_index):
    e_ancestors_left = Embedding(len(id_to_index),
                                 chebi_embbed_size,
                                 input_length=max_ancestors_length,
                                 trainable=True)
    e_ancestors_left.build((None, ))
    e_ancestors_left = e_ancestors_left(chebi_input[0])

    e_ancestors_left = Dropout(dropout1)(e_ancestors_left)

    e_ancestors_right = Embedding(len(id_to_index),
                                  chebi_embbed_size,
                                  input_length=max_ancestors_length,
                                  trainable=True)
    e_ancestors_right.build((None, ))
    # e_right.set_weights([embedding_matrix])
    e_ancestors_right = e_ancestors_right(chebi_input[1])

    e_ancestors_right = Dropout(dropout1)(e_ancestors_right)

    #ancestors_lstm_left = LSTM(LSTM_units, input_shape=(max_ancestors_length, chebi_embbed_size), return_sequences=True,
    #                           kernel_regularizer=regularizers.l2(sigmoid_l2_reg))(e_ancestors_left)
    #ancestors_lstm_right = LSTM(LSTM_units, input_shape=(max_ancestors_length, chebi_embbed_size),
    #                            return_sequences=True,
    #                            kernel_regularizer=regularizers.l2(sigmoid_l2_reg))(e_ancestors_right)
    #ancestors_pool_left = GlobalMaxPooling1D()(ancestors_lstm_left)
    #ancestors_pool_right = GlobalMaxPooling1D()(ancestors_lstm_right)

    attention_rnn_left = LSTM(LSTM_units,
                              input_shape=(max_ancestors_length,
                                           chebi_embbed_size),
                              return_sequences=True)(e_ancestors_left)
    #kernel_regularizer=regularizers.l2(sigmoid_l2_reg))(e_ancestors_left)
    attention_rnn_right = LSTM(LSTM_units,
                               input_shape=(max_ancestors_length,
                                            chebi_embbed_size),
                               return_sequences=True)(e_ancestors_right)
    #  kernel_regularizer=regularizers.l2(sigmoid_l2_reg))(e_ancestors_right)
    ancestors_pool_left = GlobalMaxPooling1D()(attention_rnn_left)
    ancestors_pool_right = GlobalMaxPooling1D()(attention_rnn_right)

    #ancestors_dense_left = Dense(LSTM_units, input_shape=(max_ancestors_length, chebi_embbed_size))(e_ancestors_left)
    #ancestors_dense_right = Dense(LSTM_units, input_shape=(max_ancestors_length, chebi_embbed_size))(e_ancestors_right)
    #return ancestors_dense_left, ancestors_dense_right
    return ancestors_pool_left, ancestors_pool_right
Example no. 30
def pretrained_embedding_layer(word_to_vec_map, source_vocab_to_int):
    vocab_len = len(source_vocab_to_int) + 1
    emb_dim = word_to_vec_map["you"].shape[0]

    emb_matrix = np.zeros((vocab_len, emb_dim))

    for word, index in source_vocab_to_int.items():
        word_vector = word_to_vec_map.get(word, np.zeros(emb_dim))
        emb_matrix[index, :] = word_vector

    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    embedding_layer.build((None, ))

    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
Example no. 31
def create_model(G=None, max_len=0):
    ###### MODEL CREATION ######
    # Embedding
    # create Keras embedding layer
    word_to_idx, idx_to_word, word_embeddings = G.read_embedding()
    # vocabulary_len = len(word_to_idx) + 1
    vocabulary_len = len(word_to_idx)
    emb_dimension = G.get_dimensions()
    # get the matrix for the sentences
    embedding_matrix = word_embeddings
    # embedding layer
    embedding_layer = Embedding(input_dim=vocabulary_len,
                                output_dim=emb_dimension,
                                input_length=max_len,
                                trainable=False,
                                name="EMBEDDING")
    embedding_layer.build((None, ))
    embedding_layer.set_weights([embedding_matrix])

    first_layer_units = 128
    first_layer_dropout = 0.5
    second_layer_units = 128
    second_layer_dropout = 0.5
    relu_dense_layer = 64
    dense_layer_units = 3

    # Model
    model = Sequential()
    model.add(embedding_layer)
    model.add(
        LSTM(first_layer_units,
             return_sequences=True,
             name='LSTM_1',
             recurrent_dropout=0.4))
    model.add(Dropout(first_layer_dropout, name="DROPOUT_1"))
    model.add(Dense(200, activation='relu'))
    model.add(LSTM(second_layer_units, return_sequences=False, name="LSTM_2"))
    model.add(Dropout(second_layer_dropout, name="DROPOUT_2"))
    model.add(Dense(relu_dense_layer, activation='relu'))
    model.add(Dense(dense_layer_units))
    model.add(Activation("softmax", name="softmax_final"))
    model.compile(optimizer=RMSprop(decay=0.001),
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])
    return model