Example #1
0
def pretrained_word_emb(vocab, emb_dim):
    """
    Build an Embedding layer whose leading rows hold pre-trained word vectors.

    Row ``i`` of the weight matrix receives the vector stored under the
    ``i``-th key of the loaded word -> vector mapping (iteration order).

    Arguments:
    vocab -- mapping whose 'word' entry supports len() and load_word2emb()
    emb_dim -- width of every embedding vector

    Returns:
    layer -- the Embedding layer with its weight matrix overwritten
    """
    vectors_by_word = vocab['word'].load_word2emb()
    layer = Embedding(len(vocab['word']), emb_dim)

    # NOTE(review): the weights are read right after construction; this assumes
    # the layer already exposes its matrix at that point -- confirm for the
    # Keras version in use.
    weight_matrix = layer.get_weights()[0]

    row = 0
    for token in vectors_by_word.keys():
        weight_matrix[row] = vectors_by_word[token]
        row += 1

    layer.set_weights([weight_matrix])
    return layer
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained word vectors.

    Vectors whose length does not fit the embedding dimension are reported on
    stdout and truncated to the embedding dimension.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their vector representation;
                       the "THIS" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their row index

    Returns:
    embedding_layer -- built Keras Embedding instance holding the matrix
    """
    vocab_len = len(
        word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["THIS"].shape[0]  # dimensionality of the vectors
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        # Look the vector up outside the try block: a missing word is a real
        # error and must surface as a plain KeyError, not as a "failed here".
        vector = word_to_vec_map[word]
        try:
            emb_matrix[index, :] = vector
        except ValueError:
            # Only a shape mismatch is recoverable; report it and truncate to
            # the actual embedding width rather than a hard-coded 50.
            print("failed here")
            print("word = " + str(word))
            print("index = " + str(index))
            print("word to vec:")
            print(vector)
            print("length of w3v:")
            print(str(len(vector)))
            emb_matrix[index, :] = vector[:emb_dim]
    embedding_layer = Embedding(vocab_len, emb_dim)
    embedding_layer.build((None, ))
    embedding_layer.set_weights([emb_matrix])
    return embedding_layer
Example #3
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Build a frozen Keras Embedding() layer pre-loaded with GloVe word vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- built Keras Embedding instance carrying the GloVe matrix
    """
    # One extra row on top of the vocabulary to fit the Keras embedding (requirement).
    n_rows = len(word_to_index) + 1
    reference_vector = word_to_vec_map["cucumber"]
    n_cols = reference_vector.shape[0]

    # Row "row" of the matrix becomes the vector of the word mapped to "row";
    # rows that no word maps to stay all-zero.
    glove_weights = np.zeros(shape=(n_rows, n_cols))
    for token, row in word_to_index.items():
        glove_weights[row, :] = word_to_vec_map[token]

    # The layer is non-trainable: the pre-trained vectors are kept as they are.
    layer = Embedding(n_rows, n_cols, trainable=False)

    # The layer has to be built before its weights can be assigned.
    layer.build((None, ))
    layer.set_weights([glove_weights])

    return layer
Example #4
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Create a non-trainable Keras Embedding() layer filled with pre-trained vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- built Keras Embedding instance with the weights set
    """
    input_dim = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    output_dim = word_to_vec_map["cucumber"].shape[0]

    # Start from zeros, then copy each word's vector into the row given by its index.
    emb_matrix = np.zeros((input_dim, output_dim))
    for word in word_to_index:
        emb_matrix[word_to_index[word], :] = word_to_vec_map[word]

    embedding_layer = Embedding(input_dim=input_dim,
                                output_dim=output_dim,
                                trainable=False)

    # Build first: weights can only be assigned to a built layer.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
def pretrained_embedding_layer(word_to_vec, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe vectors.

    The vector width is read from the vector of the first vocabulary word, so
    the function is not tied to 50-dimensional vectors. 50 is only used as the
    fallback width when the vocabulary is empty.

    Arguments:
    word_to_vec -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- built, non-trainable Keras Embedding instance
    """
    vocab_len = len(word_to_index) + 1  # adding 1 to fit Keras embedding

    # Infer the width from the data instead of assuming 50.
    first_word = next(iter(word_to_index), None)
    if first_word is None:
        vec_len = 50
    else:
        vec_len = np.shape(word_to_vec[first_word])[0]

    emb_matrix = np.zeros((vocab_len, vec_len))

    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec[word]

    embedding_layer = Embedding(vocab_len, vec_len, trainable=False)
    # The layer must be built before its weights can be set.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
Example #6
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Create a non-trainable Keras Embedding layer loaded with pre-trained vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- built Keras Embedding instance with the weights set
    """
    # (rows, columns) of the embedding matrix: vocabulary size + 1, vector width.
    matrix_shape = (len(word_to_index) + 1,
                    word_to_vec_map["cucumber"].shape[0])

    # Initialise the embedding matrix with zeros.
    emb_matrix = np.zeros(matrix_shape)

    # Row "row" of the matrix is the vector of the word whose index is "row".
    for word, row in word_to_index.items():
        emb_matrix[row, :] = word_to_vec_map[word]

    # Define the Keras embedding layer.
    embedding_layer = Embedding(*matrix_shape, trainable=False)

    # Build the layer, then set its weights to the embedding matrix.
    embedding_layer.build((None, ))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Create a Keras embedding layer and load the pre-trained weights into it.
    :param word_to_vec_map: dictionary mapping words to their vectors; the
                            "cucumber" entry is used to read the vector dimension
    :param word_to_index: dictionary mapping words to their row index
    :return: embedding_layer: a built, non-trainable Keras layer
    """
    row_count = len(word_to_index) + 1
    column_count = word_to_vec_map["cucumber"].shape[0]

    # Initialise the embedding matrix: one row per word, one column per component.
    pretrained = np.zeros(shape=(row_count, column_count))
    for token in word_to_index:
        pretrained[word_to_index[token], :] = word_to_vec_map[token]

    # The layer is created as non-trainable, so the loaded vectors stay fixed.
    embedding_layer = Embedding(input_dim=row_count,
                                output_dim=column_count,
                                trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([pretrained])

    return embedding_layer
Example #8
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Build a non-trainable Keras Embedding() layer from pre-trained GloVe vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """

    n_words = len(word_to_index) + 1  # adding 1 to fit Keras embedding (requirement)
    cucumber_vector = word_to_vec_map["cucumber"]
    n_features = cucumber_vector.shape[0]

    # Step 1: zero-initialised embedding matrix.
    weights = np.zeros([n_words, n_features])

    # Step 2: row "position" holds whatever get_word_value() yields for the
    # word stored at that position of the vocabulary.
    for token in word_to_index:
        position = word_to_index[token]
        weights[position, :] = get_word_value(word_to_vec_map, token)

    # Step 3: non-trainable layer with matching input and output sizes.
    layer = Embedding(input_dim=n_words, output_dim=n_features, trainable=False)

    # Step 4: build the layer (required before assigning weights), then load them.
    layer.build((None,))
    layer.set_weights([weights])

    return layer
Example #9
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Return a built Keras Embedding() layer holding pre-trained GloVe vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    # adding 1 to fit Keras embedding (requirement)
    vocabulary_rows = len(word_to_index) + 1
    vector_width = word_to_vec_map["cucumber"].shape[0]

    matrix = np.zeros((vocabulary_rows, vector_width))
    for token, position in word_to_index.items():
        vector = word_to_vec_map[token]
        matrix[position, :] = vector

    # Created frozen; switch trainable on when fine-tuning is wanted.
    embedding_layer = Embedding(input_dim=vocabulary_rows,
                                output_dim=vector_width,
                                trainable=False)
    # Building is required before the weights of the layer can be set.
    embedding_layer.build((None,))
    embedding_layer.set_weights([matrix])

    return embedding_layer
Example #10
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index, maxLen):
    """
    Build a frozen Keras Embedding() layer from pre-trained word vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary
    maxLen -- value forwarded to the layer as input_shape=(maxLen, )

    Returns:
    embedding_layer -- built Keras Embedding instance with the weights set
    """
    # adding 1 to fit Keras embedding (requirement)
    row_total = len(word_to_index) + 1
    # dimensionality of the word vectors
    col_total = word_to_vec_map["cucumber"].shape[0]

    pretrained_weights = np.zeros(shape=(row_total, col_total))
    for token in word_to_index:
        pretrained_weights[word_to_index[token], :] = word_to_vec_map[token]

    layer_options = {"trainable": False, "input_shape": (maxLen, )}
    embedding_layer = Embedding(row_total, col_total, **layer_options)

    # The layer must be built before its weights can be set; then load the matrix.
    embedding_layer.build((None, ))
    embedding_layer.set_weights([pretrained_weights])

    return embedding_layer
Example #11
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Adapted from Deep Learning Specialization by deeplearning.ai: https://www.coursera.org/specializations/deep-learning?

    Build a non-trainable Keras Embedding() layer loaded with pre-trained GloVe vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """

    # Size of the matrix: vocabulary + 1 rows (Keras requirement), one column
    # per component of a GloVe vector.
    size = (len(word_to_index) + 1, word_to_vec_map["cucumber"].shape[0])

    # Zero matrix, then one row per vocabulary word at that word's index.
    glove_matrix = np.zeros(size)
    for token, slot in word_to_index.items():
        glove_matrix[slot, :] = word_to_vec_map[token]

    # Layer with matching input/output sizes, frozen.
    embedding_layer = Embedding(size[0], size[1], trainable=False)

    # Build the layer (required before setting weights) and load the matrix.
    embedding_layer.build((None, ))
    embedding_layer.set_weights([glove_matrix])

    return embedding_layer
Example #12
0
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Return a frozen Keras Embedding() layer initialised with GloVe vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation;
                       the "cucumber" entry is used to read the vector dimension.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """

    num_rows = len(word_to_index) + 1
    sample_vector = word_to_vec_map["cucumber"]
    num_cols = sample_vector.shape[0]

    weights = np.zeros(shape=(num_rows, num_cols))
    for token in word_to_index:
        row = word_to_index[token]
        weights[row, :] = word_to_vec_map[token]

    # trainable=False: don't modify the embeddings
    embedding_layer = Embedding(num_rows, num_cols, trainable=False)
    embedding_layer.build((None,))
    embedding_layer.set_weights([weights])

    return embedding_layer
    try:
        embedding_matrix[index,:] = word_to_vec_map[word]
    # if word is not present in GloVe vectors, that index position is already filled with zeros, as we had initialized
    # all rows to zero in the first place
    except:
        continue 
                 
# Make a Keras embedding layer of shape (vocab_len, emb_dim). Set the 'trainable' argument to True instead if you want to
# train your own word embeddings on top of the pre-trained GloVe vectors (can improve performance, as the embeddings become
# better suited to the current text corpus).
embed_layer = Embedding(input_dim = vocab_len, output_dim = emb_dim, trainable = False)

# Build the embedding layer; this is required before its weights can be set.
embed_layer.build((None,))

# Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
embed_layer.set_weights([embedding_matrix])

# Start defining the Keras model
# Define sentence_indices as the input to the model, of shape (maxlen,) and dtype 'int32'
# (the comma between the two keyword arguments was missing, which is a SyntaxError)
sentence_indices = Input(shape = (maxlen,), dtype = 'int32')

# Propagate sentence_indices through the embedding layer, to get the embeddings
embeddings = embed_layer(sentence_indices)

# Propagate the embeddings through an LSTM layer with 300-dimensional hidden state
X = LSTM(300, return_sequences = True)(embeddings)

# Add dropout with a probability of 0.5
X = Dropout(0.5)(X)

# Propagate X through another LSTM layer with 300-dimensional hidden state
Example #14
0
from keras.layers import Dense, Dot, Input, Reshape
from keras.layers.embeddings import Embedding
from keras.models import Model
from keras.preprocessing.text import Tokenizer



# START BUILDING THE KERAS MODEL FOR TRAINING
# Keras `Input` (capital I) creates a symbolic tensor; the lowercase builtin `input()` would block on stdin instead.
input_target = Input((1,))
input_context = Input((1,))

# make a Keras embedding layer of shape (vocab_size, vector_dim) and set 'trainable' argument to 'True'
embedding = Embedding(input_dim = vocab_size, output_dim = vector_dim, input_length = 1, name='embedding', trainable = True)

# load pre-trained weights (embeddings) from 'embedding_matrix' into the Keras embedding layer;
# the layer has to be built before its weights can be set
embedding.build((None,))
embedding.set_weights([embedding_matrix])

# run the context and target words through the same embedding layer
context = embedding(input_context)
context = Reshape((vector_dim, 1))(context)

target = embedding(input_target)
target = Reshape((vector_dim, 1))(target)

# compute the dot product of the context and target words, to find the similarity (dot product is usually a measure of similarity)
dot = Dot(axes = 1)([context, target])
dot = Reshape((1,))(dot)
# pass it through a 'sigmoid' activation neuron; this is then compared with the value in 'label' generated from the skipgram
out = Dense(1, activation = 'sigmoid')(dot)

# create model instance
Example #15
0
    def build_model(self):
        """Build and compile the two-tower concept-similarity model.

        Each tower looks a concept id up in two frozen embedding tables
        (node2vec and GloVe, 128 columns each), concatenates the two vectors
        and passes them through a two-layer MLP with batch normalisation and
        PReLU. The normalised dot product of both tower outputs is fed to a
        single sigmoid unit.

        Reads ``self.concept2id``, ``self.weights_n2v``, ``self.weights_glove``
        and ``self.num_gpus``; stores the compiled multi-GPU model in
        ``self.model``.
        """
        # first concept side
        n2v_input_1 = Input(shape=(1, ))
        n2v_emb_layer_1 = Embedding(len(self.concept2id) + 1,
                                    128,
                                    input_length=1,
                                    name="n2vembedding_1",
                                    trainable=False)
        n2v_emb_1 = n2v_emb_layer_1(n2v_input_1)
        n2v_emb_1 = Flatten()(n2v_emb_1)

        glove_input_1 = Input(shape=(1, ))
        glove_emb_layer_1 = Embedding(len(self.concept2id) + 1,
                                      128,
                                      input_length=1,
                                      name="gloveembedding_1",
                                      trainable=False)
        glove_emb_1 = glove_emb_layer_1(glove_input_1)
        glove_emb_1 = Flatten()(glove_emb_1)

        concat_emb_1 = concatenate([n2v_emb_1, glove_emb_1])
        MLP_layer_1_1 = Dense(196, use_bias=False, name="mlp11")(concat_emb_1)
        MLP_layer_1_1 = BatchNormalization(name="batchnorm11")(MLP_layer_1_1)
        MLP_layer_1_1 = PReLU(name="prelu11")(MLP_layer_1_1)
        MLP_layer_1_2 = Dense(128, use_bias=False, name="mlp12")(MLP_layer_1_1)
        MLP_layer_1_2 = BatchNormalization(name="batchnorm12")(MLP_layer_1_2)
        # Was name="mlp12", which duplicated the Dense layer's name above;
        # layer names must be unique within a model (cf. "prelu22" below).
        MLP_layer_1_2 = PReLU(name="prelu12")(MLP_layer_1_2)

        # second_concept_side
        n2v_input_2 = Input(shape=(1, ))
        n2v_emb_layer_2 = Embedding(len(self.concept2id) + 1,
                                    128,
                                    input_length=1,
                                    name="n2vembedding_2",
                                    trainable=False)
        n2v_emb_2 = n2v_emb_layer_2(n2v_input_2)
        n2v_emb_2 = Flatten()(n2v_emb_2)

        glove_input_2 = Input(shape=(1, ))
        glove_emb_layer_2 = Embedding(len(self.concept2id) + 1,
                                      128,
                                      input_length=1,
                                      name="gloveembedding_2",
                                      trainable=False)
        glove_emb_2 = glove_emb_layer_2(glove_input_2)
        glove_emb_2 = Flatten()(glove_emb_2)

        concat_emb_2 = concatenate([n2v_emb_2, glove_emb_2])
        MLP_layer_2_1 = Dense(196, use_bias=False, name="mlp21")(concat_emb_2)
        MLP_layer_2_1 = BatchNormalization(name="batchnorm21")(MLP_layer_2_1)
        MLP_layer_2_1 = PReLU(name="prelu21")(MLP_layer_2_1)
        # NOTE(review): unlike "mlp12" this Dense layer keeps its bias term;
        # confirm whether the asymmetry between the two towers is intended.
        MLP_layer_2_2 = Dense(128, name="mlp22")(MLP_layer_2_1)
        MLP_layer_2_2 = BatchNormalization(name="batchnorm22")(MLP_layer_2_2)
        MLP_layer_2_2 = PReLU(name="prelu22")(MLP_layer_2_2)

        # loss function to train
        dot_layer = dot([MLP_layer_1_2, MLP_layer_2_2], axes=1, normalize=True)
        output = Dense(1,
                       kernel_initializer="random_uniform",
                       activation="sigmoid")(dot_layer)

        self.model = Model(
            inputs=[n2v_input_1, glove_input_1, n2v_input_2, glove_input_2],
            outputs=output)

        # Load the pre-trained tables; both towers receive the same weights.
        n2v_emb_layer_1.set_weights(self.weights_n2v)
        n2v_emb_layer_2.set_weights(self.weights_n2v)
        glove_emb_layer_1.set_weights(self.weights_glove)
        glove_emb_layer_2.set_weights(self.weights_glove)

        # make it a multi-gpu model.
        self.model = multi_gpu_model(self.model, gpus=self.num_gpus)
        adam_my = optimizers.Adam(lr=0.01,
                                  beta_1=0.9,
                                  beta_2=0.999,
                                  amsgrad=False)
        self.model.compile(loss="binary_crossentropy", optimizer=adam_my)
Example #16
0
def createNetworks(input_size, labels, n_layers, embedding_type,
                   embedding_similarity, transmode, same_weights,
                   graph_distance, scale_negative, activation_function):
    """ Creates neural network that learns node embeddings of given graph(s)
    
    Inputs:
        INPUT_SIZE Sequence of five values, unpacked in this order:
            dict_size       Number of rows of the Embedding layers (and of the initial weight matrices)
            feature_size    Number of original features; 1 selects integer inputs fed to Embedding layers,
                            any other value selects float inputs of shape (1, feature_size) fed to Dense layers
            embedding_size  Number of embedding features
            class_size      Size the prediction output is reshaped to
            negative_size   Number of negative samples per input (used when EMBEDDING_SIMILARITY is 'softmax')
        LABELS When equal to 'pagerank', an extra tanh Dense layer is applied to every embedding
        N_LAYERS Forwarded to createPredictionBranch
        EMBEDDING_TYPE Type of embedding approach, e.g. 'unified' (unified embedding for target and context nodes) or 'skipgram' (different embeddings for target and context nodes)
        EMBEDDING_SIMILARITY Measure of similarity between node embeddings within one graph; 'l2' adds a UnitNorm constraint to the embedding layers, 'softmax' adds a negative-sample input
        TRANSMODE Flag to specify transfer learning mode; '1graph' builds graph A only, 'noP' builds both graphs without the distance branch
        SAME_WEIGHTS If true, graph B's embedding layers start from graph A's initial weights, otherwise from zeros
        GRAPH_DISTANCE Distance between node embeddings of different graphs
        SCALE_NEGATIVE Forwarded to createSimilarityBranch
        ACTIVATION_FUNCTION Forwarded to createPredictionBranch
        
    Outputs: 
        Tuple (modelEmbedding, branchEmbeddingA, branchEmbeddingB, modelPrediction);
        branchEmbeddingB is None when TRANSMODE is '1graph'
    """

    # what is the correct dictionary size?
    dict_size, feature_size, embedding_size, class_size, negative_size = input_size
    inputsEmbedding = []
    inputsEmbeddingA = []
    inputsEmbeddingB = []
    outputsEmbedding = []
    inputsPrediction = []
    outputsPrediction = []

    if embedding_similarity == 'l2':
        from keras.constraints import UnitNorm
        constraints = UnitNorm(axis=1)
    else:
        constraints = None

    # create embedding branch for graph A
    if feature_size == 1:
        input_shape = (1, )
        input_type = 'int32'
        Embedding_targetA = Embedding(dict_size,
                                      embedding_size,
                                      embeddings_constraint=constraints,
                                      name='Embedding_TargetA')
        Embedding_contextA = Embedding(dict_size,
                                       embedding_size,
                                       embeddings_constraint=constraints,
                                       name='Embedding_ContextA')
    else:
        input_shape = (
            1,
            feature_size,
        )
        input_type = 'float'
        Embedding_targetA = Dense(embedding_size,
                                  activation='tanh',
                                  kernel_constraint=constraints,
                                  name='Embedding_TargetA')
        Embedding_contextA = Dense(embedding_size,
                                   activation='tanh',
                                   kernel_constraint=constraints,
                                   name='Embedding_ContextA')

    input_targetA = Input(shape=input_shape,
                          dtype=input_type,
                          name='Input_TargetA')
    input_contextA = Input(shape=input_shape,
                           dtype=input_type,
                           name='Input_ContextA')
    inputsEmbeddingA.extend([input_targetA, input_contextA])

    # use different or the same encodings for target and context
    # The layer is called first (which builds it) and only then given its
    # random initial weights: dict_size samples from N(0, 0.1 * I).
    # NOTE(review): when feature_size != 1 the layer is a Dense layer; confirm
    # that a single (dict_size, embedding_size) array is a valid weight list
    # for it.
    embedding_targetA = Embedding_targetA(input_targetA)
    target_weights = np.random.multivariate_normal(
        np.zeros(embedding_size), 0.1 * np.identity(embedding_size), dict_size)
    Embedding_targetA.set_weights([target_weights])
    # Only 'skipgram' and 'unified' assign embedding_contextA; any other
    # embedding_type leaves it undefined for the code below.
    if embedding_type == 'skipgram':  # separate embeddings for target and context nodes
        embedding_contextA = Embedding_contextA(input_contextA)
        context_weights = np.random.multivariate_normal(
            np.zeros(embedding_size), 0.1 * np.identity(embedding_size),
            dict_size)
        Embedding_contextA.set_weights([context_weights])
    elif embedding_type == 'unified':  # unified embedding
        embedding_contextA = Embedding_targetA(input_contextA)

    # add more dense layers to embedding branch if predicting pagerank
    if labels == 'pagerank':
        embedding_targetA = Dense(embedding_size,
                                  activation='tanh')(embedding_targetA)
        embedding_contextA = Dense(embedding_size,
                                   activation='tanh')(embedding_contextA)

    # create similarity branch for graph A
    inputsSimilarityA = [embedding_targetA, embedding_contextA]
    if embedding_similarity == 'softmax':
        # add negative samples
        input_negativeA = Input(shape=(negative_size, ) + input_shape[1:],
                                dtype=input_type,
                                name='Input_NegativeA')
        inputsEmbeddingA.extend([input_negativeA])
        # negatives are embedded with the target layer
        embedding_negativeA = Embedding_targetA(input_negativeA)
        # add more dense layers to embedding branch if predicting pagerank
        if labels == 'pagerank':
            embedding_negativeA = Dense(embedding_size,
                                        activation='tanh')(embedding_negativeA)
        inputsSimilarityA.extend([embedding_negativeA])
    similarityA = createSimilarityBranch(
        embedding_size,
        mode=embedding_similarity,
        negative_size=negative_size,
        graph='A',
        scale_negative=scale_negative)(inputsSimilarityA)
    outputsEmbedding.extend([similarityA])
    inputsEmbedding.extend(inputsEmbeddingA)

    # create prediction branch
    # (fed from graph A's target embedding only)
    inputsPrediction.extend([input_targetA])
    predictionBranch = createPredictionBranch(
        embedding_size, n_layers, class_size,
        activation_function)(embedding_targetA)
    predictionOutput = Reshape((class_size, ),
                               name='PredictionOutput')(predictionBranch)
    outputsPrediction.extend([predictionOutput])

    # Everything for graph B mirrors graph A and is skipped in '1graph' mode.
    if transmode != '1graph':
        input_targetB = Input(shape=input_shape,
                              dtype=input_type,
                              name='Input_TargetB')
        input_contextB = Input(shape=input_shape,
                               dtype=input_type,
                               name='Input_ContextB')
        inputsEmbeddingB.extend([input_targetB, input_contextB])

        # create embedding branch for graph B
        if feature_size == 1:
            Embedding_targetB = Embedding(dict_size,
                                          embedding_size,
                                          embeddings_constraint=constraints,
                                          name='Embedding_TargetB')
            Embedding_contextB = Embedding(dict_size,
                                           embedding_size,
                                           embeddings_constraint=constraints,
                                           name='Embedding_ContextB')
        else:
            Embedding_targetB = Dense(embedding_size,
                                      activation='tanh',
                                      kernel_constraint=constraints,
                                      name='Embedding_TargetB')
            Embedding_contextB = Dense(embedding_size,
                                       activation='tanh',
                                       kernel_constraint=constraints,
                                       name='Embedding_ContextB')

        # use different or the same encodings for target and context
        # Graph B starts from graph A's initial weights when same_weights is
        # set, otherwise from an all-zero matrix.
        embedding_targetB = Embedding_targetB(input_targetB)
        Embedding_targetB.set_weights(
            [
                target_weights if same_weights else np.zeros(
                    (dict_size, embedding_size))
            ]
        )  # np.random.multivariate_normal(np.zeros(embedding_size), 0.1*np.identity(embedding_size), dict_size)])
        if embedding_type == 'skipgram':  # separate embeddings for target and context nodes
            embedding_contextB = Embedding_contextB(input_contextB)
            Embedding_contextB.set_weights(
                [
                    context_weights if same_weights else np.zeros(
                        (dict_size, embedding_size))
                ]
            )  # np.random.multivariate_normal(np.zeros(embedding_size), 0.1*np.identity(embedding_size), dict_size)])
        elif embedding_type == 'unified':  # unified embedding
            embedding_contextB = Embedding_targetB(input_contextB)

        # add more dense layers to embedding branch if predicting pagerank
        if labels == 'pagerank':
            embedding_targetB = Dense(embedding_size,
                                      activation='tanh')(embedding_targetB)
            embedding_contextB = Dense(embedding_size,
                                       activation='tanh')(embedding_contextB)

        # create similarity branch for graph B
        inputsSimilarityB = [embedding_targetB, embedding_contextB]
        if embedding_similarity == 'softmax':
            # add negative samples
            input_negativeB = Input(shape=(negative_size, ) + input_shape[1:],
                                    dtype=input_type,
                                    name='Input_NegativeB')
            inputsEmbeddingB.extend([input_negativeB])
            embedding_negativeB = Embedding_targetB(input_negativeB)
            # add more dense layers to embedding branch if predicting pagerank
            if labels == 'pagerank':
                embedding_negativeB = Dense(
                    embedding_size, activation='tanh')(embedding_negativeB)
            inputsSimilarityB.extend([embedding_negativeB])
        similarityB = createSimilarityBranch(
            embedding_size,
            mode=embedding_similarity,
            negative_size=negative_size,
            graph='B',
            scale_negative=scale_negative)(inputsSimilarityB)
        outputsEmbedding.extend([similarityB])
        inputsEmbedding.extend(inputsEmbeddingB)

        # create graph distance branch
        if transmode != 'noP':
            distanceAB = createDistanceBranch(
                embedding_size,
                mode=graph_distance)([embedding_targetA, embedding_targetB])
            outputsEmbedding.extend([distanceAB])

    # outputsEmbedding holds similarityA first, then similarityB (if built),
    # then distanceAB (if built); the per-graph branch models pick by position.
    modelEmbedding = Model(inputs=inputsEmbedding, outputs=outputsEmbedding)
    branchEmbeddingA = Model(inputs=inputsEmbeddingA,
                             outputs=outputsEmbedding[0])
    branchEmbeddingB = Model(
        inputs=inputsEmbeddingB,
        outputs=outputsEmbedding[1]) if transmode != '1graph' else None
    modelPrediction = Model(inputs=inputsPrediction, outputs=outputsPrediction)

    return modelEmbedding, branchEmbeddingA, branchEmbeddingB, modelPrediction
Example #17
0
File: ncf.py Project: rn5l/rsc18
 def init_model(self, train, std=0.01):
     
     #current_item = kl.Input( ( 1, ), name="current_item" )
     
     item = kl.Input( (1,), dtype=self.intX )#, batch_shape=(self.,self.steps) )
     user = kl.Input( (1,), dtype=self.intX )#, batch_shape=(self.batch,1) )
     
     if self.include_artist:
         artist = kl.Input( (1,), dtype=self.intX )#, batch_shape=(self.batch,1) )
     
     trainable = True
     if self.embeddings == 'fixed':
         trainable = False
     
     emb_user_mf = Embedding( embeddings_initializer='random_normal', output_dim=self.factors, input_dim=self.num_users, embeddings_regularizer=l2(self.emb_reg), trainable=trainable )
     emb_user = Embedding( embeddings_initializer='random_normal', output_dim=self.factors, input_dim=self.num_users, embeddings_regularizer=l2(self.emb_reg), trainable=trainable )
     emb_item_mf = Embedding( embeddings_initializer='random_normal', output_dim=self.factors, input_dim=self.num_items, embeddings_regularizer=l2(self.emb_reg), trainable=trainable )
     emb_item = Embedding( embeddings_initializer='random_normal', output_dim=self.factors, input_dim=self.num_items, embeddings_regularizer=l2(self.emb_reg), trainable=trainable )
     
     if self.embeddings != None:
         userw = self.get_latent( self.usermap.index, size=self.factors, col='playlist_id' )
         itemw = self.get_latent( self.itemmap.index, size=self.factors )
         
         emb_user_mf.build((None,))
         emb_item_mf.build((None,))
         emb_user_mf.set_weights([userw])
         emb_item_mf.set_weights([itemw])
         
         emb_user.build((None,))
         emb_item.build((None,))
         emb_user.set_weights([userw])
         emb_item.set_weights([itemw])
     
     if self.include_artist:
         
         emb_user_artist_mf = Embedding( embeddings_initializer='random_normal', output_dim=self.factors, input_dim=self.num_users, embeddings_regularizer=l2(self.emb_reg), trainable=trainable )
         emb_artist_mf = Embedding( embeddings_initializer='random_normal', output_dim=self.factors, input_dim=self.num_artists, embeddings_regularizer=l2(self.emb_reg), trainable=trainable )
         emb_artist = Embedding( embeddings_initializer='random_normal', output_dim=self.factors, input_dim=self.num_artists, embeddings_regularizer=l2(self.emb_reg), trainable=trainable )
         
         if self.embeddings != None:
             userw = self.get_latent( self.usermap.index, size=self.factors, col='playlist_id', count=True )
             artistw = self.get_latent( self.itemmap.index, size=self.factors, col='artist_id', count=True )
         
             emb_user_artist_mf.build((None,))
             emb_artist_mf.build((None,))
             emb_user_artist_mf.set_weights([userw])
             emb_artist_mf.set_weights([artistw])
         
             emb_artist.build((None,))
             emb_artist.set_weights([artistw])
             
     #MF PART
             
     uemb = kl.Flatten()( emb_user_mf( user ) )
     iemb = kl.Flatten()( emb_item_mf( item ) )
     
     #mf_dot = merge_dot( [uemb, iemb] )
     mf_vector = kl.Multiply()( [uemb, iemb] )
     
     #mf_vector = cat_mf( [mf_mul, mf_dot] )
             
     if self.include_artist:
         uemb = kl.Flatten()( emb_user_artist_mf( user ) )
         aemb = kl.Flatten()( emb_artist_mf( artist ) )
         #mf_dot = merge_dot( [uemb, aemb] )
         mf_mul = kl.Multiply()( [uemb, aemb] )
         
         mf_vector = kl.Concatenate()( [mf_vector, mf_mul] ) #, mf_dot] )
         
     
     #MLP PART
     
     uemb = kl.Flatten()( emb_user( user ) )
     iemb = kl.Flatten()( emb_item( item ) )
     
     mlp_vector = kl.Concatenate()( [uemb, iemb] )
     if self.include_artist:
         emba = kl.Flatten()( emb_artist( artist ) )
         mlp_vector = kl.Concatenate()( [mlp_vector, emba] )
     
     for i in range( len(self.layers) ):
         layer = kl.Dense( self.layers[i], activation=self.hidden_act, name="layer%d" %i, kernel_regularizer=l2(self.layer_reg) )
         mlp_vector = layer(mlp_vector)
     
     #PRED PART
     
     comb = kl.Concatenate()( [ mf_vector , mlp_vector ] ) #, uemb ] )
     
     fff = kl.Dense( 1, activation=self.final_act, kernel_initializer='lecun_uniform', bias_regularizer=l2(self.final_reg) )
     res = fff(comb)
     
     inputs = [ user, item ] #+ [artist]
     if self.include_artist:
         inputs += [artist]
     outputs = [ res ]
     
     model = km.Model( inputs, outputs )
     
     if self.optimizer == 'adam': 
         opt = keras.optimizers.Adam(lr=self.learning_rate)
     elif self.optimizer == 'nadam':
         opt = keras.optimizers.Nadam(lr=self.learning_rate)
     elif self.optimizer == 'adamax':
         opt = keras.optimizers.Adamax(lr=self.learning_rate)
     elif self.optimizer == 'adagrad':
         opt = keras.optimizers.Adagrad(lr=self.learning_rate)
     elif self.optimizer == 'adadelta':
         opt = keras.optimizers.Adadelta(lr=self.learning_rate)
     
     model.compile( optimizer=opt, loss='binary_crossentropy' )
     plot_model( model, to_file='ncf.png' )
     
     return model
Example #18
0
    def get_pmodel(self):
        """Build the MLP-only model over user, item and optional artist ids.

        Each id input is embedded and flattened; the embeddings are
        concatenated and passed through one Dense layer per entry of
        ``self.layers`` and then through a single-unit Dense output layer.

        When ``self.embeddings`` is not None, the user and item embedding
        weights are initialised from ``self.get_latent``. When it equals
        ``'fixed'`` those two embeddings are also created non-trainable.

        Returns:
            The ``km.Model`` built here (it is not compiled in this method).
            Its inputs are ``[user, item]``, plus ``artist`` when
            ``self.include_artist`` is truthy; its single output is the
            result of the final Dense layer.
        """
        # Inputs are created in the original order (item, user, artist) so
        # that Keras' auto-generated layer names stay the same.
        item = kl.Input((1, ), dtype=self.intX)
        user = kl.Input((1, ), dtype=self.intX)
        if self.include_artist:
            artist = kl.Input((1, ), dtype=self.intX)

        # User/item embeddings are frozen only in 'fixed' mode.
        trainable = self.embeddings != 'fixed'

        emb_user = Embedding(output_dim=self.factors,
                             input_dim=self.num_users,
                             embeddings_regularizer=l2(self.emb_reg),
                             trainable=trainable)
        emb_item = Embedding(output_dim=self.factors,
                             input_dim=self.num_items,
                             embeddings_regularizer=l2(self.emb_reg),
                             trainable=trainable)

        if self.embeddings is not None:
            user_weights = self.get_latent(self.usermap.index,
                                           size=self.factors,
                                           col='playlist_id')
            item_weights = self.get_latent(self.itemmap.index,
                                           size=self.factors)
            # A layer must be built before its weights can be assigned.
            emb_user.build((None, ))
            emb_item.build((None, ))
            emb_user.set_weights([user_weights])
            emb_item.set_weights([item_weights])

        if self.include_artist:
            # No weights are loaded for the artist embedding in this method,
            # and it is created without the `trainable` flag.
            # NOTE(review): confirm it should ignore the 'fixed' setting.
            emb_artist = Embedding(output_dim=self.factors,
                                   input_dim=self.num_artists,
                                   embeddings_regularizer=l2(self.emb_reg))

        # MLP part: concatenate the flattened embeddings.
        user_vec = kl.Flatten()(emb_user(user))
        item_vec = kl.Flatten()(emb_item(item))

        mlp_vector = kl.Concatenate()([user_vec, item_vec])
        if self.include_artist:
            artist_vec = kl.Flatten()(emb_artist(artist))
            mlp_vector = kl.Concatenate()([mlp_vector, artist_vec])

        for i, units in enumerate(self.layers):
            hidden = kl.Dense(units,
                              activation=self.hidden_act,
                              name="layer%d" % i,
                              kernel_regularizer=l2(self.layer_reg))
            mlp_vector = hidden(mlp_vector)

        # Prediction part: single-unit output layer.
        output_layer = kl.Dense(1,
                                activation=self.final_act,
                                kernel_initializer='lecun_uniform',
                                kernel_regularizer=l2(self.final_reg))
        res = output_layer(mlp_vector)

        inputs = [user, item]
        if self.include_artist:
            inputs.append(artist)

        return km.Model(inputs, [res])