Example #1
def build_model(input_classes, input_vectors_sizes, output_classes_1,
                output_classes_2):
    """ Build the model
    
    Build a keras model that with multiple classification heads. It also 
    takes input in the form of classes, one hot encoded. First layer
    is an embedding layer for those classes. This model can 
    
    Arguments:
        input_classes {[type]} -- Number of classes as input
        input_vectors_sizes {[type]} -- Size of class vectors (1 for one hot encoded)
        output_classes_1 {[number]} -- Classes in classifier 1
        output_classes_2 {[number]} -- Classes in classifier 2
    
    Returns:
        [keras.model] -- Multi classification model
    """
    dimensions = 20
    inputs = []
    embedded_outputs = []
    for i in input_classes:
        input_layer = Input((1, ))
        inputs.append(input_layer)
        embedder = Embedding(input_dim=i,
                             output_dim=dimensions,
                             input_length=1,
                             embeddings_constraint=UnitNorm(axis=0))
        embedded_layer = embedder(input_layer)
        embedded_outputs.append(embedded_layer)

    for i in input_vectors_sizes:
        input_layer = Input((1, i))
        inputs.append(input_layer)
        embedded_outputs.append(input_layer)

    embedded_concats = Concatenate()(embedded_outputs)
    flatten_layer = Flatten()

    dense_output_1 = Dense(output_classes_1, activation='softmax')
    dense_output_2 = Dense(output_classes_2, activation='softmax')

    flattened_output = flatten_layer(embedded_concats)
    dense_output_1 = dense_output_1(flattened_output)
    dense_output_2 = dense_output_2(flattened_output)

    outputs = [dense_output_1, dense_output_2]
    # dense_output = dense_layer(embedded_concats)
    second_classifier_scale = 0.5
    model = Model(inputs, outputs)
    print(model.summary())
    loss = keras.losses.sparse_categorical_crossentropy
    model.compile(loss=[loss, loss],
                  loss_weights=[1.0, second_classifier_scale],
                  optimizer='adam')

    return model
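A minimal usage sketch for the build_model above (the imports, class counts and random data below are illustrative assumptions, not part of the original snippet):

import numpy as np
from tensorflow import keras
from tensorflow.keras.layers import Input, Embedding, Concatenate, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.constraints import UnitNorm

# Two categorical inputs with 10 and 7 classes, one 5-dimensional vector input.
model = build_model(input_classes=[10, 7],
                    input_vectors_sizes=[5],
                    output_classes_1=3,
                    output_classes_2=4)

n = 32
x = [np.random.randint(0, 10, size=(n, 1)),   # class indices for input 1
     np.random.randint(0, 7, size=(n, 1)),    # class indices for input 2
     np.random.rand(n, 1, 5)]                 # vector input
y = [np.random.randint(0, 3, size=(n, 1)),    # sparse labels for head 1
     np.random.randint(0, 4, size=(n, 1))]    # sparse labels for head 2
model.fit(x, y, epochs=1, batch_size=8)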
Example #2
def build_model(input_classes, output_classes):
    """Build model
    
    Builds a simple model with classes as input and output
    
    Arguments:
        input_classes {[list]} -- List of input classes as features
        output_classes {[int]} -- Number of output classes to classify 
    
    Returns:
        [keras.model] -- Compiled model
    """
    dimensions = 20
    inputs = []
    embedded_outputs = []
    for i in input_classes:
        input_layer = Input((1, ))
        inputs.append(input_layer)
        embedder = Embedding(input_dim=i,
                             output_dim=dimensions,
                             input_length=1,
                             embeddings_constraint=UnitNorm(axis=0))
        embedded_layer = embedder(input_layer)
        embedded_outputs.append(embedded_layer)

    embedded_concats = Concatenate()(embedded_outputs)
    flatten_layer = Flatten()

    # softmax output to match the sparse categorical cross-entropy loss below
    dense_layer = Dense(output_classes, activation='softmax')

    flattened_output = flatten_layer(embedded_concats)
    dense_output = dense_layer(flattened_output)

    # dense_output = dense_layer(embedded_concats)

    model = Model(inputs, dense_output)
    print(model.summary())
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

    return model
Example #3
def get_s2v_module(encoder_type, word_embedding_matrix, n_hidden,
                   dropout_rate):
    input = Input(shape=(None, ), dtype='int32')

    embedding_layer = Embedding(
        word_embedding_matrix.shape[0],
        word_embedding_matrix.shape[1],
        embeddings_initializer=Constant(word_embedding_matrix),
        trainable=False,
        mask_zero=True)

    output = embedding_layer(input)

    if encoder_type == 'lstm':
        lstm_layer = LSTM(units=n_hidden,
                          activation='tanh',
                          return_sequences=False)
        output = lstm_layer(output)

    if encoder_type == 'bilstm':
        bilstm_layer = Bidirectional(
            LSTM(units=n_hidden, activation='tanh', return_sequences=False))
        output = bilstm_layer(output)

    if encoder_type == 'att-bilstm':
        bilstm_layer = Bidirectional(
            LSTM(units=n_hidden, activation='tanh', return_sequences=True))
        attention_layer = AttentionWithContext(u_constraint=UnitNorm())
        output = attention_layer(bilstm_layer(output))

    dropout_layer = Dropout(dropout_rate)

    output = dropout_layer(output)

    model = Model(input, output)
    model.summary()

    return model
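A small usage sketch for get_s2v_module (the imports, vocabulary size and token ids below are illustrative assumptions; the 'bilstm' encoder is chosen so the custom AttentionWithContext layer is not needed):

import numpy as np
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import Constant

vocab_size, emb_dim = 1000, 50
word_embedding_matrix = np.random.rand(vocab_size, emb_dim)  # stand-in for pretrained vectors

s2v = get_s2v_module(encoder_type='bilstm',
                     word_embedding_matrix=word_embedding_matrix,
                     n_hidden=64,
                     dropout_rate=0.3)

# Two padded token-id sequences (index 0 is masked via mask_zero=True).
sentences = np.array([[5, 42, 7, 0, 0],
                      [3, 18, 0, 0, 0]], dtype='int32')
vectors = s2v.predict(sentences)  # shape (2, 128): 2 * n_hidden for the BiLSTM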
Example #4
    (np.ones(batch_size // 2), np.zeros(batch_size // 2)))[:, None]
val_pairs = len(x_val)
val_pair_labels = np.concatenate(
    (np.ones(val_pairs), np.zeros(val_pairs)))[:, None]

# Input tensors
input_img = Input(shape=(64, 64, 3))

# Dynamic architecture
# Load a VGG16
core_model = VGG16(input_shape=(64, 64, 3), include_top=False)
encoded = core_model(input_img)
# Feature layer
encoded = Flatten()(encoded)
encoded = Dense(latent_dim, activation='linear',
                kernel_constraint=UnitNorm())(encoded)

# Create shared model
shared_model = Model(input_img, encoded)

# Two input tensors
img_real = Input(shape=(64, 64, 3))
img_gen = Input(shape=(64, 64, 3))

# Get features
features_real = shared_model(img_real)
features_gen = shared_model(img_gen)
# Compute distance
sim_score = Lambda(euclidean_distance)([features_real, features_gen])

# Siamese model
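The snippet above passes a euclidean_distance function to Lambda without showing it; a common implementation it might correspond to (an assumption, not the original code):

from tensorflow.keras import backend as K

def euclidean_distance(tensors):
    # Euclidean distance between the two feature vectors of a pair, with a
    # small floor before the square root for numerical stability.
    x, y = tensors
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))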
Example #5
hidden_enc = Dense(units=S * 2**(k),
                   activation=activation,
                   activity_regularizer=UncorrelatedFeaturesConstraint(
                       S * 2**(k), weightage=1.))(inputs_encoder)
batch_enc = BatchNormalization()(hidden_enc)
outputs_encoder = Dense(units=N, activation='sigmoid')(batch_enc)
### Model Build
model_enc = keras.Model(inputs=inputs_encoder,
                        outputs=outputs_encoder,
                        name='encoder_model')

### Decoder Layers definitions
inputs_decoder = keras.Input(shape=N)
hidden_dec = Dense(units=S * 2**(k),
                   activation=activation,
                   kernel_constraint=UnitNorm(axis=0))(inputs_decoder)
# hidden_dec = dense(inputs_decoder, transpose=False)
batch_dec = BatchNormalization()(hidden_dec)
outputs_decoder = Dense(units=2**k, activation='softmax')(batch_dec)
### Model Build
model_dec = keras.Model(inputs=inputs_decoder,
                        outputs=outputs_decoder,
                        name='decoder_model')

### Meta model Layers definitions
inputs_meta = keras.Input(shape=2**k)
encoded_bits = model_enc(inputs=inputs_meta)
x = Lambda(gradient_stopper, name='rounding_layer')(encoded_bits)
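# gradient_stopper used in the rounding_layer above is not shown in this
# excerpt; a common straight-through rounding trick it might correspond to
# (an assumption, not the original code):
#
#     def gradient_stopper(x):
#         # round to {0, 1} in the forward pass, identity gradient backwards
#         return tf.stop_gradient(tf.round(x) - x) + x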
if channel == 'BSC':
    noisy_bits = Lambda(BSC_noise,
                        arguments={
Example #6
# Load data
data      = np.load(data_loc)
x_d_test  = np.copy(data['imgs'] / 255.)
y_d_test  = np.copy(data['classes'])
# Rearrange y_test as ordinal classes (since absolute value of class doesn't matter)
_, y_d_test_ordinal = np.unique(y_d_test, return_inverse=True)
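# Illustrative example of the relabelling above (made-up values):
# np.unique([7, 3, 7, 9], return_inverse=True) returns
# (array([3, 7, 9]), array([1, 0, 1, 2])), i.e. y_d_test_ordinal holds the
# original class ids remapped to consecutive integers 0..K-1.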

# Instantiate and load VGGFace with VGG16 core
latent_dim = 128
input_img  = Input(shape=(64, 64, 3))
core_model = VGG16(input_shape=(64, 64, 3), include_top=False)
encoded    = core_model(input_img)
# Feature layer
encoded = Flatten()(encoded)
encoded = Dense(latent_dim, activation='linear', kernel_constraint=UnitNorm())(encoded)
# Create shared model
model = Model(input_img, encoded)

# Load weights
core_folder    = 'trained_models/proposed'
core_weights   = 'steps16_lr10.0_last' 
target_weights = '%s/%s.h5' % (core_folder, core_weights)
model.load_weights(target_weights)

# Attack parameters
batch_size     = 8 # Number of restarts
num_thresholds = 1000 # For AUC
learning_rate  = 1e-2
num_iterations = 2000
mask_size      = 10
Example #7
#====================================
# Add a TimeDistributed, DENSE layer.
#====================================
#first_dense_layer = Dense(1,
#                          kernel_constraint=UnitNorm(),
#                          # W_constraint=UnitNorm(), # Don't use: legacy
#                          # weights=[layer_loss_weights, np.zeros(1)],
#                          trainable=False)
#errors_by_time0 = TimeDistributed(first_dense_layer,
#                                  weights=[layer_loss_weights, np.zeros(1)],
#                                  trainable=True)(errors)
#==================================== The above is a failed experiment
#
errors_by_time = TimeDistributed(Dense(1,
                                       trainable=False,
                                       kernel_constraint=UnitNorm()),
                                 weights=[layer_loss_weights,
                                          np.zeros(1)],
                                 trainable=False)(errors)
# Flatten and then add another DENSE layer.
errors_by_time = Flatten()(errors_by_time)  # will be (batch_size, nt)

print("\nkitti_train_RBP.py errors_by_time: ", errors_by_time)

final_errors = Dense(1,
                     weights=[time_loss_weights,
                              np.zeros(1)],
                     trainable=False)(errors_by_time)
# Above: weight errors by time
print("\nkitti_train_RBP.py final_errors: ", final_errors)
Example #8
def build_autoencoder():
    input_size = 61776
    hidden_size = 432
    hidden2_size = 48
    latent_size = 12

    # Build encoder

    input_pconn = Input(shape=(input_size, ))
    d1 = Dense(hidden_size,
               activation='relu',
               kernel_regularizer=WeightsOrthogonalityConstraint(hidden_size,
                                                                 weightage=1.,
                                                                 axis=0),
               activity_regularizer=UncorrelatedFeaturesConstraint(
                   hidden_size, weightage=1.),
               kernel_constraint=UnitNorm(axis=0))
    d2 = Dense(hidden2_size,
               activation='relu',
               kernel_regularizer=WeightsOrthogonalityConstraint(hidden2_size,
                                                                 weightage=1.,
                                                                 axis=0),
               activity_regularizer=UncorrelatedFeaturesConstraint(
                   hidden2_size, weightage=1.),
               kernel_constraint=UnitNorm(axis=0))
    d3 = Dense(latent_size,
               activation='relu',
               kernel_regularizer=WeightsOrthogonalityConstraint(latent_size,
                                                                 weightage=1.,
                                                                 axis=0),
               activity_regularizer=UncorrelatedFeaturesConstraint(
                   latent_size, weightage=1.),
               kernel_constraint=UnitNorm(axis=0))
    hidden_1 = d1(input_pconn)
    hidden2_1 = d2(hidden_1)
    latent = d3(hidden2_1)

    encoder = Model(input_pconn, latent, name='encoder')
    encoder.summary()

    # Build decoder

    latent_inputs = Input(shape=(latent_size, ), name='decoder_input')
    #hidden2_2 = Dense(hidden2_size, activation='relu')(latent_inputs)
    #hidden_2 = Dense(hidden_size, activation='relu')(hidden2_2)
    #output_pconn = Dense(input_size, activation='sigmoid')(hidden_2)
    td3 = DenseTied(hidden2_size,
                    activation='relu',
                    kernel_constraint=UnitNorm(axis=1),
                    tied_to=d3)
    td2 = DenseTied(hidden_size,
                    activation='relu',
                    kernel_constraint=UnitNorm(axis=1),
                    tied_to=d2)
    td1 = DenseTied(input_size,
                    activation='sigmoid',
                    kernel_constraint=UnitNorm(axis=1),
                    tied_to=d1)
    hidden2_2 = td3(latent_inputs)
    hidden_2 = td2(hidden2_2)
    output_pconn = td1(hidden_2)

    decoder = Model(latent_inputs, output_pconn, name="decoder")
    decoder.summary()

    # Build autoencoder = encoder + decoder
    #autoencoder = Model(input_pconn, output_pconn)
    autoencoder = Model(input_pconn,
                        decoder(encoder(input_pconn)),
                        name='autoencoder')
    autoencoder.summary()
    opt = Adam(lr=0.001)
    autoencoder.compile(optimizer=opt, loss='mean_squared_error')

    return (autoencoder, encoder, decoder)
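DenseTied above is a custom layer that is not defined in this excerpt; a minimal sketch of the underlying tied-kernel idea (an assumption, not the author's class):

import tensorflow as tf
from tensorflow.keras.layers import Layer

class TiedDenseSketch(Layer):
    """Dense layer whose kernel is the transpose of another, already built Dense layer's kernel."""

    def __init__(self, units, tied_to, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.tied_to = tied_to  # the Dense layer whose kernel is reused
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Only a bias is created here; the kernel comes from the tied layer.
        self.bias = self.add_weight(name='bias', shape=(self.units,),
                                    initializer='zeros')
        super().build(input_shape)

    def call(self, inputs):
        output = tf.matmul(inputs, self.tied_to.kernel, transpose_b=True)
        return self.activation(output + self.bias)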
Example #9
def build_model(input_classes, input_vectors_sizes, output_classes):
    """Build model
    
    Create a model which takes a list of vectors along with a list
    of classes as input. There is a single classifier head.

    Arguments:
        input_classes {[list]} -- Number of classes for each class input
        input_vectors_sizes {[list]} -- Size of each vector input
        output_classes {[int]} -- Number of classes to classify
    
    Returns:
        [keras.model] -- Compiled model
    """
    dimensions = 20
    inputs = []
    embedded_outputs = []
    for i in input_classes:
        input_layer = Input((1,))
        inputs.append(input_layer)
        embedder = Embedding(input_dim=i,
                             output_dim=dimensions,
                             input_length=1,
                             embeddings_constraint=UnitNorm(axis=0))
        embedded_layer = embedder(input_layer)
        embedded_outputs.append(embedded_layer)
    
    for i in input_vectors_sizes:
        input_layer = Input((1, i))
        inputs.append(input_layer)
        embedded_outputs.append(input_layer)


    embedded_concats = Concatenate()(embedded_outputs)
    flatten_layer = Flatten()

    dense_layer = Dense(output_classes, activation='softmax')

    flattened_output = flatten_layer(embedded_concats)
    dense_output = dense_layer(flattened_output)

    # dense_output = dense_layer(embedded_concats)

    model = Model(inputs, dense_output)
    print(model.summary())
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

    return model
Example #10
    Arguments:
        input_classes {[list]} -- Number of classes for each class input
        input_vectors_sizes {[list]} -- Size of each vector input
        output_classes {[int]} -- Number of classes to classify
    
    Returns:
        [keras.model] -- Compiled model
    """
    dimensions = 20
    inputs = []
    embedded_outputs = []
    for i in input_classes:
        input_layer = Input((1,))
        inputs.append(input_layer)
        embedder = Embedding(input_dim=i,
                             output_dim=dimensions,
                             input_length=1,
                             embeddings_constraint=UnitNorm(axis=0))
        embedded_layer = embedder(input_layer)
        embedded_outputs.append(embedded_layer)
    
    for i in input_vectors_sizes:
        input_layer = Input((1, i))
        inputs.append(input_layer)
        dense = Dense(dimensions, activation='linear')
        densed_layer = dense(input_layer)
        embedded_outputs.append(densed_layer)

    embedded_concats = Concatenate()(embedded_outputs)
    flatten_layer = Flatten()

    dense_layer = Dense(output_classes, activation='softmax')
Example #11
def build_model(input_classes, input_vectors_sizes, output_classes_1,
                output_classes_2):
    """ Build the model
    
    Build a keras model with multiple classification heads. It takes input
    in the form of classes (one hot encoded) as well as vectors. The first
    layer for the class inputs is an embedding layer. The model is compiled
    with an adaptable loss that masks out samples that carry no label for
    the second classifier.

    Arguments:
        input_classes {[list]} -- Number of classes for each class input
        input_vectors_sizes {[list]} -- Size of each vector input
        output_classes_1 {[int]} -- Classes in classifier 1
        output_classes_2 {[int]} -- Classes in classifier 2
    
    Returns:
        [keras.model] -- Multi classification model
    """
    dimensions = 20
    inputs = []
    embedded_outputs = []
    for i in input_classes:
        input_layer = Input((1, ))
        inputs.append(input_layer)
        embedder = Embedding(input_dim=i,
                             output_dim=dimensions,
                             input_length=1,
                             embeddings_constraint=UnitNorm(axis=0))
        embedded_layer = embedder(input_layer)
        embedded_outputs.append(embedded_layer)

    for i in input_vectors_sizes:
        input_layer = Input((1, i))
        inputs.append(input_layer)
        embedded_outputs.append(input_layer)

    embedded_concats = Concatenate()(embedded_outputs)
    flatten_layer = Flatten()

    dense_output_1 = Dense(output_classes_1, activation='softmax')
    dense_output_2 = Dense(output_classes_2, activation='softmax')

    flattened_output = flatten_layer(embedded_concats)
    dense_output_1 = dense_output_1(flattened_output)
    dense_output_2 = dense_output_2(flattened_output)

    outputs = [dense_output_1, dense_output_2]
    # dense_output = dense_layer(embedded_concats)
    scale = 0.5

    def out_loss(y_true, y_pred):
        masks = tf.cast(y_true[:, 1], dtype='bool')  # boolean_mask expects a bool mask
        targets = y_true[:, 0]
        # mask = tf.math.equal(value_vec,targets)
        targets = tf.cast(tf.boolean_mask(targets, masks), dtype='int32')
        logits = tf.boolean_mask(y_pred, masks)
        # y_pred  = tf.boolean_mask(y_pred,mask)
        # tf.Print(targets,[targets],output_stream=sys.stdout)
        print(targets)
        # tf.Print(targets,[targets])
        res = scale * tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=targets, logits=logits)
        return res

    model = Model(inputs, outputs)
    print(model.summary())
    loss = keras.losses.sparse_categorical_crossentropy
    model.compile(loss=[loss, out_loss], optimizer='adam')

    return model
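The out_loss above reads its targets and a 0/1 mask from the two columns of y_true; a small sketch of how labels for the second head might be packed (an illustrative assumption inferred from the indexing in out_loss):

import numpy as np

# Each row is [target_class_id, mask]; rows with mask == 0 contribute no loss.
y_true_head2 = np.array([[2., 1.],
                         [0., 0.],   # no label available for this sample
                         [1., 1.]], dtype='float32')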
Example #12
def main(model, params):
    datafolder = params['d']
    training_passes = params['t']
    eval_passes = params['e']
    predict_passes = params['p']
    batch_size = params['bs']
    drop = params['drop']
    dim = params['ed']

    constr_dict = {
        'maxnorm': MaxNorm(1, axis=1),
        'unitnorm': UnitNorm(axis=1),
        'nonneg': NonNeg()
    }
    reg_dict = {'l1': l1(0.01), 'l2': l2(0.01), 'l1_l2': l1_l2(0.01, 0.01)}

    train_file = datafolder + "train.txt"
    valid_file = datafolder + "valid.txt"
    test_file = datafolder + "test.txt"
    false_train_file = datafolder + "false_train.txt"

    E_mapping, R_mapping = mapping(
        [train_file, valid_file, test_file, false_train_file])

    VOC_SIZE = len(list(E_mapping.keys()))
    PRED_SIZE = len(list(R_mapping.keys()))

    true_train = np.squeeze(
        np.asarray(
            list(
                data_iterator(train_file,
                              E_mapping,
                              R_mapping,
                              batch_size=-1,
                              mode=params['training_mode']))))

    if params['reverse_labels']:  #TransE
        true_train_labels = np.zeros(len(true_train.T))
    else:
        true_train_labels = np.ones(len(true_train.T))

    if params['false_mode'] == 'fromfile':
        false_train = np.squeeze(
            np.asarray(
                list(
                    data_iterator(false_train_file,
                                  E_mapping,
                                  R_mapping,
                                  batch_size=-1,
                                  mode=params['training_mode']))))

    else:
        s, p, o = true_train
        false_train = np.asarray(
            corrupt_triples(s, p, o, params['check'], params['false_mode']))

    if params['reverse_labels']:
        false_train_labels = np.ones(len(false_train.T))
    else:
        false_train_labels = np.zeros(len(false_train.T))

    if params['constraint']:
        const = constr_dict[params['constraint']]
    else:
        const = None

    if params['regularizer']:
        reg = reg_dict[params['regularizer']]
    else:
        reg = None

    m = model(VOC_SIZE,
              PRED_SIZE,
              dim,
              embeddings_regularizer=reg,
              embeddings_constraint=const,
              dropout=params['drop'])

    m.compile(loss=params['loss'], optimizer='adagrad', metrics=['mae'])

    for i in range(training_passes):
        if params['false_mode'] != 'fromfile':
            s, p, o = true_train
            false_train = np.asarray(
                corrupt_triples(s, p, o, params['check'],
                                params['false_mode']))

        tmpX = np.concatenate([false_train.T, true_train.T], axis=0)
        tmpY = np.concatenate([false_train_labels.T, true_train_labels.T],
                              axis=0)
        tmpY = tmpY * (1 - params['ls']) + params['ls'] / 2

        m.fit(tmpX, tmpY, epochs=1, shuffle=True, batch_size=batch_size)

        try:
            if (i % eval_passes == 0 and i != 0) or (i == training_passes - 1
                                                     and eval_passes > 0):
                if params['filtered']:
                    tmp = true_train.T
                else:
                    tmp = []
                res = evaluate(m, valid_file, E_mapping, R_mapping,
                               params['reverse_labels'], tmp)
                print(res)

        except ZeroDivisionError:
            pass

        if params['store']:
            store_embedding(m, E_mapping, R_mapping, datafolder)

    if predict_passes > 0:
        print(predict_passes)
        test = np.squeeze(
            np.asarray(
                list(
                    data_iterator(test_file,
                                  E_mapping,
                                  R_mapping,
                                  batch_size=-1,
                                  mode=params['training_mode'])))).T

        pred = m.predict(test)
        pred = [p[0] for p in pred]

        mapping_e = reverse_dict(E_mapping)
        mapping_r = reverse_dict(R_mapping)

        with open(params['output_file'], 'w') as f:
            for t, p in zip(test, pred):
                s, r, o = t
                s, r, o = mapping_e[s], mapping_r[r], mapping_e[o]
                string = '\t'.join(map(str, [s, r, o, p])) + '\n'
                f.write(string)
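A sketch of the params dictionary main() expects (the keys are the ones the function reads; all values are illustrative assumptions):

params = {
    'd': 'data/fb15k/',          # data folder containing train/valid/test files
    't': 100,                    # training passes
    'e': 10,                     # evaluate every e passes
    'p': 1,                      # prediction passes
    'bs': 512,                   # batch size
    'drop': 0.2,                 # dropout rate
    'ed': 50,                    # embedding dimension
    'training_mode': 'default',
    'reverse_labels': False,     # True for TransE-style scoring
    'false_mode': 'fromfile',    # or a corruption strategy for negative triples
    'check': False,              # passed through to corrupt_triples
    'constraint': 'unitnorm',    # 'maxnorm', 'unitnorm', 'nonneg' or None
    'regularizer': None,         # 'l1', 'l2', 'l1_l2' or None
    'loss': 'binary_crossentropy',
    'ls': 0.0,                   # label smoothing
    'filtered': True,            # filtered evaluation setting
    'store': False,              # store embeddings after each pass
    'output_file': 'predictions.txt',
}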
Example #13
encoder = Dense(encoding_dim, activation="relu", activity_regularizer=regularizers.l1(learning_rate))(input_layer)
encoder = Dense(hidden_dim, activation="relu")(encoder)
decoder = Dense(hidden_dim, activation="relu")(encoder)
decoder = Dense(encoding_dim, activation="relu")(decoder)
decoder = Dense(input_dim, activation="linear")(decoder)
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.summary()
'''

encoder1 = Dense(encoding_dim,
                 activation="linear",
                 input_shape=(input_dim, ),
                 use_bias=True,
                 kernel_regularizer=WeightsOrthogonalityConstraint(
                     encoding_dim, weightage=1., axis=0),
                 kernel_constraint=UnitNorm(axis=0))
decoder1 = DenseTied(input_dim,
                     activation="linear",
                     tied_to=encoder1,
                     use_bias=False)

encoder2 = Dense(hidden_dim,
                 activation="relu",
                 input_shape=(encoding_dim, ),
                 use_bias=True,
                 kernel_regularizer=WeightsOrthogonalityConstraint(
                     encoding_dim, weightage=1., axis=0),
                 kernel_constraint=UnitNorm(axis=0))
decoder2 = DenseTied(encoding_dim,
                     activation="relu",
                     tied_to=encoder2,
Example #14
def createNetworks(input_size, labels, n_layers, embedding_type,
                   embedding_similarity, transmode, same_weights,
                   graph_distance, scale_negative, activation_function):
    """ Creates neural network that learns node embeddings of given graph(s)
    
    Inputs:
        INPUT_SIZE List [n, m, k, l, s] where:
            N Number of samples (dictionary size)
            M Number of original features (equal to n for one-hot coding)
            K Number of embedding features
            L Number of node labels
            S Number of negative samples
        EMBEDDING_TYPE Type of embedding approach, e.g. 'unified' (unified embedding for target and context nodes) or 'skipgram' (different embeddings for target and context nodes)
        EMBEDDING_SIMILARITY Measure of similarity between node embeddings within one graph
        TRANSMODE Flag to specify transfer learning mode
        GRAPH_DISTANCE Distance between node embeddings of different graphs
        
    Outputs: 
        Neural network for graph node embeddings
    """

    # what is the correct dictionary size?
    dict_size, feature_size, embedding_size, class_size, negative_size = input_size
    inputsEmbedding = []
    inputsEmbeddingA = []
    inputsEmbeddingB = []
    outputsEmbedding = []
    inputsPrediction = []
    outputsPrediction = []

    if embedding_similarity == 'l2':
        from keras.constraints import UnitNorm
        constraints = UnitNorm(axis=1)
    else:
        constraints = None

    # create embedding branch for graph A
    if feature_size == 1:
        input_shape = (1, )
        input_type = 'int32'
        Embedding_targetA = Embedding(dict_size,
                                      embedding_size,
                                      embeddings_constraint=constraints,
                                      name='Embedding_TargetA')
        Embedding_contextA = Embedding(dict_size,
                                       embedding_size,
                                       embeddings_constraint=constraints,
                                       name='Embedding_ContextA')
    else:
        input_shape = (
            1,
            feature_size,
        )
        input_type = 'float'
        Embedding_targetA = Dense(embedding_size,
                                  activation='tanh',
                                  kernel_constraint=constraints,
                                  name='Embedding_TargetA')
        Embedding_contextA = Dense(embedding_size,
                                   activation='tanh',
                                   kernel_constraint=constraints,
                                   name='Embedding_ContextA')

    input_targetA = Input(shape=input_shape,
                          dtype=input_type,
                          name='Input_TargetA')
    input_contextA = Input(shape=input_shape,
                           dtype=input_type,
                           name='Input_ContextA')
    inputsEmbeddingA.extend([input_targetA, input_contextA])

    # use different or the same encodings for target and context
    embedding_targetA = Embedding_targetA(input_targetA)
    target_weights = np.random.multivariate_normal(
        np.zeros(embedding_size), 0.1 * np.identity(embedding_size), dict_size)
    Embedding_targetA.set_weights([target_weights])
    if embedding_type == 'skipgram':  # separate embeddings for target and context nodes
        embedding_contextA = Embedding_contextA(input_contextA)
        context_weights = np.random.multivariate_normal(
            np.zeros(embedding_size), 0.1 * np.identity(embedding_size),
            dict_size)
        Embedding_contextA.set_weights([context_weights])
    elif embedding_type == 'unified':  # unified embedding
        embedding_contextA = Embedding_targetA(input_contextA)

    # add more dense layers to embedding branch if predicting pagerank
    if labels == 'pagerank':
        embedding_targetA = Dense(embedding_size,
                                  activation='tanh')(embedding_targetA)
        embedding_contextA = Dense(embedding_size,
                                   activation='tanh')(embedding_contextA)

    # create similarity branch for graph A
    inputsSimilarityA = [embedding_targetA, embedding_contextA]
    if embedding_similarity == 'softmax':
        # add negative samples
        input_negativeA = Input(shape=(negative_size, ) + input_shape[1:],
                                dtype=input_type,
                                name='Input_NegativeA')
        inputsEmbeddingA.extend([input_negativeA])
        embedding_negativeA = Embedding_targetA(input_negativeA)
        # add more dense layers to embedding branch if predicting pagerank
        if labels == 'pagerank':
            embedding_negativeA = Dense(embedding_size,
                                        activation='tanh')(embedding_negativeA)
        inputsSimilarityA.extend([embedding_negativeA])
    similarityA = createSimilarityBranch(
        embedding_size,
        mode=embedding_similarity,
        negative_size=negative_size,
        graph='A',
        scale_negative=scale_negative)(inputsSimilarityA)
    outputsEmbedding.extend([similarityA])
    inputsEmbedding.extend(inputsEmbeddingA)

    # create prediction branch
    inputsPrediction.extend([input_targetA])
    predictionBranch = createPredictionBranch(
        embedding_size, n_layers, class_size,
        activation_function)(embedding_targetA)
    predictionOutput = Reshape((class_size, ),
                               name='PredictionOutput')(predictionBranch)
    outputsPrediction.extend([predictionOutput])

    if transmode != '1graph':
        input_targetB = Input(shape=input_shape,
                              dtype=input_type,
                              name='Input_TargetB')
        input_contextB = Input(shape=input_shape,
                               dtype=input_type,
                               name='Input_ContextB')
        inputsEmbeddingB.extend([input_targetB, input_contextB])

        # create embedding branch for graph B
        if feature_size == 1:
            Embedding_targetB = Embedding(dict_size,
                                          embedding_size,
                                          embeddings_constraint=constraints,
                                          name='Embedding_TargetB')
            Embedding_contextB = Embedding(dict_size,
                                           embedding_size,
                                           embeddings_constraint=constraints,
                                           name='Embedding_ContextB')
        else:
            Embedding_targetB = Dense(embedding_size,
                                      activation='tanh',
                                      kernel_constraint=constraints,
                                      name='Embedding_TargetB')
            Embedding_contextB = Dense(embedding_size,
                                       activation='tanh',
                                       kernel_constraint=constraints,
                                       name='Embedding_ContextB')

        # use different or the same encodings for target and context
        embedding_targetB = Embedding_targetB(input_targetB)
        Embedding_targetB.set_weights(
            [
                target_weights if same_weights else np.zeros(
                    (dict_size, embedding_size))
            ]
        )  # np.random.multivariate_normal(np.zeros(embedding_size), 0.1*np.identity(embedding_size), dict_size)])
        if embedding_type == 'skipgram':  # separate embeddings for target and context nodes
            embedding_contextB = Embedding_contextB(input_contextB)
            Embedding_contextB.set_weights(
                [
                    context_weights if same_weights else np.zeros(
                        (dict_size, embedding_size))
                ]
            )  # np.random.multivariate_normal(np.zeros(embedding_size), 0.1*np.identity(embedding_size), dict_size)])
        elif embedding_type == 'unified':  # unified embedding
            embedding_contextB = Embedding_targetB(input_contextB)

        # add more dense layers to embedding branch if predicting pagerank
        if labels == 'pagerank':
            embedding_targetB = Dense(embedding_size,
                                      activation='tanh')(embedding_targetB)
            embedding_contextB = Dense(embedding_size,
                                       activation='tanh')(embedding_contextB)

        # create similarity branch for graph B
        inputsSimilarityB = [embedding_targetB, embedding_contextB]
        if embedding_similarity == 'softmax':
            # add negative samples
            input_negativeB = Input(shape=(negative_size, ) + input_shape[1:],
                                    dtype=input_type,
                                    name='Input_NegativeB')
            inputsEmbeddingB.extend([input_negativeB])
            embedding_negativeB = Embedding_targetB(input_negativeB)
            # add more dense layers to embedding branch if predicting pagerank
            if labels == 'pagerank':
                embedding_negativeB = Dense(
                    embedding_size, activation='tanh')(embedding_negativeB)
            inputsSimilarityB.extend([embedding_negativeB])
        similarityB = createSimilarityBranch(
            embedding_size,
            mode=embedding_similarity,
            negative_size=negative_size,
            graph='B',
            scale_negative=scale_negative)(inputsSimilarityB)
        outputsEmbedding.extend([similarityB])
        inputsEmbedding.extend(inputsEmbeddingB)

        # create graph distance branch
        if transmode != 'noP':
            distanceAB = createDistanceBranch(
                embedding_size,
                mode=graph_distance)([embedding_targetA, embedding_targetB])
            outputsEmbedding.extend([distanceAB])

    modelEmbedding = Model(inputs=inputsEmbedding, outputs=outputsEmbedding)
    branchEmbeddingA = Model(inputs=inputsEmbeddingA,
                             outputs=outputsEmbedding[0])
    branchEmbeddingB = Model(
        inputs=inputsEmbeddingB,
        outputs=outputsEmbedding[1]) if transmode != '1graph' else None
    modelPrediction = Model(inputs=inputsPrediction, outputs=outputsPrediction)

    return modelEmbedding, branchEmbeddingA, branchEmbeddingB, modelPrediction
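For reference, the five-element input_size that createNetworks unpacks could look like this (all values are illustrative assumptions):

input_size = (dict_size, feature_size, embedding_size, class_size, negative_size) = (
    1000,  # number of nodes in the dictionary
    1,     # 1 -> integer node ids via Embedding; >1 -> feature vectors via Dense
    128,   # embedding dimension
    5,     # number of node label classes
    10,    # number of negative samples (used with the 'softmax' similarity)
)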