Esempio n. 1
0
    def __init__(self, args):
        """Build the discriminator, the generator and the stacked generator-training model.

        Args:
            args: Accepted for interface compatibility; not read by this constructor.
        """
        self.input_shape = 28
        self.num_classes = 2
        self.latent_dim = 100
        # build_discriminator() reads self.img_shape for its Input layer; its
        # Sequential stack is declared with input_dim=self.input_shape, so the
        # two must describe the same flat vector.
        self.img_shape = (self.input_shape, )

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator (two outputs: validity and class label)
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(
            loss=['binary_crossentropy', 'categorical_crossentropy'],
            loss_weights=[0.5, 0.5],
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates samples.
        # build_generator() declares its input as (self.latent_dim,), so the
        # placeholder must have the same width (it was hard-coded to 64, which
        # does not match latent_dim = 100).
        noise = Input(shape=(self.latent_dim, ))
        img = self.generator(noise)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated samples and determines validity;
        # its class-label output is not used by the combined model.
        valid, _ = self.discriminator(img)

        # The combined model (stacked generator and discriminator)
        # trains the generator to fool the discriminator
        self.combined = Model(noise, valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=optimizer)
Esempio n. 2
0
 def __init__(self, Name, LearningRate=0.0001, img_shape=None, latent_dim=None):
     """Build and compile the IR (target) and Visual (source) discriminators and generators.

     Args:
         Name: Identifier stored on the instance as ``self.Name``.
         LearningRate: Learning rate passed to every Adam optimizer created here.
         img_shape: Image shape stored as ``self.img_shape`` for the network
             builders. Required; it only carries a ``None`` default because it
             follows ``LearningRate`` and Python forbids a non-default
             parameter after a defaulted one.
         latent_dim: Latent vector size stored as ``self.latent_dim``.
             Required, for the same reason as ``img_shape``.

     Raises:
         ValueError: If ``img_shape`` or ``latent_dim`` is not supplied.
     """
     if img_shape is None or latent_dim is None:
         raise ValueError("img_shape and latent_dim must both be provided")

     self.Name = Name
     self.LearningRate = LearningRate
     self.img_shape = img_shape
     self.latent_dim = latent_dim  # was `latent_dims`, an undefined name
     self.trained = 0
     self.g_optimizer = Adam(learning_rate=self.LearningRate)
     self.d_optimizer = Adam(learning_rate=self.LearningRate)

     # NOTE(review): this single optimizer instance is shared by all four
     # models below - confirm the Keras version in use allows that.
     self.optimizer = Adam(learning_rate=self.LearningRate)

     # compile the IR Discriminator model - Target
     self.IR_discriminator = self.discriminator_network()
     self.IR_discriminator.compile(optimizer=self.optimizer, loss='binary_crossentropy', metrics=['accuracy'])

     # compile the IR Generator model - Target
     # The feature extractor is a Model: take its input tensor as the
     # generator input and feed its output tensor (not the Model object)
     # into the generator network.
     self.IR_f_net = self.feature_extractor_network()
     in_image_IR = self.IR_f_net.input
     self.IR_g_net = self.generator_network()(self.IR_f_net.output)
     self.IR_generator = Model(inputs=in_image_IR, outputs=self.IR_g_net)
     self.IR_generator.compile(optimizer=self.optimizer, loss='binary_crossentropy', metrics=['accuracy'])

     # compile the Visual Discriminator model - Source
     self.Visual_discriminator = self.discriminator_network()
     self.Visual_discriminator.compile(optimizer=self.optimizer, loss='binary_crossentropy', metrics=['accuracy'])

     # compile the Visual Generator model - Source (same wiring as the IR generator)
     self.Visual_f_net = self.feature_extractor_network()
     in_image_Vis = self.Visual_f_net.input
     self.Visual_g_net = self.generator_network()(self.Visual_f_net.output)
     self.Visual_generator = Model(inputs=in_image_Vis, outputs=self.Visual_g_net)
     self.Visual_generator.compile(optimizer=self.optimizer, loss='binary_crossentropy', metrics=['accuracy'])
Esempio n. 3
0
 def feature_extractor_network(self):
     """Build the convolutional feature extractor as a Keras ``Model``.

     Layer sequence: C1 conv -> M2 max-pool -> C3 conv -> L4/L5 locally
     connected -> F6/F7 fully connected.

     Returns:
         Model mapping an image of shape ``self.img_shape`` to a feature
         vector of length ``self.latent_dim`` with tanh activation.
     """
     # input (was the undefined name `in_shape`)
     in_image = Input(shape=self.img_shape)
     # C1 Layer
     nett = Conv2D(32, (5, 5))(in_image)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     # M2 Layer
     nett = MaxPooling2D(pool_size=(3, 3))(nett)
     # C3 Layer: the convolution must be applied to the running tensor, and
     # BatchNormalization takes no `pool_size` argument.
     nett = Conv2D(64, (3, 3))(nett)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     # L4 Layer
     nett = LocallyConnected2D(128, (3, 3))(nett)
     # L5 Layer
     nett = LocallyConnected2D(256, (3, 3))(nett)
     # F6 Layer: flatten first so the dense layers see one vector per sample,
     # as discriminator_network does before its own dense layer.
     nett = Flatten()(nett)
     nett = Dense(512, activation='relu')(nett)
     nett = Dropout(0.2)(nett)
     # F7 Layer: Dense needs a unit count.
     # NOTE(review): self.latent_dim is chosen because generator_network
     # declares its input as (self.latent_dim,) - confirm the intended width.
     out_features = Dense(self.latent_dim, activation='tanh')(nett)
     # output
     model = Model(inputs=in_image, outputs=out_features)
     return model
Esempio n. 4
0
    def build_discriminator(self):
        """Build the two-headed discriminator.

        A shared dense feature stack feeds two output layers: a single sigmoid
        unit (validity) and a softmax over ``self.num_classes + 1`` units
        (class label).

        Returns:
            Model taking a vector of length ``self.input_shape`` and returning
            ``[valid, label]``.
        """
        model = Sequential()

        model.add(Dense(78, activation="relu", input_dim=self.input_shape))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(56, activation="relu"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(32, activation="relu"))
        model.add(Dropout(rate=0.3))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(28, activation="relu"))
        model.add(Dropout(rate=0.3))  # was misspelled `Dropuout` (NameError)
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(10, activation="relu"))

        model.summary()

        # The stack above is declared with input_dim=self.input_shape, so the
        # functional input uses the same width instead of self.img_shape,
        # which the visible constructor never sets.
        img = Input(shape=(self.input_shape, ))

        features = model(img)
        valid = Dense(1, activation="sigmoid")(features)
        label = Dense(self.num_classes + 1, activation="softmax")(features)

        return Model(img, [valid, label])
Esempio n. 5
0
 def build_model(self):
     """Assemble and compile the nine-input network.

     Pipeline: input_layer -> residual_layer -> cnn_layer -> lstm_layer ->
     attention_layer -> mlp_layer, compiled with the 'adam' optimizer,
     categorical cross-entropy loss and the accuracy metric.

     Returns:
         The compiled Keras ``Model``.
     """
     # Nine input tensors: gyroscope, linear acceleration and magnetometer, x/y/z each.
     (gyr_x, gyr_y, gyr_z,
      lacc_x, lacc_y, lacc_z,
      mag_x, mag_y, mag_z) = self.input_layer()
     sensor_inputs = [
         gyr_x, gyr_y, gyr_z,
         lacc_x, lacc_y, lacc_z,
         mag_x, mag_y, mag_z,
     ]

     # One residual branch per input, in the same order.
     (gyr_x_cnn, gyr_y_cnn, gyr_z_cnn,
      lacc_x_cnn, lacc_y_cnn, lacc_z_cnn,
      mag_x_cnn, mag_y_cnn, mag_z_cnn) = self.residual_layer(*sensor_inputs)

     merged = self.cnn_layer(gyr_x_cnn, gyr_y_cnn, gyr_z_cnn,
                             lacc_x_cnn, lacc_y_cnn, lacc_z_cnn,
                             mag_x_cnn, mag_y_cnn, mag_z_cnn)

     attended = self.attention_layer(self.lstm_layer(merged))
     prediction = self.mlp_layer(attended)

     network = Model(inputs=sensor_inputs, outputs=[prediction])
     network.compile(optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])
     return network
Esempio n. 6
0
def main1():
    """Train a two-task MMoE regression model and return it.

    Loads the train/validation/test splits from ``data_preparation_moe()``,
    feeds the features through an ``MMoE`` layer (16 units, 8 experts,
    2 tasks), adds one small dense tower per task with a single linear output
    named ``y0`` / ``y1``, compiles with mean squared error per output and
    mean absolute error as metric, and fits for 100 epochs.

    Returns:
        The fitted Keras ``Model``.
    """
    # Load the data
    (train_data, train_label, validation_data, validation_label, test_data,
     test_label) = data_preparation_moe()
    num_features = train_data.shape[1]

    print('Training data shape = {}'.format(train_data.shape))
    print('Validation data shape = {}'.format(validation_data.shape))
    print('Test data shape = {}'.format(test_data.shape))

    # Set up the input layer
    input_layer = Input(shape=(num_features, ))

    # Set up MMoE layer; iterating it below yields one tensor per task
    mmoe_layers = MMoE(units=16, num_experts=8, num_tasks=2)(input_layer)

    output_layers = []

    # Output layer names; they are also the keys of the loss dict below
    output_info = ['y0', 'y1']

    # Build tower layer from MMoE layer
    for index, task_layer in enumerate(mmoe_layers):
        tower_layer = Dense(units=8,
                            activation='relu',
                            kernel_initializer=VarianceScaling())(task_layer)
        output_layer = Dense(units=1,
                             name=output_info[index],
                             activation='linear',
                             kernel_initializer=VarianceScaling())(tower_layer)
        output_layers.append(output_layer)

    # Compile model
    model = Model(inputs=[input_layer], outputs=output_layers)
    # Candidate learning rates; only the first one is used
    learning_rates = [1e-4, 1e-3, 1e-2]
    # `lr` is the deprecated spelling of this argument; `learning_rate` is the
    # current keyword.
    adam_optimizer = Adam(learning_rate=learning_rates[0])
    model.compile(loss={
        'y0': 'mean_squared_error',
        'y1': 'mean_squared_error'
    },
                  optimizer=adam_optimizer,
                  metrics=[metrics.mae])

    # Print out model architecture summary
    model.summary()

    # Train the model
    model.fit(x=train_data,
              y=train_label,
              validation_data=(validation_data, validation_label),
              epochs=100)
    return model
Esempio n. 7
0
def create_autoencoder(input_dim, encoding_dim):
    """Build an (uncompiled) dense autoencoder over a sparse input.

    The network is: sparse input -> Dense(HIDDEN_UNITS, selu) ->
    Dense(encoding_dim, selu, named 'enco') -> Dense(HIDDEN_UNITS, selu) ->
    Dense(input_dim, sigmoid).

    Args:
        input_dim: dimension of the one-hot encoded categorical features;
            also the width of the reconstruction output.
        encoding_dim: dimension of the encoded data (the 'enco' layer).

    Returns:
        A Keras ``Model`` mapping the input to its reconstruction.
    """
    sparse_features = Input(shape=(input_dim, ), name='input', sparse=True)

    # Encoder half
    tensor = Dense(HIDDEN_UNITS, activation='selu')(sparse_features)
    tensor = Dense(encoding_dim, activation='selu', name='enco')(tensor)

    # Decoder half
    tensor = Dense(HIDDEN_UNITS, activation='selu')(tensor)
    reconstruction = Dense(input_dim, activation='sigmoid')(tensor)

    return Model(inputs=sparse_features, outputs=reconstruction)
Esempio n. 8
0
    def build_generator(self):
        """Build the generator network.

        A dense stack 78 -> 56 -> 32 (relu), with batch normalization
        (momentum 0.8) after each of those layers, followed by a 28-unit tanh
        output layer.

        Returns:
            Model mapping a noise vector of length ``self.latent_dim`` to the
            28-value generator output.
        """
        stack = Sequential()

        stack.add(Dense(78, activation="relu", input_dim=self.latent_dim))
        for width in (56, 32):
            stack.add(BatchNormalization(momentum=0.8))
            stack.add(Dense(width, activation="relu"))
        stack.add(BatchNormalization(momentum=0.8))
        stack.add(Dense(28, activation="tanh"))

        stack.summary()

        # Wrap the stack in a functional model with an explicit input tensor.
        latent = Input(shape=(self.latent_dim, ))
        generated = stack(latent)

        return Model(latent, generated)
Esempio n. 9
0
 def generator_network(self):
     """Build the transposed-convolution generator as a Keras ``Model``.

     Four transposed-convolution stages (512, 128, 64 and 32 filters), each
     followed by batch normalization and LeakyReLU(0.2).

     Returns:
         Model taking an input of shape ``(self.latent_dim,)`` and returning
         the generated tensor.
     """
     # input
     # NOTE(review): Conv2DTranspose presumably needs a 4-D (batch, h, w, c)
     # tensor, while this input is a flat vector - a reshape is probably
     # required before DC1; confirm the intended spatial layout.
     in_latents = Input(shape=(self.latent_dim,))
     # DC1
     nett = Conv2DTranspose(512, (3, 3))(in_latents)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     # DC2
     nett = Conv2DTranspose(128, (3, 3))(nett)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     # DC3: the layer must be applied to the running tensor; previously the
     # layer object itself was assigned to `nett`.
     nett = Conv2DTranspose(64, (3, 3))(nett)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     # DC4
     nett = Conv2DTranspose(32, (5, 5))(nett)
     nett = BatchNormalization()(nett)
     # `alpha` is a LeakyReLU argument, not a Dense one, and Dense needs a unit
     # count; this stage now ends like the three before it.
     # NOTE(review): confirm the intended output activation for the image.
     out_image = LeakyReLU(alpha=0.2)(nett)
     # output
     model = Model(inputs=in_latents, outputs=out_image)
     return model
Esempio n. 10
0
 def discriminator_network(self):
     """Build the convolutional discriminator as a Keras ``Model``.

     Three 5x5 convolution stages (64, 128 and 256 filters) with batch
     normalization and LeakyReLU(0.2); the last two stages add Dropout(0.2).
     The result is flattened and reduced to one validity score.

     Returns:
         Model mapping an image of shape ``self.img_shape`` to a single
         sigmoid validity output.
     """
     # input
     in_image = Input(shape=self.img_shape)
     # C1 layer
     nett = Conv2D(64, (5, 5))(in_image)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     # C2 layer
     nett = Conv2D(128, (5, 5))(nett)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     nett = Dropout(0.2)(nett)
     # C3 layer
     nett = Conv2D(256, (5, 5))(nett)
     nett = BatchNormalization()(nett)
     nett = LeakyReLU(alpha=0.2)(nett)
     nett = Dropout(0.2)(nett)
     # F4 layer
     nett = Flatten()(nett)
     # Dense has no `alpha` argument. This model is compiled with
     # 'binary_crossentropy', so the score is squashed to (0, 1) by a sigmoid.
     validity = Dense(1, activation='sigmoid')(nett)
     # output
     model = Model(inputs=in_image, outputs=validity)
     return model
Esempio n. 11
0
def compiled_tcn(num_feat,  # type: int
                 num_classes,  # type: int
                 nb_filters,  # type: int
                 kernel_size,  # type: int
                 dilations,  # type: List[int]
                 nb_stacks,  # type: int
                 max_len,  # type: int
                 padding='causal',  # type: str
                 use_skip_connections=True,  # type: bool
                 return_sequences=True,
                 regression=False,  # type: bool
                 dropout_rate=0.05,  # type: float
                 name='tcn',  # type: str,
                 opt='adam',
                 lr=0.002):
    # type: (...) -> keras.Model
    """Build a TCN with a task head and return the compiled model.

    With ``regression=False`` the head is ``Dense(num_classes)`` + softmax and
    the model is compiled with sparse categorical cross-entropy (pass class ids,
    not one-hot encodings). With ``regression=True`` the head is ``Dense(1)``
    + linear activation and the loss is mean squared error.

    Args:
        num_feat: Number of input features, i.e. the last dimension of
            (batch_size, timesteps, input_dim).
        num_classes: Size of the final dense layer in classification mode.
        nb_filters: Number of filters, forwarded to ``TCN``.
        kernel_size: Kernel size, forwarded to ``TCN``.
        dilations: List of dilations, e.g. [1, 2, 4, 8, 16, 32, 64]; passed
            through ``process_dilations`` first.
        nb_stacks: Number of stacks of residual blocks, forwarded to ``TCN``.
        max_len: Maximum sequence length; None for dynamic length.
        padding: Convolution padding, forwarded to ``TCN``.
        use_skip_connections: Forwarded to ``TCN``.
        return_sequences: Forwarded to ``TCN``.
        regression: Selects the regression head instead of classification.
        dropout_rate: Forwarded to ``TCN``.
        name: Forwarded to ``TCN``.
        opt: Optimizer name. Currently not read: the optimizer is always
            ``tf.train.AdamOptimizer``.
        lr: Learning rate. Currently not read: the optimizer always uses 1e-3.

    Returns:
        A compiled keras TCN.
    """
    dilations = process_dilations(dilations)

    input_layer = Input(shape=(max_len, num_feat))
    tcn_features = TCN(nb_filters, kernel_size, nb_stacks, dilations, padding,
                       use_skip_connections, dropout_rate, return_sequences,
                       name)(input_layer)

    print('x.shape=', tcn_features.shape)

    # Pick the head: one linear unit for regression, softmax over the classes otherwise.
    if regression:
        head_units, head_activation = 1, 'linear'
    else:
        head_units, head_activation = num_classes, 'softmax'

    projected = Dense(head_units)(tcn_features)
    output_layer = Activation(head_activation)(projected)
    model = Model(input_layer, output_layer)

    # Fixed optimizer; see the notes on `opt` and `lr` in the docstring.
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    if regression:
        model.compile(optimizer, loss='mean_squared_error')
    else:
        model.compile(optimizer, loss='sparse_categorical_crossentropy', metrics=[accuracy])

    print(f'model.x = {input_layer.shape}')
    print(f'model.y = {output_layer.shape}')
    return model
def createModel(patchSize, numClasses, usingClassification=False):
    """Build the 3D encoder/decoder segmentation network.

    The encoder has an initial 5x5x5 convolution stage followed by three
    down-convolutions (projection blocks), each followed by two identity
    blocks. The decoder upsamples three times, concatenating the matching
    encoder stage output after each upsampling. A final 1x1x1 convolution with
    two filters and a softmax produce the segmentation output.

    Args:
        patchSize: Indexable with three entries; they form the first three
            input dimensions, followed by a single trailing channel.
        numClasses: Units of the optional classification output layer.
        usingClassification: When true, a global-average-pooled softmax
            classification output is attached to the end of the encoder and
            returned as a second model output.

    Returns:
        Tuple ``(cnn, sModelName)``: the Keras model and its name.
    """
    bn_axis = -1 if K.image_data_format() == 'channels_last' else 1

    def _se_identity(tensor, width, stage, block, ratio):
        # 3x3x3 identity block with squeeze-and-excitation enabled.
        return identity_block_3D(tensor,
                                 filters=(width, width),
                                 kernel_size=(3, 3, 3),
                                 stage=stage,
                                 block=block,
                                 se_enabled=True,
                                 se_ratio=ratio)

    def _se_projection(tensor, width, stage, block, ratio):
        # 2x2x2 projection block (down convolution) with squeeze-and-excitation enabled.
        return projection_block_3D(tensor,
                                   filters=(width, width),
                                   kernel_size=(2, 2, 2),
                                   stage=stage,
                                   block=block,
                                   se_enabled=True,
                                   se_ratio=ratio)

    def _upsample_and_merge(tensor, n_filters, skip):
        # Upsample by 2, convolve, normalize, activate, then concatenate the skip tensor.
        up = UpSampling3D(size=(2, 2, 2),
                          data_format=K.image_data_format())(tensor)
        up = Conv3D(filters=n_filters,
                    kernel_size=(3, 3, 3),
                    strides=(1, 1, 1),
                    padding='same',
                    kernel_initializer='he_normal')(up)
        up = BatchNormalization(axis=bn_axis)(up)
        up = LeakyReLU(alpha=0.01)(up)
        return concatenate([up, skip], axis=bn_axis)

    input_tensor = Input(shape=(patchSize[0], patchSize[1], patchSize[2], 1))

    # ----- encoder -----
    # stage 1
    stem = Conv3D(filters=16,
                  kernel_size=(5, 5, 5),
                  strides=(1, 1, 1),
                  padding='same',
                  kernel_initializer='he_normal')(input_tensor)
    stem = BatchNormalization(axis=bn_axis)(stem)
    x_after_stage_1 = LeakyReLU(alpha=0.01)(stem)

    # first down convolution + stage 2
    down_1 = _se_projection(x_after_stage_1, 32, stage=1, block=1, ratio=4)
    enc = _se_identity(down_1, 32, stage=2, block=1, ratio=4)
    x_after_stage_2 = _se_identity(enc, 32, stage=2, block=2, ratio=4)

    # second down convolution + stage 3
    down_2 = _se_projection(x_after_stage_2, 64, stage=2, block=3, ratio=8)
    enc = _se_identity(down_2, 64, stage=3, block=1, ratio=8)
    x_after_stage_3 = _se_identity(enc, 64, stage=3, block=2, ratio=8)

    # third down convolution + stage 4
    down_3 = _se_projection(x_after_stage_3, 128, stage=3, block=4, ratio=16)
    enc = _se_identity(down_3, 128, stage=4, block=1, ratio=16)
    x_after_stage_4 = _se_identity(enc, 128, stage=4, block=2, ratio=16)

    # ----- optional classification head on the encoder output -----
    if usingClassification:
        pooled = GlobalAveragePooling3D(
            data_format=K.image_data_format())(x_after_stage_4)
        classification_output = Dense(units=numClasses,
                                      activation='softmax',
                                      kernel_initializer='he_normal',
                                      name='classification_output')(pooled)

    # ----- decoder -----
    # first upsampling + decoder stage
    dec = _upsample_and_merge(x_after_stage_4, 64, x_after_stage_3)
    dec = _se_identity(dec, 128, stage=6, block=1, ratio=16)
    dec = _se_identity(dec, 128, stage=6, block=2, ratio=16)

    # second upsampling + decoder stage
    dec = _upsample_and_merge(dec, 32, x_after_stage_2)
    dec = _se_identity(dec, 64, stage=7, block=1, ratio=8)
    dec = _se_identity(dec, 64, stage=7, block=2, ratio=8)

    # third upsampling + decoder stage
    dec = _upsample_and_merge(dec, 16, x_after_stage_1)
    dec = _se_identity(dec, 32, stage=9, block=1, ratio=4)

    # ----- segmentation head -----
    # 1x1x1 convolution producing 2 feature maps (foreground / background)
    logits = Conv3D(filters=2,
                    kernel_size=(1, 1, 1),
                    strides=(1, 1, 1),
                    padding='same',
                    kernel_initializer='he_normal',
                    name='conv_veryEnd')(dec)
    segmentation_output = Softmax(axis=bn_axis, name='segmentation_output')(logits)

    # ----- model -----
    if usingClassification:
        cnn = Model(inputs=[input_tensor],
                    outputs=[segmentation_output, classification_output],
                    name='3D-VResFCN-Classification')
    else:
        cnn = Model(inputs=[input_tensor],
                    outputs=[segmentation_output],
                    name='3D-VResFCN')

    return cnn, cnn.name
Esempio n. 13
0
def perceptual_loss(y_true, y_pred):
    """Mean squared difference between VGG16 feature maps of two image batches.

    Both inputs are passed through an ImageNet-weighted VGG16 truncated at
    layer ``block3_conv3``; the loss is the mean of the squared element-wise
    difference of the two feature tensors.

    Args:
        y_true: Reference images, compatible with the module-level ``image_shape``.
        y_pred: Predicted images of the same shape.

    Returns:
        Scalar tensor holding the loss.
    """
    # The keyword that drops the classifier head is `include_top`, not `include`.
    vgg = VGG16(include_top=False, weights="imagenet", input_shape=image_shape)
    loss_model = Model(inputs=vgg.input, outputs=vgg.get_layer("block3_conv3").output)
    loss_model.trainable = False
    # tf.square takes a single tensor (its second positional argument is the
    # op name), so the feature difference must be formed explicitly.
    feature_diff = loss_model(y_true) - loss_model(y_pred)
    return tf.reduce_mean(tf.square(feature_diff))
Esempio n. 14
0
# Encoder: embed the encoder input tokens (index 0 is masked) and run them
# through an LSTM, keeping only its final hidden and cell states.
encoder_embedding = Embedding(vocab_size, 200, mask_zero=True)(encoder_inputs)
# Reference: Embedding layer <https://keras.io/zh/layers/embeddings/#embedding>
encoder_outputs, state_h, state_c = tf.keras.layers.LSTM(
    200, return_state=True)(encoder_embedding)
# Reference: https://keras.io/zh/layers/recurrent/#lstm
encoder_states = [state_h, state_c]

# Decoder: a second embedding + LSTM, initialised with the encoder states and
# returning the full output sequence, followed by a softmax over the vocabulary.
decoder_inputs = Input(shape=(None, ))
decoder_embedding = Embedding(vocab_size, 200, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(200, return_state=True, return_sequences=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding,
                                     initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation=tf.keras.activations.softmax)
output = decoder_dense(decoder_outputs)

# Training model: takes both token sequences and predicts the decoder output.
model = Model([encoder_inputs, decoder_inputs], output)
model.compile(optimizer=optimizers.RMSprop(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Reference: RMSprop <https://keras.io/zh/optimizers/#rmsprop>
# categorical_crossentropy <https://keras.io/zh/backend/#categorical_crossentropy>

model.summary()

# Train the model and save it
model.fit([encoder_input_data, decoder_input_data],
          decoder_output_data,
          batch_size=50,
          epochs=150)
model.save('model.h5')
Esempio n. 15
0
class SGAN:
    """Semi-supervised GAN over flat 28-value feature vectors.

    The discriminator has two outputs (validity and class label); the
    generator maps a latent noise vector to a 28-value sample. ``combined``
    stacks the generator and the frozen discriminator to train the generator.
    """

    def __init__(self, args):
        """Build the discriminator, the generator and the combined model.

        Args:
            args: Accepted for interface compatibility; not read here.
        """
        self.input_shape = 28
        self.num_classes = 2
        self.latent_dim = 100
        # Shape of one discriminator input; must agree with the
        # input_dim=self.input_shape used inside build_discriminator().
        self.img_shape = (self.input_shape, )

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(
            loss=['binary_crossentropy', 'categorical_crossentropy'],
            loss_weights=[0.5, 0.5],
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates samples. The
        # placeholder width must equal the generator's own input width
        # (self.latent_dim); it was hard-coded to 64.
        noise = Input(shape=(self.latent_dim, ))
        img = self.generator(noise)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated samples and determines validity
        valid, _ = self.discriminator(img)

        # The combined model (stacked generator and discriminator)
        # trains the generator to fool the discriminator
        self.combined = Model(noise, valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=optimizer)

    def build_generator(self):
        """Return a Model mapping a ``latent_dim`` noise vector to a 28-value tanh output."""
        model = Sequential()

        model.add(Dense(78, activation="relu", input_dim=self.latent_dim))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(56, activation="relu"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(32, activation="relu"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(28, activation="tanh"))

        model.summary()

        noise = Input(shape=(self.latent_dim, ))
        img = model(noise)

        return Model(noise, img)

    def build_discriminator(self):
        """Return a Model mapping one sample to ``[valid, label]``.

        ``valid`` is a single sigmoid unit; ``label`` is a softmax over
        ``num_classes + 1`` units.
        """
        model = Sequential()

        model.add(Dense(78, activation="relu", input_dim=self.input_shape))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(56, activation="relu"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(32, activation="relu"))
        model.add(Dropout(rate=0.3))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(28, activation="relu"))
        model.add(Dropout(rate=0.3))  # was misspelled `Dropuout` (NameError)
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(10, activation="relu"))

        model.summary()

        img = Input(shape=self.img_shape)

        features = model(img)
        valid = Dense(1, activation="sigmoid")(features)
        label = Dense(self.num_classes + 1, activation="softmax")(features)

        return Model(img, [valid, label])

    def train(self, epochs, batch_size=100, sample_interval=50):
        """Alternate discriminator and generator updates for ``epochs`` steps.

        Args:
            epochs: Number of training iterations (one batch each).
            batch_size: Samples per real batch and per generated batch.
            sample_interval: Call ``sample_images`` every this many iterations.
        """
        # Load the dataset: data_load is expected to return six arrays,
        # (x_train, y_train, x_val, y_val, x_test, y_test).
        data = data_load("credit_fraud_sampled.csv")
        x_train, y_train = data[:2]
        x_val, y_val = data[2:4]
        x_test, y_test = data[4:]

        # Class weights:
        # cw1 weights the validity output, cw2 the class-label output.
        # The extra class index (num_classes) is used for generated samples.
        half_batch = batch_size // 2
        cw1 = {0: 1, 1: 1}
        cw2 = {
            i: self.num_classes / half_batch
            for i in range(self.num_classes)
        }
        cw2[self.num_classes] = 1 / half_batch

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of real samples
            # (was `X_train`, an undefined name - the array is `x_train`)
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs = x_train[idx]

            # Sample noise and generate a batch of new samples
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_imgs = self.generator.predict(noise)

            # One-hot encoding of labels; generated samples get the extra class
            labels = to_categorical(y_train[idx],
                                    num_classes=self.num_classes + 1)
            fake_labels = to_categorical(np.full((batch_size, 1),
                                                 self.num_classes),
                                         num_classes=self.num_classes + 1)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(
                imgs, [valid, labels], class_weight=[cw1, cw2])
            d_loss_fake = self.discriminator.train_on_batch(
                gen_imgs, [fake, fake_labels], class_weight=[cw1, cw2])
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # NOTE(review): the combined model has a single output; confirm the
            # Keras version in use accepts a two-entry class_weight list here.
            g_loss = self.combined.train_on_batch(noise,
                                                  valid,
                                                  class_weight=[cw1, cw2])

            # Plot the progress
            # (presumably d_loss[3] / d_loss[4] are the two accuracy metrics - TODO confirm)
            if epoch % 100 == 0:
                print(
                    "%d [D loss: %f, acc: %.2f%%, op_acc: %.2f%%] [G loss: %f]"
                    % (epoch, d_loss[0], 100 * d_loss[3], 100 * d_loss[4],
                       g_loss))

            # If at save interval => save generated samples
            if epoch % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        """Generate a 5x5 grid of samples and save it under ``images/``.

        Args:
            epoch: Iteration number, used in the output file name.
        """
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        gen_imgs = self.generator.predict(noise)

        # Rescale from the generator's tanh range to 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        # axs.flat walks the grid row by row, matching the sample order
        for cnt, ax in enumerate(axs.flat):
            sample = gen_imgs[cnt]
            if sample.ndim == 3:
                # image-shaped sample: show its first channel
                sample = sample[:, :, 0]
            elif sample.ndim == 1:
                # flat vector (what build_generator produces): show it as a
                # one-row strip instead of failing on image-style indexing
                sample = sample.reshape(1, -1)
            ax.imshow(sample, cmap='gray')
            ax.axis('off')
        fig.savefig("images/mnist_%d.png" % epoch)
        plt.close()

    def save_model(self):
        """Write architecture (JSON) and weights (HDF5) of all three models to ``saved_model/``."""
        def save(model, model_name):
            model_path = "saved_model/%s.json" % model_name
            weights_path = "saved_model/%s_weights.hdf5" % model_name
            # Context manager so the file handle is closed deterministically
            with open(model_path, 'w') as arch_file:
                arch_file.write(model.to_json())
            model.save_weights(weights_path)

        save(self.generator, "mnist_sgan_generator")
        save(self.discriminator, "mnist_sgan_discriminator")
        save(self.combined, "mnist_sgan_adversarial")
Esempio n. 16
0
    def __init__(self,
                 n_word_vocab=50001,
                 n_role_vocab=7,
                 n_factors_emb=256,
                 n_factors_cls=512,
                 n_hidden=256,
                 word_vocabulary=None,
                 role_vocabulary=None,
                 unk_word_id=50000,
                 unk_role_id=7,
                 missing_word_id=50001,
                 using_dropout=False,
                 dropout_rate=0.3,
                 optimizer='adagrad',
                 loss='sparse_categorical_crossentropy',
                 metrics=None):
        """Build and compile the role-filler network, stored as ``self.model``.

        Args:
            n_word_vocab: Word vocabulary size; also the width of the softmax output.
            n_role_vocab: Role vocabulary size; the input window holds
                ``n_role_vocab - 1`` word/role pairs.
            n_factors_emb: Embedding width passed to ``role_based_word_embedding``.
            n_factors_cls: Width passed to ``target_word_hidden``.
            n_hidden: Units of the projection layer after the summed embedding.
            word_vocabulary: Forwarded to the base class; defaults to a new empty dict.
            role_vocabulary: Forwarded to the base class; defaults to a new empty dict.
            unk_word_id: Forwarded to the base class.
            unk_role_id: Forwarded to the base class.
            missing_word_id: Forwarded to the base class and the embedding helper.
            using_dropout: Forwarded to the embedding and hidden-layer helpers.
            dropout_rate: Forwarded to the embedding and hidden-layer helpers.
            optimizer: Passed to ``Model.compile``.
            loss: Passed to ``Model.compile``.
            metrics: Passed to ``Model.compile``; defaults to ``['accuracy']``.
        """
        # Fresh objects per call: mutable default arguments are created once
        # and would otherwise be shared between all instances.
        if word_vocabulary is None:
            word_vocabulary = {}
        if role_vocabulary is None:
            role_vocabulary = {}
        if metrics is None:
            metrics = ['accuracy']

        super(NNRF, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                                   n_hidden, word_vocabulary, role_vocabulary,
                                   unk_word_id, unk_role_id, missing_word_id,
                                   using_dropout, dropout_rate, optimizer,
                                   loss, metrics)

        # minus 1 here because one of the roles is the target role
        self.input_length = n_role_vocab - 1

        # each input is a fixed window of frame set, each word corresponds to one role
        input_words = Input(
            shape=(self.input_length, ), dtype=tf.uint32,
            name='input_words')  # Switched dtype to tf specific (team1-change)
        input_roles = Input(
            shape=(self.input_length, ), dtype=tf.uint32,
            name='input_roles')  # Switched dtype to tf specific (team1-change)
        target_role = Input(
            shape=(1, ), dtype=tf.uint32,
            name='target_role')  # Switched dtype to tf specific (team1-change)

        # role based embedding layer
        embedding_layer = role_based_word_embedding(
            input_words, input_roles, n_word_vocab, n_role_vocab,
            glorot_uniform(), missing_word_id, self.input_length,
            n_factors_emb, True, using_dropout, dropout_rate)

        # sum on input_length direction;
        # obtaining context embedding layer, shape is (batch_size, n_factors_emb)
        event_embedding = Lambda(
            lambda x: K.sum(x, axis=1),
            name='event_embedding',
            output_shape=(n_factors_emb, ))(embedding_layer)

        # fully connected layer; the input_length axis was summed away above,
        # so the output shape is (batch_size, n_hidden)
        hidden = Dense(n_hidden,
                       activation='linear',
                       input_shape=(n_factors_emb, ),
                       name='projected_event_embedding')(event_embedding)

        # non-linear layer, using 1 to initialize
        non_linearity = PReLU(alpha_initializer='ones',
                              name='context_embedding')(hidden)

        # hidden layer
        hidden_layer2 = target_word_hidden(non_linearity,
                                           target_role,
                                           n_word_vocab,
                                           n_role_vocab,
                                           glorot_uniform(),
                                           n_factors_cls,
                                           n_hidden,
                                           using_dropout=using_dropout,
                                           dropout_rate=dropout_rate)

        # softmax output layer
        output_layer = Dense(n_word_vocab,
                             activation='softmax',
                             input_shape=(n_factors_cls, ),
                             name='softmax_word_output')(hidden_layer2)

        self.model = Model(inputs=[input_words, input_roles, target_role],
                           outputs=[output_layer])

        self.model.compile(optimizer, loss, metrics)
Esempio n. 17
0
class NNRF(GenericModel):
    """Non-incremental role-filler model.

    The network consumes a fixed window of (word, role) index pairs together
    with a target role index and outputs a softmax distribution over the word
    vocabulary (layer ``softmax_word_output``).
    """
    def __init__(self,
                 n_word_vocab=50001,
                 n_role_vocab=7,
                 n_factors_emb=256,
                 n_factors_cls=512,
                 n_hidden=256,
                 word_vocabulary={},
                 role_vocabulary={},
                 unk_word_id=50000,
                 unk_role_id=7,
                 missing_word_id=50001,
                 using_dropout=False,
                 dropout_rate=0.3,
                 optimizer='adagrad',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy']):
        """Build and compile the Keras model.

        All vocabulary/size arguments are forwarded to ``GenericModel``;
        ``n_factors_cls`` is only used here, as the width of the target-word
        hidden layer. ``optimizer``, ``loss`` and ``metrics`` are handed to
        ``Model.compile``.
        """
        super(NNRF, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                                   n_hidden, word_vocabulary, role_vocabulary,
                                   unk_word_id, unk_role_id, missing_word_id,
                                   using_dropout, dropout_rate, optimizer,
                                   loss, metrics)

        # minus 1 here because one of the role is target role
        self.input_length = n_role_vocab - 1

        # each input is a fixed window of frame set, each word correspond to one role
        input_words = Input(
            shape=(self.input_length, ), dtype=tf.uint32,
            name='input_words')  # Switched dtype to tf specific (team1-change)
        input_roles = Input(
            shape=(self.input_length, ), dtype=tf.uint32,
            name='input_roles')  # Switched dtype to tf specific (team1-change)
        target_role = Input(
            shape=(1, ), dtype=tf.uint32,
            name='target_role')  # Switched dtype to tf specific (team1-change)

        # role based embedding layer
        embedding_layer = role_based_word_embedding(
            input_words, input_roles, n_word_vocab, n_role_vocab,
            glorot_uniform(), missing_word_id, self.input_length,
            n_factors_emb, True, using_dropout, dropout_rate)

        # sum on input_length direction;
        # obtaining context embedding layer, shape is (batch_size, n_factors_emb)
        event_embedding = Lambda(
            lambda x: K.sum(x, axis=1),
            name='event_embedding',
            output_shape=(n_factors_emb, ))(embedding_layer)

        # fully connected layer applied to the summed embedding,
        # output shape is (batch_size, n_hidden)
        hidden = Dense(n_hidden,
                       activation='linear',
                       input_shape=(n_factors_emb, ),
                       name='projected_event_embedding')(event_embedding)

        # non-linear layer, using 1 to initialize
        non_linearity = PReLU(alpha_initializer='ones',
                              name='context_embedding')(hidden)

        # hidden layer
        hidden_layer2 = target_word_hidden(non_linearity,
                                           target_role,
                                           n_word_vocab,
                                           n_role_vocab,
                                           glorot_uniform(),
                                           n_factors_cls,
                                           n_hidden,
                                           using_dropout=using_dropout,
                                           dropout_rate=dropout_rate)

        # softmax output layer
        output_layer = Dense(n_word_vocab,
                             activation='softmax',
                             input_shape=(n_factors_cls, ),
                             name='softmax_word_output')(hidden_layer2)

        self.model = Model(inputs=[input_words, input_roles, target_role],
                           outputs=[output_layer])

        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    def set_0_bias(self):
        """ This function is used as a hack that set output bias to 0.
            According to Ottokar's advice in the paper, during the *evaluation*, the output bias needs to be 0 
            in order to replicate the best performance reported in the paper.

            Returns the bias that was in place before zeroing, so that it can
            be restored later with ``set_bias``.
        """
        output_layer = self.model.get_layer("softmax_word_output")
        # get_weights() returns [kernel, bias] for the Dense layer
        word_output_weights = output_layer.get_weights()
        word_output_kernel = word_output_weights[0]
        word_output_bias = np.zeros(self.n_word_vocab)
        output_layer.set_weights([word_output_kernel, word_output_bias])

        return word_output_weights[1]

    def set_bias(self, bias):
        """ Replace the bias of the word softmax layer, keeping its kernel.

            Returns the ``bias`` argument unchanged.
        """
        output_layer = self.model.get_layer("softmax_word_output")
        word_output_kernel = output_layer.get_weights()[0]
        output_layer.set_weights([word_output_kernel, bias])

        return bias

    # Deprecated temporarily
    def train(self,
              i_w,
              i_r,
              t_w,
              t_r,
              t_w_c,
              t_r_c,
              batch_size=256,
              epochs=100,
              validation_split=0.05,
              verbose=0):
        """ Fit the model on ``[i_w, i_r, t_r]`` against the labels ``t_w_c``.

            ``t_w`` and ``t_r_c`` are accepted for signature compatibility
            with the multi-task models but are not used here.
            Returns whatever ``Model.fit`` returns.
        """
        # Keyword arguments are required: the positional order of Model.fit
        # is (x, y, batch_size, epochs, verbose, callbacks, validation_split),
        # so passing validation_split/verbose positionally would land them in
        # the verbose/callbacks slots and no validation split would happen.
        train_result = self.model.fit([i_w, i_r, t_r],
                                      t_w_c,
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      verbose=verbose,
                                      validation_split=validation_split)
        return train_result

    def test(self,
             i_w,
             i_r,
             t_w,
             t_r,
             t_w_c,
             t_r_c,
             batch_size=256,
             verbose=0):
        """ Evaluate the model on ``[i_w, i_r, t_r]`` against ``t_w_c``.

            ``t_w`` and ``t_r_c`` are not used.
        """
        test_result = self.model.evaluate([i_w, i_r, t_r],
                                          t_w_c,
                                          batch_size=batch_size,
                                          verbose=verbose)
        return test_result

    def train_on_batch(self, i_w, i_r, t_w, t_r, t_w_c, t_r_c):
        """ Run one gradient update on a single batch (``t_w``/``t_r_c`` unused). """
        train_result = self.model.train_on_batch([i_w, i_r, t_r], t_w_c)
        return train_result

    def test_on_batch(self,
                      i_w,
                      i_r,
                      t_w,
                      t_r,
                      t_w_c,
                      t_r_c,
                      sample_weight=None):
        """ Evaluate a single batch (``t_w``/``t_r_c`` unused). """
        test_result = self.model.test_on_batch([i_w, i_r, t_r],
                                               t_w_c,
                                               sample_weight=sample_weight)
        return test_result

    def predict(self, i_w, i_r, t_r, batch_size=1, verbose=0):
        """ Return the output from softmax layer. """
        predict_result = self.model.predict([i_w, i_r, t_r],
                                            batch_size=batch_size,
                                            verbose=verbose)
        return predict_result

    def summary(self):
        """ Print the Keras summary of the underlying model. """
        self.model.summary()

    def predict_class(self, i_w, i_r, t_r, batch_size=1, verbose=0):
        """ Return predicted target word from prediction. """
        predict_result = self.predict(i_w, i_r, t_r, batch_size, verbose)
        return np.argmax(predict_result, axis=1)

    def p_words(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return the output scores given target words. """
        predict_result = self.predict(i_w, i_r, t_r, batch_size, verbose)
        # NOTE(review): rows are selected with range(batch_size), so this
        # assumes batch_size equals the number of samples -- confirm callers.
        return predict_result[range(batch_size), list(t_w)]

    def top_words(self, i_w, i_r, t_r, topN=20, batch_size=1, verbose=0):
        """ Return top N target words given context. """
        predict_result = self.predict(i_w, i_r, t_r, batch_size, verbose)
        # argsort is ascending: take the last topN entries and reverse them
        rank_list = np.argsort(predict_result, axis=1)
        return [r[-topN:][::-1] for r in rank_list]

    def list_top_words(self, i_w, i_r, t_r, topN=20, batch_size=1, verbose=0):
        """ Return a list of decoded top N target words.
            (Only for reference, can be removed.)
        """
        top_words_lists = self.top_words(i_w, i_r, t_r, topN, batch_size,
                                         verbose)
        print(
            type(top_words_lists))  # Updated to python3 syntax (team1-change)
        result = []
        for i in range(batch_size):
            top_words_list = top_words_lists[i]
            result.append([self.word_decoder[w] for w in top_words_list])
        return result
Esempio n. 18
0
    def __init__(self,
                 n_word_vocab=50001,
                 n_role_vocab=7,
                 n_factors_emb=300,
                 n_hidden=300,
                 word_vocabulary=None,
                 role_vocabulary=None,
                 unk_word_id=50000,
                 unk_role_id=7,
                 missing_word_id=50001,
                 using_dropout=False,
                 dropout_rate=0.3,
                 optimizer='adagrad',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'],
                 loss_weights=[1., 1.]):
        """Assemble and compile the two-headed (word + role) network.

        The shared context embedding feeds two branches: one predicts the
        target word given the target role, the other predicts the target role
        given the target word. Both heads end in a softmax layer and are
        compiled with the given ``loss`` and ``loss_weights``.
        """
        super(MTRFv4, self).__init__(
            n_word_vocab, n_role_vocab, n_factors_emb, n_hidden,
            word_vocabulary, role_vocabulary, unk_word_id, unk_role_id,
            missing_word_id, using_dropout, dropout_rate, optimizer, loss,
            metrics)

        # One role slot is reserved for the target, the rest form the window.
        window = n_role_vocab - 1

        def index_input(width, label):
            # Integer index input; dtype is the tf-specific one (team1-change)
            return Input(shape=(width, ), dtype=tf.uint32, name=label)

        # Context window: one word index per role index.
        words_in = index_input(window, 'input_words')
        roles_in = index_input(window, 'input_roles')
        # Targets, each a single index.
        target_word_in = index_input(1, 'target_word')
        target_role_in = index_input(1, 'target_role')

        # Role-aware factored embedding of the context window.
        factored = factored_embedding(words_in, roles_in, n_word_vocab,
                                      n_role_vocab, glorot_uniform(),
                                      missing_word_id, window, n_factors_emb,
                                      n_hidden, True, using_dropout,
                                      dropout_rate)

        # PReLU with its slope initialised to one.
        activation = PReLU(alpha_initializer='ones')
        activated = activation(factored)

        # Average over the window axis -> (batch_size, n_hidden).
        averager = Lambda(lambda t: K.mean(t, axis=1),
                          name='context_embedding',
                          output_shape=(n_hidden, ))
        context = averager(activated)

        # Branch 1: hidden layer for the target word, conditioned on the role.
        word_branch = target_word_hidden(context,
                                         target_role_in,
                                         n_word_vocab,
                                         n_role_vocab,
                                         glorot_uniform(),
                                         n_hidden,
                                         n_hidden,
                                         using_dropout=using_dropout,
                                         dropout_rate=dropout_rate)

        # Branch 2: hidden layer for the target role, conditioned on the word.
        role_branch = target_role_hidden(context,
                                         target_word_in,
                                         n_word_vocab,
                                         n_role_vocab,
                                         glorot_uniform(),
                                         n_hidden,
                                         n_hidden,
                                         using_dropout=using_dropout,
                                         dropout_rate=dropout_rate)

        # Softmax head over the word vocabulary.
        word_softmax = Dense(n_word_vocab,
                             activation='softmax',
                             input_shape=(n_hidden, ),
                             name='softmax_word_output')
        word_probs = word_softmax(word_branch)

        # Softmax head over the role vocabulary.
        role_softmax = Dense(n_role_vocab,
                             activation='softmax',
                             input_shape=(n_hidden, ),
                             name='softmax_role_output')
        role_probs = role_softmax(role_branch)

        model_inputs = [words_in, roles_in, target_word_in, target_role_in]
        model_outputs = [word_probs, role_probs]
        self.model = Model(inputs=model_inputs, outputs=model_outputs)

        self.model.compile(optimizer=optimizer,
                           loss=loss,
                           metrics=metrics,
                           loss_weights=loss_weights)
# (10) Bidirectional LSTM modelling
# NOTE: the package is named `tensorflow`; `tf` is only the conventional
# alias and cannot be used in a `from ... import` statement.
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import SimpleRNN, Input, Dense, LSTM
from tensorflow.keras.layers import Bidirectional, TimeDistributed

# Training: early-stopping callback, taken from the same Keras
# implementation as the model so the two are not mixed.
from tensorflow.keras.callbacks import EarlyStopping

# Sequences of `right_idx3` steps with 256 features per step.
xInput = Input(batch_shape=(None, right_idx3, 256))
# Forward and backward LSTM outputs are concatenated at every step.
xBiLstm = Bidirectional(LSTM(240, return_sequences=True),
                        merge_mode='concat')(xInput)
xOutput = TimeDistributed(Dense(1, activation='sigmoid'))(xBiLstm)
# A cost is emitted at every step, and the error is propagated to the next step.

model1 = Model(xInput, xOutput)
model1.compile(loss='binary_crossentropy',
               optimizer='rmsprop',
               metrics=['accuracy'])
model1.summary()

# Define the early-stopping callback.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
# In[24]:

########## 3gram
# Cross-validation with k-fold
from sklearn.model_selection import KFold

# Accuracy, Precision, Recall, F1-Score
def _gs_sentence_inputs(net, n_input_length, fillers):
    """Build the (words, roles) model inputs for a single sentence.

    Every role id of ``net.role_vocabulary`` starts out filled with
    ``net.missing_word_id``; the slot keyed by ``n_input_length`` is dropped
    and the remaining slots are overwritten from ``fillers`` (role id -> word
    id). Both arrays have shape ``(1, n_input_length)``.
    """
    slots = dict.fromkeys(net.role_vocabulary.values(), net.missing_word_id)
    slots.pop(n_input_length)
    slots.update(fillers)
    # Materialise the dict views first: np.array() on a Python 3 view gives a
    # 0-d object array, which cannot be reshaped.
    words = np.array(list(slots.values())).reshape((1, n_input_length))
    roles = np.array(list(slots.keys())).reshape((1, n_input_length))
    return words, roles


def eval_GS(model_name,
            experiment_name,
            eval_file_name,
            model=None,
            print_result=True,
            verb_baseline=False):
    """Correlate embedding similarities with the scores of an evaluation file.

    Each data line of the evaluation file (the first line is a header) is
    split on whitespace; columns 1-4 are read as verb, subject, object and
    landmark verb, column 5 as the score and column 6 as a HIGH/LOW label.
    Two sentences (verb vs. landmark verb, same arguments) are embedded with
    the ``context_embedding`` layer of the network and compared by cosine
    similarity. One result line per pair is written to
    ``MODEL_PATH/<experiment_name>_<eval_file_name>``.

    Args:
        model_name: model type; passed to ``model_builder.build_model`` and
            searched for ``'NNRF'`` to decide how many inputs to feed.
        experiment_name: name used to load the model and to name the result
            file.
        eval_file_name: file name of the evaluation data inside ``EVAL_PATH``.
        model: an already built network; when falsy the network is loaded
            from ``MODEL_PATH``.
        print_result: print sample count, correlation and mean similarities.
        verb_baseline: when true only the verb slot is filled in.

    Returns:
        The Spearman correlation between scores and similarities.

    Raises:
        ValueError: if the label column is neither ``HIGH`` nor ``LOW``.
    """
    MODEL_NAME = experiment_name
    eval_file = os.path.join(EVAL_PATH, eval_file_name)
    result_file = os.path.join(MODEL_PATH, MODEL_NAME + '_' + eval_file_name)

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    # Sub-model that stops at the sentence (context) embedding layer.
    sent_layer = 'context_embedding'
    sent_model = Model(inputs=net.model.input,
                       outputs=net.model.get_layer(sent_layer).output)

    n_input_length = len(net.role_vocabulary) - 1

    print(net.role_vocabulary)

    # Loop-invariant values: role ids, the placeholder fed to the target
    # input(s), and how many target inputs the network expects.
    V_ri = net.role_vocabulary['V']
    A0_ri = net.role_vocabulary['A0']
    A1_ri = net.role_vocabulary['A1']
    zeroA = np.array([0])
    n_target_inputs = 1 if re.search('NNRF', model_name) else 2

    scores = []
    similarities = []
    lo_similarities = []
    hi_similarities = []

    print("Embedding: " + experiment_name)
    print("=" * 60)
    print("\n")
    print("sentence1\tsentence2\taverage_score\tembedding_cosine")
    print("-" * 60)

    with open(eval_file, 'r') as f, \
        open(result_file, 'w') as f_out:

        # skip header
        next(f, None)

        for line in f:
            s = line.split()
            if not s:
                # tolerate blank lines (e.g. a trailing newline)
                continue

            # verb subject object landmark
            # A1 - object; A0 - subject
            V1, A0, A1, V2 = s[1:5]
            score = float(s[5])
            hilo = s[6].upper()

            V1 = wnl.lemmatize(V1, wn.VERB)
            A0 = wnl.lemmatize(A0, wn.NOUN)
            A1 = wnl.lemmatize(A1, wn.NOUN)
            V2 = wnl.lemmatize(V2, wn.VERB)

            # Out-of-vocabulary words map to the unknown-word id.
            V1_i = net.word_vocabulary.get(V1, net.unk_word_id)
            A0_i = net.word_vocabulary.get(A0, net.unk_word_id)
            A1_i = net.word_vocabulary.get(A1, net.unk_word_id)
            V2_i = net.word_vocabulary.get(V2, net.unk_word_id)

            fillers1 = {V_ri: V1_i}
            fillers2 = {V_ri: V2_i}
            if not verb_baseline:
                # Both sentences share subject and object.
                arguments = {A0_ri: A0_i, A1_ri: A1_i}
                fillers1.update(arguments)
                fillers2.update(arguments)

            s1_w, s1_r = _gs_sentence_inputs(net, n_input_length, fillers1)
            s2_w, s2_r = _gs_sentence_inputs(net, n_input_length, fillers2)

            targets = [zeroA] * n_target_inputs
            sent1_emb = sent_model.predict([s1_w, s1_r] + targets)
            sent2_emb = sent_model.predict([s2_w, s2_r] + targets)

            # predict() returns a (1, dim) batch while cosine() works on 1-D
            # vectors, so flatten first; then convert distance to similarity.
            similarity = 1.0 - cosine(sent1_emb.ravel(), sent2_emb.ravel())

            if hilo == "HIGH":
                hi_similarities.append(similarity)
            elif hilo == "LOW":
                lo_similarities.append(similarity)
            else:
                raise ValueError("Unknown hilo value %s" % hilo)

            scores.append(score)
            similarities.append(similarity)

            f_out.write("\"%s %s %s\"\t\"%s %s %s\"\t %.2f \t %.2f \n" %
                        (A0, V1, A1, A0, V2, A1, score, similarity))

    print("-" * 60)

    correlation, pvalue = spearmanr(scores, similarities)

    if print_result:
        print("Total number of samples: %d" % len(scores))
        print("Spearman correlation: %.4f; 2-tailed p-value: %.10f" %
              (correlation, pvalue))
        print("High: %.2f; Low: %.2f" %
              (np.mean(hi_similarities), np.mean(lo_similarities)))

    return correlation
Esempio n. 21
0
class MTRFv4(GenericModel):
    """Multi-task non-incremental role-filler.

    The network has two softmax heads sharing one context embedding:
    ``softmax_word_output`` predicts the target word given the target role,
    and ``softmax_role_output`` predicts the target role given the target
    word.
    """
    def __init__(self,
                 n_word_vocab=50001,
                 n_role_vocab=7,
                 n_factors_emb=300,
                 n_hidden=300,
                 word_vocabulary=None,
                 role_vocabulary=None,
                 unk_word_id=50000,
                 unk_role_id=7,
                 missing_word_id=50001,
                 using_dropout=False,
                 dropout_rate=0.3,
                 optimizer='adagrad',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'],
                 loss_weights=[1., 1.]):
        """Build and compile the Keras model.

        All arguments except ``loss_weights`` are forwarded to
        ``GenericModel``; ``optimizer``, ``loss``, ``metrics`` and
        ``loss_weights`` are handed to ``Model.compile``.
        """
        super(MTRFv4, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                                     n_hidden, word_vocabulary,
                                     role_vocabulary, unk_word_id, unk_role_id,
                                     missing_word_id, using_dropout,
                                     dropout_rate, optimizer, loss, metrics)

        # minus 1 here because one of the role is target role
        input_length = n_role_vocab - 1

        # each input is a fixed window of frame set, each word correspond to one role
        input_words = Input(
            shape=(input_length, ), dtype=tf.uint32,
            name='input_words')  # Switched dtype to tf specific (team1-change)
        input_roles = Input(
            shape=(input_length, ), dtype=tf.uint32,
            name='input_roles')  # Switched dtype to tf specific (team1-change)
        target_word = Input(
            shape=(1, ), dtype=tf.uint32,
            name='target_word')  # Switched dtype to tf specific (team1-change)
        target_role = Input(
            shape=(1, ), dtype=tf.uint32,
            name='target_role')  # Switched dtype to tf specific (team1-change)

        # role based embedding layer
        embedding_layer = factored_embedding(input_words, input_roles,
                                             n_word_vocab, n_role_vocab,
                                             glorot_uniform(), missing_word_id,
                                             input_length, n_factors_emb,
                                             n_hidden, True, using_dropout,
                                             dropout_rate)

        # non-linear layer, using 1 to initialize
        non_linearity = PReLU(alpha_initializer='ones')(embedding_layer)

        # mean on input_length direction;
        # obtaining context embedding layer, shape is (batch_size, n_hidden)
        context_embedding = Lambda(lambda x: K.mean(x, axis=1),
                                   name='context_embedding',
                                   output_shape=(n_hidden, ))(non_linearity)

        # target word hidden layer
        tw_hidden = target_word_hidden(context_embedding,
                                       target_role,
                                       n_word_vocab,
                                       n_role_vocab,
                                       glorot_uniform(),
                                       n_hidden,
                                       n_hidden,
                                       using_dropout=using_dropout,
                                       dropout_rate=dropout_rate)

        # target role hidden layer
        tr_hidden = target_role_hidden(context_embedding,
                                       target_word,
                                       n_word_vocab,
                                       n_role_vocab,
                                       glorot_uniform(),
                                       n_hidden,
                                       n_hidden,
                                       using_dropout=using_dropout,
                                       dropout_rate=dropout_rate)

        # softmax output layer
        target_word_output = Dense(n_word_vocab,
                                   activation='softmax',
                                   input_shape=(n_hidden, ),
                                   name='softmax_word_output')(tw_hidden)

        # softmax output layer
        target_role_output = Dense(n_role_vocab,
                                   activation='softmax',
                                   input_shape=(n_hidden, ),
                                   name='softmax_role_output')(tr_hidden)

        self.model = Model(
            inputs=[input_words, input_roles, target_word, target_role],
            outputs=[target_word_output, target_role_output])

        self.model.compile(optimizer=optimizer,
                           loss=loss,
                           metrics=metrics,
                           loss_weights=loss_weights)

    def set_0_bias(self):
        """ Zero the bias of both softmax layers, keeping their kernels.

            Returns the previous ``(word_bias, role_bias)`` so they can be
            restored with ``set_bias``.
        """
        word_layer = self.model.get_layer("softmax_word_output")
        # get_weights() returns [kernel, bias] for a Dense layer
        word_output_weights = word_layer.get_weights()
        word_layer.set_weights(
            [word_output_weights[0],
             np.zeros(self.n_word_vocab)])

        role_layer = self.model.get_layer("softmax_role_output")
        role_output_weights = role_layer.get_weights()
        role_layer.set_weights(
            [role_output_weights[0],
             np.zeros(self.n_role_vocab)])

        return word_output_weights[1], role_output_weights[1]

    def set_bias(self, bias):
        """ Set the biases of both softmax layers, keeping their kernels.

            ``bias[0]`` goes to the word output layer and ``bias[1]`` to the
            role output layer. Returns ``bias`` unchanged.
        """
        word_layer = self.model.get_layer("softmax_word_output")
        word_layer.set_weights([word_layer.get_weights()[0], bias[0]])

        role_layer = self.model.get_layer("softmax_role_output")
        role_layer.set_weights([role_layer.get_weights()[0], bias[1]])

        return bias

    # Train and test
    # Deprecated temporarily
    def train(self,
              i_w,
              i_r,
              t_w,
              t_r,
              t_w_c,
              t_r_c,
              batch_size=256,
              epochs=100,
              validation_split=0.05,
              verbose=0):
        """ Fit the model on ``[i_w, i_r, t_w, t_r]`` against ``[t_w_c, t_r_c]``.

            Returns whatever ``Model.fit`` returns.
        """
        # Keyword arguments are required: the positional order of Model.fit
        # is (x, y, batch_size, epochs, verbose, callbacks, validation_split),
        # so passing validation_split/verbose positionally would land them in
        # the verbose/callbacks slots and no validation split would happen.
        train_result = self.model.fit([i_w, i_r, t_w, t_r], [t_w_c, t_r_c],
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      verbose=verbose,
                                      validation_split=validation_split)
        return train_result

    def test(self,
             i_w,
             i_r,
             t_w,
             t_r,
             t_w_c,
             t_r_c,
             batch_size=256,
             verbose=0):
        """ Evaluate on ``[i_w, i_r, t_w, t_r]`` against ``[t_w_c, t_r_c]``. """
        test_result = self.model.evaluate([i_w, i_r, t_w, t_r], [t_w_c, t_r_c],
                                          batch_size=batch_size,
                                          verbose=verbose)
        return test_result

    def train_on_batch(self, i_w, i_r, t_w, t_r, t_w_c, t_r_c):
        """ Run one gradient update on a single batch. """
        train_result = self.model.train_on_batch([i_w, i_r, t_w, t_r],
                                                 [t_w_c, t_r_c])
        return train_result

    def test_on_batch(self,
                      i_w,
                      i_r,
                      t_w,
                      t_r,
                      t_w_c,
                      t_r_c,
                      sample_weight=None):
        """ Evaluate a single batch. """
        test_result = self.model.test_on_batch([i_w, i_r, t_w, t_r],
                                               [t_w_c, t_r_c],
                                               sample_weight=sample_weight)
        return test_result

    def predict(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return the output from softmax layer.

            The result holds the word output first and the role output second.
        """
        predict_result = self.model.predict([i_w, i_r, t_w, t_r],
                                            batch_size=batch_size,
                                            verbose=verbose)
        return predict_result

    def predict_word(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return predicted target word from prediction. """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        return np.argmax(predict_result[0], axis=1)

    def predict_role(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return predicted target role from prediction. """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        return np.argmax(predict_result[1], axis=1)

    def p_words(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return the output scores given target words. """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        # NOTE(review): rows are selected with range(batch_size), so this
        # assumes batch_size equals the number of samples -- confirm callers.
        return predict_result[0][range(batch_size), list(t_w)]

    def p_roles(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return the output scores given target roles. """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        # NOTE(review): same batch_size == number-of-samples assumption as
        # in p_words.
        return predict_result[1][range(batch_size), list(t_r)]

    def top_words(self, i_w, i_r, t_w, t_r, topN=20, batch_size=1, verbose=0):
        """ Return top N target words given context.

            Only the ranking of the FIRST sample is returned, as a 1-D array
            ordered from most to least probable.
        """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size,
                                      verbose)[0]
        # argsort is ascending: take the last topN entries and reverse them
        rank_list = np.argsort(predict_result, axis=1)[0]
        return rank_list[-topN:][::-1]

    def list_top_words(self,
                       i_w,
                       i_r,
                       t_r,
                       topN=20,
                       batch_size=1,
                       verbose=0,
                       t_w=None):
        """ Return a list of decoded top N target words, one list per sample.
            (Only for reference, can be removed.)

            The model needs a target-word input, but the word head is built
            from the context embedding and the target role only, so when
            ``t_w`` is not given a zero placeholder shaped like ``t_r`` is
            fed instead.
        """
        if t_w is None:
            t_w = np.zeros_like(np.asarray(t_r))
        word_scores = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)[0]
        # Rank every sample (top_words only ranks the first one).
        rankings = np.argsort(word_scores, axis=1)
        return [[self.word_decoder[w] for w in row[-topN:][::-1]]
                for row in rankings]

    def summary(self):
        """ Print the Keras summary of the underlying model. """
        self.model.summary()