def __init__(self, args):
    """Build the discriminator, the generator and the stacked adversarial model.

    Args:
        args: Accepted for interface compatibility; this constructor does
            not read it.
    """
    self.input_shape = 28
    # Tuple form of the sample width, for code that feeds Input(shape=...).
    self.img_shape = (self.input_shape, )
    self.num_classes = 2
    self.latent_dim = 100

    optimizer = Adam(0.0002, 0.5)

    # Build and compile the discriminator. It has two heads (validity and
    # class label), hence two losses weighted equally.
    self.discriminator = self.build_discriminator()
    self.discriminator.compile(
        loss=['binary_crossentropy', 'categorical_crossentropy'],
        loss_weights=[0.5, 0.5],
        optimizer=optimizer,
        metrics=['accuracy'])

    # Build the generator
    self.generator = self.build_generator()

    # The generator takes noise as input and generates samples. The noise
    # width must equal latent_dim: the generator is built for that width,
    # and a different width fails as soon as the generator is called.
    noise = Input(shape=(self.latent_dim, ))
    img = self.generator(noise)

    # For the combined model we will only train the generator
    self.discriminator.trainable = False

    # Only the validity head is used by the combined model; the class-label
    # head is discarded.
    valid, _ = self.discriminator(img)

    # The combined model (stacked generator and discriminator)
    # trains the generator to fool the discriminator.
    self.combined = Model(noise, valid)
    self.combined.compile(loss=['binary_crossentropy'], optimizer=optimizer)
def __init__(self, Name, LearningRate=0.0001, img_shape=None, latent_dim=None):
    """Build and compile the IR and Visual discriminators and generators.

    Args:
        Name: Identifier stored on the instance as ``self.Name``.
        LearningRate: Learning rate passed to every Adam optimizer.
        img_shape: Shape of one input image; required.
        latent_dim: Width of the latent feature vector; required.

    Raises:
        ValueError: If ``img_shape`` or ``latent_dim`` is not supplied.
    """
    # The two trailing parameters need defaults because they follow a
    # defaulted parameter (otherwise the signature is a SyntaxError); they
    # are still mandatory in practice.
    if img_shape is None or latent_dim is None:
        raise ValueError("img_shape and latent_dim must both be provided")

    self.Name = Name
    self.LearningRate = LearningRate
    self.img_shape = img_shape
    self.latent_dim = latent_dim
    self.trained = 0

    self.g_optimizer = Adam(learning_rate=self.LearningRate)
    self.d_optimizer = Adam(learning_rate=self.LearningRate)
    self.optimizer = Adam(learning_rate=self.LearningRate)

    # Compile the IR discriminator model - Target
    self.IR_discriminator = self.discriminator_network()
    self.IR_discriminator.compile(optimizer=self.optimizer,
                                  loss='binary_crossentropy',
                                  metrics=['accuracy'])

    # Compile the IR generator model - Target.
    # The generator is chained onto the feature extractor's output tensor,
    # and the stacked model is fed through the extractor's own input.
    self.IR_f_net = self.feature_extractor_network()
    self.IR_g_net = self.generator_network()(self.IR_f_net.output)
    self.IR_generator = Model(inputs=self.IR_f_net.input,
                              outputs=self.IR_g_net)
    self.IR_generator.compile(optimizer=self.optimizer,
                              loss='binary_crossentropy',
                              metrics=['accuracy'])

    # Compile the Visual discriminator model - Source
    self.Visual_discriminator = self.discriminator_network()
    self.Visual_discriminator.compile(optimizer=self.optimizer,
                                      loss='binary_crossentropy',
                                      metrics=['accuracy'])

    # Compile the Visual generator model - Source
    self.Visual_f_net = self.feature_extractor_network()
    self.Visual_g_net = self.generator_network()(self.Visual_f_net.output)
    self.Visual_generator = Model(inputs=self.Visual_f_net.input,
                                  outputs=self.Visual_g_net)
    self.Visual_generator.compile(optimizer=self.optimizer,
                                  loss='binary_crossentropy',
                                  metrics=['accuracy'])
def feature_extractor_network(self):
    """Build the convolutional feature extractor.

    Returns:
        An uncompiled Model mapping an image of shape ``self.img_shape`` to
        a tanh-activated feature tensor whose last axis has
        ``self.latent_dim`` units.
    """
    # input
    in_image = Input(shape=self.img_shape)
    # C1 Layer
    nett = Conv2D(32, (5, 5))(in_image)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    # M2 Layer
    nett = MaxPooling2D(pool_size=(3, 3))(nett)
    # C3 Layer: the convolution must be applied to the running tensor, and
    # BatchNormalization takes no pool_size argument.
    nett = Conv2D(64, (3, 3))(nett)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    # L4 Layer
    nett = LocallyConnected2D(128, (3, 3))(nett)
    # L5 Layer
    nett = LocallyConnected2D(256, (3, 3))(nett)
    # F6 Layer
    # NOTE(review): Dense acts on the last axis only, so the spatial axes
    # are still present here; a Flatten before F6 is probably intended
    # if the features must be a flat vector - confirm.
    nett = Dense(512, activation='relu')(nett)
    nett = Dropout(0.2)(nett)
    # F7 Layer: Dense requires a unit count; latent_dim is used because the
    # generator network takes an input of that width.
    out_features = Dense(self.latent_dim, activation='tanh')(nett)
    # output
    model = Model(inputs=in_image, outputs=out_features)
    return model
def build_discriminator(self):
    """Build the two-headed discriminator.

    Returns:
        An uncompiled Model that maps a sample of width ``self.input_shape``
        to ``[valid, label]``: a sigmoid validity score and a softmax over
        ``self.num_classes + 1`` classes (the extra class is used for
        generated samples).
    """
    model = Sequential()
    model.add(Dense(78, activation="relu", input_dim=self.input_shape))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(56, activation="relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(32, activation="relu"))
    model.add(Dropout(rate=0.3))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(28, activation="relu"))
    model.add(Dropout(rate=0.3))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(10, activation="relu"))
    model.summary()

    # The input width must agree with the input_dim given to the first
    # Dense layer above.
    img = Input(shape=(self.input_shape, ))
    features = model(img)
    valid = Dense(1, activation="sigmoid")(features)
    label = Dense(self.num_classes + 1, activation="softmax")(features)
    return Model(img, [valid, label])
def build_model(self):
    """Wire the nine sensor channels through the network and compile it.

    The channels returned by ``self.input_layer()`` go through
    ``residual_layer`` and ``cnn_layer``, then ``lstm_layer``,
    ``attention_layer`` and ``mlp_layer``.

    Returns:
        A Model compiled with the 'adam' optimizer, categorical
        cross-entropy loss and an accuracy metric.
    """
    # Order: gyr_x, gyr_y, gyr_z, lacc_x, lacc_y, lacc_z, mag_x, mag_y, mag_z
    channel_inputs = tuple(self.input_layer())
    if len(channel_inputs) != 9:
        # Same failure mode as unpacking into nine names.
        raise ValueError(
            "expected 9 input channels, got %d" % len(channel_inputs))

    residual_outputs = tuple(self.residual_layer(*channel_inputs))
    if len(residual_outputs) != 9:
        raise ValueError(
            "expected 9 residual outputs, got %d" % len(residual_outputs))

    merged = self.cnn_layer(*residual_outputs)
    sequence = self.lstm_layer(merged)
    attended = self.attention_layer(sequence)
    output = self.mlp_layer(attended)

    model = Model(inputs=list(channel_inputs), outputs=[output])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def main1():
    """Train a two-task MMoE regression model and return it.

    Loads the splits from ``data_preparation_moe()``, builds one MMoE layer
    followed by a small tower per task, compiles with mean-squared-error
    losses and fits for 100 epochs.

    Returns:
        The fitted Model.
    """
    # Load the data
    (train_data, train_label, validation_data, validation_label, test_data,
     test_label) = data_preparation_moe()
    num_features = train_data.shape[1]

    for split_name, split in (('Training', train_data),
                              ('Validation', validation_data),
                              ('Test', test_data)):
        print('{} data shape = {}'.format(split_name, split.shape))

    # Input and shared MMoE layer
    input_layer = Input(shape=(num_features, ))
    mmoe_layers = MMoE(units=16, num_experts=8, num_tasks=2)(input_layer)

    # One tower per task; the output layer name doubles as the loss key.
    output_info = ['y0', 'y1']
    output_layers = []
    for index, task_layer in enumerate(mmoe_layers):
        tower = Dense(units=8,
                      activation='relu',
                      kernel_initializer=VarianceScaling())(task_layer)
        head = Dense(units=1,
                     name=output_info[index],
                     activation='linear',
                     kernel_initializer=VarianceScaling())(tower)
        output_layers.append(head)

    # Compile model
    model = Model(inputs=[input_layer], outputs=output_layers)
    learning_rates = [1e-4, 1e-3, 1e-2]
    adam_optimizer = Adam(lr=learning_rates[0])
    losses = {'y0': 'mean_squared_error', 'y1': 'mean_squared_error'}
    model.compile(loss=losses,
                  optimizer=adam_optimizer,
                  metrics=[metrics.mae])

    # Print out model architecture summary
    model.summary()

    # Train the model
    model.fit(x=train_data,
              y=train_label,
              validation_data=(validation_data, validation_label),
              epochs=100)
    return model
def create_autoencoder(input_dim, encoding_dim):
    """Build an uncompiled dense autoencoder over a sparse input.

    Args:
        input_dim: dimension of the one-hot encoded categorical features
        encoding_dim: dimension of the encoded data (the bottleneck layer,
            named 'enco')

    Return:
        model mapping the sparse input to a sigmoid reconstruction of
        width ``input_dim``
    """
    sparse_input = Input(shape=(input_dim, ), name='input', sparse=True)

    # Encoder: hidden layer, then the named bottleneck.
    encoder_hidden = Dense(HIDDEN_UNITS, activation='selu')(sparse_input)
    bottleneck = Dense(encoding_dim, activation='selu',
                       name='enco')(encoder_hidden)

    # Decoder: mirror of the encoder, back to the input width.
    decoder_hidden = Dense(HIDDEN_UNITS, activation='selu')(bottleneck)
    reconstruction = Dense(input_dim, activation='sigmoid')(decoder_hidden)

    return Model(inputs=sparse_input, outputs=reconstruction)
def build_generator(self):
    """Build the generator: latent noise to a 28-wide tanh sample.

    Returns:
        An uncompiled Model taking noise of width ``self.latent_dim``.
    """
    stack = Sequential()

    # Three ReLU blocks, each followed by batch normalisation. Only the
    # first Dense layer declares the input width.
    stack.add(Dense(78, activation="relu", input_dim=self.latent_dim))
    stack.add(BatchNormalization(momentum=0.8))
    for width in (56, 32):
        stack.add(Dense(width, activation="relu"))
        stack.add(BatchNormalization(momentum=0.8))

    stack.add(Dense(28, activation="tanh"))
    stack.summary()

    latent = Input(shape=(self.latent_dim, ))
    return Model(latent, stack(latent))
def generator_network(self):
    """Build the transposed-convolution generator.

    Returns:
        An uncompiled Model taking an input of shape
        ``(self.latent_dim,)`` and returning the output of the last
        LeakyReLU stage.
    """
    # input
    in_latents = Input(shape=(self.latent_dim,))
    # NOTE(review): Conv2DTranspose operates on 4-D (batch, rows, cols,
    # channels) tensors while in_latents is 2-D; a Reshape to a spatial
    # tensor is probably required before DC1 - confirm the intended layout.
    # DC1
    nett = Conv2DTranspose(512, (3, 3))(in_latents)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    # DC2
    nett = Conv2DTranspose(128, (3, 3))(nett)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    # DC3: the layer must be applied to the running tensor, not assigned
    # as a bare layer object.
    nett = Conv2DTranspose(64, (3, 3))(nett)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    # DC4: `alpha` is a LeakyReLU argument (Dense has no such parameter),
    # so this stage ends like the three stages above.
    nett = Conv2DTranspose(32, (5, 5))(nett)
    nett = BatchNormalization()(nett)
    out_image = LeakyReLU(alpha=0.2)(nett)
    # output
    model = Model(inputs=in_latents, outputs=out_image)
    return model
def discriminator_network(self):
    """Build the convolutional discriminator.

    Returns:
        An uncompiled Model mapping an image of shape ``self.img_shape`` to
        a single sigmoid validity score.
    """
    # input
    in_image = Input(shape=self.img_shape)
    # C1 layer
    nett = Conv2D(64, (5, 5))(in_image)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    # C2 layer
    nett = Conv2D(128, (5, 5))(nett)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    nett = Dropout(0.2)(nett)
    # C3 layer
    nett = Conv2D(256, (5, 5))(nett)
    nett = BatchNormalization()(nett)
    nett = LeakyReLU(alpha=0.2)(nett)
    nett = Dropout(0.2)(nett)
    # F4 layer
    nett = Flatten()(nett)
    # Dense has no `alpha` argument. A sigmoid keeps the score in [0, 1],
    # which a binary cross-entropy loss on probabilities requires.
    validity = Dense(1, activation='sigmoid')(nett)
    # output
    model = Model(inputs=in_image, outputs=validity)
    return model
def compiled_tcn(num_feat,  # type: int
                 num_classes,  # type: int
                 nb_filters,  # type: int
                 kernel_size,  # type: int
                 dilations,  # type: List[int]
                 nb_stacks,  # type: int
                 max_len,  # type: int
                 padding='causal',  # type: str
                 use_skip_connections=True,  # type: bool
                 return_sequences=True,
                 regression=False,  # type: bool
                 dropout_rate=0.05,  # type: float
                 name='tcn',  # type: str,
                 opt='adam',
                 lr=0.002):
    # type: (...) -> keras.Model
    """Create a compiled TCN model for regression or classification.

    Classification uses a sparse categorical loss, so pass class ids and
    not one-hot encodings.

    Args:
        num_feat: The number of features of your input, i.e. the last
            dimension of: (batch_size, timesteps, input_dim).
        num_classes: The size of the final dense layer in classification
            mode.
        nb_filters: The number of filters to use in the convolutional layers.
        kernel_size: The size of the kernel to use in each convolutional
            layer.
        dilations: The list of the dilations, e.g. [1, 2, 4, 8, 16, 32, 64].
        nb_stacks: The number of stacks of residual blocks to use.
        max_len: The maximum sequence length, use None if the sequence
            length is dynamic.
        padding: The padding to use in the convolutional layers.
        use_skip_connections: Whether to add skip connections from the
            input to each residual block.
        return_sequences: Whether to return the full output sequence or
            only the last output.
        regression: If True the head is a single linear unit trained with
            mean squared error; otherwise a softmax over ``num_classes``.
        dropout_rate: Float between 0 and 1. Fraction of the input units to
            drop.
        name: Name of the model. Useful when having multiple TCN.
        opt: Currently ignored; the optimizer is always
            ``tf.train.AdamOptimizer``.
        lr: Currently ignored; the learning rate is fixed at 1e-3.

    Returns:
        A compiled keras TCN.
    """
    dilations = process_dilations(dilations)

    input_layer = Input(shape=(max_len, num_feat))
    tcn_out = TCN(nb_filters, kernel_size, nb_stacks, dilations, padding,
                  use_skip_connections, dropout_rate, return_sequences,
                  name)(input_layer)
    print('x.shape=', tcn_out.shape)

    # Pick the head and the compile settings for the task; everything else
    # is shared between the two modes.
    if regression:
        head_units, head_activation = 1, 'linear'
        compile_kwargs = {'loss': 'mean_squared_error'}
    else:
        head_units, head_activation = num_classes, 'softmax'
        compile_kwargs = {
            'loss': 'sparse_categorical_crossentropy',
            'metrics': [accuracy],
        }

    projected = Dense(head_units)(tcn_out)
    output_layer = Activation(head_activation)(projected)

    model = Model(input_layer, output_layer)
    model.compile(tf.train.AdamOptimizer(learning_rate=1e-3, ),
                  **compile_kwargs)

    print(f'model.x = {input_layer.shape}')
    print(f'model.y = {output_layer.shape}')
    return model
def createModel(patchSize, numClasses, usingClassification=False):
    """Assemble the 3D-VResFCN encoder/decoder segmentation network.

    The encoder has a convolutional stem followed by three
    projection-block down-convolutions, each followed by two identity
    blocks. The decoder upsamples three times, concatenating the matching
    encoder stage after every upsampling.

    Args:
        patchSize: Indexable with at least three entries; the first three
            are the spatial extents of the single-channel input patch.
        numClasses: Width of the softmax classification head (only used
            when ``usingClassification`` is true).
        usingClassification: If true, add a global-average-pooled
            classification output taken from the deepest encoder stage.

    Returns:
        Tuple ``(cnn, sModelName)``: the uncompiled Model and its name.
    """
    data_format = K.image_data_format()
    bn_axis = -1 if data_format == 'channels_last' else 1

    def down_conv(tensor, width, stage, block, ratio):
        # Projection block used as the down-convolution between stages.
        return projection_block_3D(tensor,
                                   filters=(width, width),
                                   kernel_size=(2, 2, 2),
                                   stage=stage,
                                   block=block,
                                   se_enabled=True,
                                   se_ratio=ratio)

    def identity_pair(tensor, width, stage, ratio):
        # Two consecutive identity blocks (block=1 then block=2).
        for block in (1, 2):
            tensor = identity_block_3D(tensor,
                                       filters=(width, width),
                                       kernel_size=(3, 3, 3),
                                       stage=stage,
                                       block=block,
                                       se_enabled=True,
                                       se_ratio=ratio)
        return tensor

    def upsample_and_merge(tensor, skip, width):
        # 2x upsampling, 3x3x3 conv, BN, LeakyReLU, then skip concatenation.
        tensor = UpSampling3D(size=(2, 2, 2),
                              data_format=data_format)(tensor)
        tensor = Conv3D(filters=width,
                        kernel_size=(3, 3, 3),
                        strides=(1, 1, 1),
                        padding='same',
                        kernel_initializer='he_normal')(tensor)
        tensor = BatchNormalization(axis=bn_axis)(tensor)
        tensor = LeakyReLU(alpha=0.01)(tensor)
        return concatenate([tensor, skip], axis=bn_axis)

    input_tensor = Input(shape=(patchSize[0], patchSize[1], patchSize[2], 1))

    # ---- encoder path ----
    # first stage
    stem = Conv3D(filters=16,
                  kernel_size=(5, 5, 5),
                  strides=(1, 1, 1),
                  padding='same',
                  kernel_initializer='he_normal')(input_tensor)
    stem = BatchNormalization(axis=bn_axis)(stem)
    x_after_stage_1 = LeakyReLU(alpha=0.01)(stem)

    # first down convolution + second stage
    x_down_conv_1 = down_conv(x_after_stage_1, 32, stage=1, block=1, ratio=4)
    x_after_stage_2 = identity_pair(x_down_conv_1, 32, stage=2, ratio=4)

    # second down convolution + third stage
    x_down_conv_2 = down_conv(x_after_stage_2, 64, stage=2, block=3, ratio=8)
    x_after_stage_3 = identity_pair(x_down_conv_2, 64, stage=3, ratio=8)

    # third down convolution + fourth stage
    x_down_conv_3 = down_conv(x_after_stage_3, 128, stage=3, block=4,
                              ratio=16)
    x_after_stage_4 = identity_pair(x_down_conv_3, 128, stage=4, ratio=16)

    if usingClassification:
        # Classification head on the deepest encoder features:
        # global average pooling followed by a softmax layer.
        pooled = GlobalAveragePooling3D(
            data_format=data_format)(x_after_stage_4)
        classification_output = Dense(units=numClasses,
                                      activation='softmax',
                                      kernel_initializer='he_normal',
                                      name='classification_output')(pooled)

    # ---- decoder path ----
    x = upsample_and_merge(x_after_stage_4, x_after_stage_3, 64)
    x = identity_pair(x, 128, stage=6, ratio=16)

    x = upsample_and_merge(x, x_after_stage_2, 32)
    x = identity_pair(x, 64, stage=7, ratio=8)

    x = upsample_and_merge(x, x_after_stage_1, 16)
    x = identity_block_3D(x,
                          filters=(32, 32),
                          kernel_size=(3, 3, 3),
                          stage=9,
                          block=1,
                          se_enabled=True,
                          se_ratio=4)

    # ---- segmentation head ----
    # A 1x1x1 convolution produces 2 feature maps (foreground/background),
    # turned into probabilities by a softmax over the channel axis.
    x = Conv3D(filters=2,
               kernel_size=(1, 1, 1),
               strides=(1, 1, 1),
               padding='same',
               kernel_initializer='he_normal',
               name='conv_veryEnd')(x)
    segmentation_output = Softmax(axis=bn_axis,
                                  name='segmentation_output')(x)

    # create model
    if usingClassification:
        outputs = [segmentation_output, classification_output]
        model_name = '3D-VResFCN-Classification'
    else:
        outputs = [segmentation_output]
        model_name = '3D-VResFCN'

    cnn = Model(inputs=[input_tensor], outputs=outputs, name=model_name)
    sModelName = cnn.name
    return cnn, sModelName
def perceptual_loss(y_true, y_pred):
    """Mean squared distance between VGG16 ``block3_conv3`` features.

    Args:
        y_true: Reference image batch.
        y_pred: Predicted image batch.

    Returns:
        A scalar tensor: the mean of the squared difference between the
        two feature maps.
    """
    # `include_top=False` drops the classifier head; the keyword is
    # `include_top`, not `include`.
    vgg = VGG16(include_top=False, weights="imagenet",
                input_shape=image_shape)
    loss_model = Model(inputs=vgg.input,
                       outputs=vgg.get_layer("block3_conv3").output)
    loss_model.trainable = False
    # tf.square takes a single tensor (its second positional argument is
    # the op name), so the difference has to be formed first.
    feature_diff = loss_model(y_true) - loss_model(y_pred)
    return tf.reduce_mean(tf.square(feature_diff))
encoder_embedding = Embedding(vocab_size, 200, mask_zero=True)(encoder_inputs) #参考链接:嵌入层 Embedding<https://keras.io/zh/layers/embeddings/#embedding> encoder_outputs, state_h, state_c = tf.keras.layers.LSTM( 200, return_state=True)(encoder_embedding) #参考链接:https://keras.io/zh/layers/recurrent/#lstm encoder_states = [state_h, state_c] decoder_inputs = Input(shape=(None, )) decoder_embedding = Embedding(vocab_size, 200, mask_zero=True)(decoder_inputs) decoder_lstm = LSTM(200, return_state=True, return_sequences=True) decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states) decoder_dense = Dense(vocab_size, activation=tf.keras.activations.softmax) output = decoder_dense(decoder_outputs) model = Model([encoder_inputs, decoder_inputs], output) model.compile(optimizer=optimizers.RMSprop(), loss='categorical_crossentropy', metrics=['accuracy']) #参考链接:RMSprop<https://keras.io/zh/optimizers/#rmsprop> #categorical_crossentropy<https://keras.io/zh/backend/#categorical_crossentropy> model.summary() # 模型训练以及保存 model.fit([encoder_input_data, decoder_input_data], decoder_output_data, batch_size=50, epochs=150) model.save('model.h5')
class SGAN:
    """Semi-supervised GAN over flat, 28-feature samples.

    The discriminator has two heads: a validity score and a softmax over
    ``num_classes + 1`` labels, where the extra label is assigned to
    generated samples.
    """

    def __init__(self, args):
        """Build the discriminator, the generator and the combined model.

        Args:
            args: Accepted for interface compatibility; not read here.
        """
        self.input_shape = 28
        # Tuple form of the sample width, for Input(shape=...).
        self.img_shape = (self.input_shape, )
        self.num_classes = 2
        self.latent_dim = 100

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator (one loss per head).
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(
            loss=['binary_crossentropy', 'categorical_crossentropy'],
            loss_weights=[0.5, 0.5],
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The noise width must equal latent_dim: the generator is built for
        # that width and train() samples noise of that width.
        noise = Input(shape=(self.latent_dim, ))
        img = self.generator(noise)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # Only the validity head is used by the combined model.
        valid, _ = self.discriminator(img)

        # The combined model (stacked generator and discriminator)
        # trains the generator to fool the discriminator.
        self.combined = Model(noise, valid)
        self.combined.compile(loss=['binary_crossentropy'],
                              optimizer=optimizer)

    def build_generator(self):
        """Return the uncompiled generator: latent noise to a 28-wide sample."""
        model = Sequential()
        model.add(Dense(78, activation="relu", input_dim=self.latent_dim))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(56, activation="relu"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(32, activation="relu"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(28, activation="tanh"))
        model.summary()

        noise = Input(shape=(self.latent_dim, ))
        img = model(noise)
        return Model(noise, img)

    def build_discriminator(self):
        """Return the uncompiled two-headed discriminator.

        The model maps a sample to ``[valid, label]``.
        """
        model = Sequential()
        model.add(Dense(78, activation="relu", input_dim=self.input_shape))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(56, activation="relu"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(32, activation="relu"))
        model.add(Dropout(rate=0.3))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(28, activation="relu"))
        model.add(Dropout(rate=0.3))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(10, activation="relu"))
        model.summary()

        # Width must agree with input_dim of the first Dense layer.
        img = Input(shape=(self.input_shape, ))
        features = model(img)
        valid = Dense(1, activation="sigmoid")(features)
        label = Dense(self.num_classes + 1, activation="softmax")(features)
        return Model(img, [valid, label])

    def train(self, epochs, batch_size=100, sample_interval=50):
        """Alternate discriminator and generator updates.

        Args:
            epochs: Number of training iterations (one batch each).
            batch_size: Samples per real batch and per generated batch.
            sample_interval: Every this many iterations,
                ``sample_images`` is called.
        """
        # Load the dataset; only the training split (first two entries)
        # is used here.
        data = data_load("credit_fraud_sampled.csv")
        x_train, y_train = data[:2]

        # Class weights:
        # To balance the difference in occurrences of class labels.
        # 50% of labels that the discriminator trains on are 'fake'.
        # Weight = 1 / frequency
        half_batch = batch_size // 2
        cw1 = {0: 1, 1: 1}
        cw2 = {
            i: self.num_classes / half_batch
            for i in range(self.num_classes)
        }
        cw2[self.num_classes] = 1 / half_batch

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of real samples
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs = x_train[idx]

            # Sample noise and generate a batch of new samples
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_imgs = self.generator.predict(noise)

            # One-hot encoding of labels; generated samples get the extra
            # class index `num_classes`.
            labels = to_categorical(y_train[idx],
                                    num_classes=self.num_classes + 1)
            fake_labels = to_categorical(
                np.full((batch_size, 1), self.num_classes),
                num_classes=self.num_classes + 1)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(
                imgs, [valid, labels], class_weight=[cw1, cw2])
            d_loss_fake = self.discriminator.train_on_batch(
                gen_imgs, [fake, fake_labels], class_weight=[cw1, cw2])
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            g_loss = self.combined.train_on_batch(noise,
                                                  valid,
                                                  class_weight=[cw1, cw2])

            # Report progress. d_loss is indexed at 0, 3 and 4 for the
            # total loss and the two accuracy entries.
            if epoch % 100 == 0:
                print(
                    "%d [D loss: %f, acc: %.2f%%, op_acc: %.2f%%] [G loss: %f]"
                    % (epoch, d_loss[0], 100 * d_loss[3], 100 * d_loss[4],
                       g_loss))

            # If at save interval => save generated samples
            if epoch % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        """Save a 5x5 grid of generated samples to ``images/mnist_<epoch>.png``.

        Args:
            epoch: Iteration number used in the file name.
        """
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        gen_imgs = self.generator.predict(noise)

        # Rescale from the generator's tanh range to 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        # The generator emits flat vectors, not (rows, cols, channels)
        # images, so each sample is drawn as a single-row strip.
        for cnt, ax in enumerate(axs.flat):
            ax.imshow(gen_imgs[cnt].reshape(1, -1), cmap='gray',
                      aspect='auto')
            ax.axis('off')
        fig.savefig("images/mnist_%d.png" % epoch)
        plt.close(fig)

    def save_model(self):
        """Write architecture (JSON) and weights (HDF5) for the three models."""

        def save(model, model_name):
            model_path = "saved_model/%s.json" % model_name
            weights_path = "saved_model/%s_weights.hdf5" % model_name
            # Context manager so the handle is closed even if the write
            # fails.
            with open(model_path, 'w') as arch_file:
                arch_file.write(model.to_json())
            model.save_weights(weights_path)

        save(self.generator, "mnist_sgan_generator")
        save(self.discriminator, "mnist_sgan_discriminator")
        save(self.combined, "mnist_sgan_adversarial")
def __init__(self,
             n_word_vocab=50001,
             n_role_vocab=7,
             n_factors_emb=256,
             n_factors_cls=512,
             n_hidden=256,
             word_vocabulary=None,
             role_vocabulary=None,
             unk_word_id=50000,
             unk_role_id=7,
             missing_word_id=50001,
             using_dropout=False,
             dropout_rate=0.3,
             optimizer='adagrad',
             loss='sparse_categorical_crossentropy',
             metrics=('accuracy', )):
    """Build and compile the non-incremental role-filler network.

    Args:
        n_word_vocab: Size of the word vocabulary and of the softmax output.
        n_role_vocab: Number of roles; the input window holds
            ``n_role_vocab - 1`` of them.
        n_factors_emb: Width of the role-based word embedding.
        n_factors_cls: Width of the target-word hidden layer.
        n_hidden: Width of the projected context embedding.
        word_vocabulary: Word vocabulary mapping; an empty dict when
            omitted.
        role_vocabulary: Role vocabulary mapping; an empty dict when
            omitted.
        unk_word_id: Passed through to the base class.
        unk_role_id: Passed through to the base class.
        missing_word_id: Passed to the base class and the embedding builder.
        using_dropout: Whether the helper layers apply dropout.
        dropout_rate: Dropout rate handed to the helper layers.
        optimizer: Optimizer passed to ``compile``.
        loss: Loss passed to ``compile``.
        metrics: Metrics passed to ``compile``.
    """
    # Fresh containers per instance: a mutable default would be shared by
    # every instance created without these arguments.
    if word_vocabulary is None:
        word_vocabulary = {}
    if role_vocabulary is None:
        role_vocabulary = {}
    if isinstance(metrics, tuple):
        metrics = list(metrics)

    super(NNRF, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                               n_hidden, word_vocabulary, role_vocabulary,
                               unk_word_id, unk_role_id, missing_word_id,
                               using_dropout, dropout_rate, optimizer, loss,
                               metrics)

    # minus 1 here because one of the roles is the target role
    self.input_length = n_role_vocab - 1

    # Each input is a fixed window of the frame set; each word corresponds
    # to one role.
    # NOTE(review): confirm the embedding helpers accept uint32 indices.
    input_words = Input(shape=(self.input_length, ),
                        dtype=tf.uint32,
                        name='input_words')
    input_roles = Input(shape=(self.input_length, ),
                        dtype=tf.uint32,
                        name='input_roles')
    target_role = Input(shape=(1, ), dtype=tf.uint32, name='target_role')

    # role based embedding layer
    embedding_layer = role_based_word_embedding(
        input_words, input_roles, n_word_vocab, n_role_vocab,
        glorot_uniform(), missing_word_id, self.input_length, n_factors_emb,
        True, using_dropout, dropout_rate)

    # Sum along the input_length axis to obtain the event embedding,
    # shape (batch_size, n_factors_emb).
    event_embedding = Lambda(lambda x: K.sum(x, axis=1),
                             name='event_embedding',
                             output_shape=(n_factors_emb, ))(embedding_layer)

    # Fully connected layer, output shape is (batch_size, n_hidden).
    hidden = Dense(n_hidden,
                   activation='linear',
                   input_shape=(n_factors_emb, ),
                   name='projected_event_embedding')(event_embedding)

    # non-linear layer, using 1 to initialize
    non_linearity = PReLU(alpha_initializer='ones',
                          name='context_embedding')(hidden)

    # hidden layer
    hidden_layer2 = target_word_hidden(non_linearity,
                                       target_role,
                                       n_word_vocab,
                                       n_role_vocab,
                                       glorot_uniform(),
                                       n_factors_cls,
                                       n_hidden,
                                       using_dropout=using_dropout,
                                       dropout_rate=dropout_rate)

    # softmax output layer
    output_layer = Dense(n_word_vocab,
                         activation='softmax',
                         input_shape=(n_factors_cls, ),
                         name='softmax_word_output')(hidden_layer2)

    self.model = Model(inputs=[input_words, input_roles, target_role],
                       outputs=[output_layer])

    # Keywords rather than positions: the positional order of compile()
    # arguments is not the same across Keras versions.
    self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
class NNRF(GenericModel):
    """Non-incremental model role-filler.

    Predicts a target word from a fixed window of (word, role) pairs and a
    target role.
    """

    def __init__(self,
                 n_word_vocab=50001,
                 n_role_vocab=7,
                 n_factors_emb=256,
                 n_factors_cls=512,
                 n_hidden=256,
                 word_vocabulary=None,
                 role_vocabulary=None,
                 unk_word_id=50000,
                 unk_role_id=7,
                 missing_word_id=50001,
                 using_dropout=False,
                 dropout_rate=0.3,
                 optimizer='adagrad',
                 loss='sparse_categorical_crossentropy',
                 metrics=('accuracy', )):
        """Build and compile the network.

        Args:
            n_word_vocab: Size of the word vocabulary and softmax output.
            n_role_vocab: Number of roles; the input window holds
                ``n_role_vocab - 1`` of them.
            n_factors_emb: Width of the role-based word embedding.
            n_factors_cls: Width of the target-word hidden layer.
            n_hidden: Width of the projected context embedding.
            word_vocabulary: Word vocabulary mapping; empty dict if omitted.
            role_vocabulary: Role vocabulary mapping; empty dict if omitted.
            unk_word_id: Passed through to the base class.
            unk_role_id: Passed through to the base class.
            missing_word_id: Passed to the base class and embedding builder.
            using_dropout: Whether the helper layers apply dropout.
            dropout_rate: Dropout rate handed to the helper layers.
            optimizer: Optimizer passed to ``compile``.
            loss: Loss passed to ``compile``.
            metrics: Metrics passed to ``compile``.
        """
        # Fresh containers per instance: a mutable default would be shared
        # by every instance created without these arguments.
        if word_vocabulary is None:
            word_vocabulary = {}
        if role_vocabulary is None:
            role_vocabulary = {}
        if isinstance(metrics, tuple):
            metrics = list(metrics)

        super(NNRF, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                                   n_hidden, word_vocabulary, role_vocabulary,
                                   unk_word_id, unk_role_id, missing_word_id,
                                   using_dropout, dropout_rate, optimizer,
                                   loss, metrics)

        # minus 1 here because one of the roles is the target role
        self.input_length = n_role_vocab - 1

        # Each input is a fixed window of the frame set; each word
        # corresponds to one role.
        # NOTE(review): confirm the embedding helpers accept uint32 indices.
        input_words = Input(shape=(self.input_length, ),
                            dtype=tf.uint32,
                            name='input_words')
        input_roles = Input(shape=(self.input_length, ),
                            dtype=tf.uint32,
                            name='input_roles')
        target_role = Input(shape=(1, ), dtype=tf.uint32, name='target_role')

        # role based embedding layer
        embedding_layer = role_based_word_embedding(
            input_words, input_roles, n_word_vocab, n_role_vocab,
            glorot_uniform(), missing_word_id, self.input_length,
            n_factors_emb, True, using_dropout, dropout_rate)

        # Sum along the input_length axis to obtain the event embedding,
        # shape (batch_size, n_factors_emb).
        event_embedding = Lambda(
            lambda x: K.sum(x, axis=1),
            name='event_embedding',
            output_shape=(n_factors_emb, ))(embedding_layer)

        # Fully connected layer, output shape is (batch_size, n_hidden).
        hidden = Dense(n_hidden,
                       activation='linear',
                       input_shape=(n_factors_emb, ),
                       name='projected_event_embedding')(event_embedding)

        # non-linear layer, using 1 to initialize
        non_linearity = PReLU(alpha_initializer='ones',
                              name='context_embedding')(hidden)

        # hidden layer
        hidden_layer2 = target_word_hidden(non_linearity,
                                           target_role,
                                           n_word_vocab,
                                           n_role_vocab,
                                           glorot_uniform(),
                                           n_factors_cls,
                                           n_hidden,
                                           using_dropout=using_dropout,
                                           dropout_rate=dropout_rate)

        # softmax output layer
        output_layer = Dense(n_word_vocab,
                             activation='softmax',
                             input_shape=(n_factors_cls, ),
                             name='softmax_word_output')(hidden_layer2)

        self.model = Model(inputs=[input_words, input_roles, target_role],
                           outputs=[output_layer])

        # Keywords rather than positions: the positional order of compile()
        # arguments is not the same across Keras versions.
        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    def set_0_bias(self):
        """Zero the output-layer bias and return the previous bias.

        This is a hack: according to Ottokar's advice in the paper, the
        output bias needs to be 0 during *evaluation* to replicate the best
        reported performance. Restore the returned bias with ``set_bias``.
        """
        output_layer = self.model.get_layer("softmax_word_output")
        word_output_weights = output_layer.get_weights()
        word_output_kernel = word_output_weights[0]
        word_output_bias = np.zeros(self.n_word_vocab)
        output_layer.set_weights([word_output_kernel, word_output_bias])
        return word_output_weights[1]

    def set_bias(self, bias):
        """Replace the output-layer bias with ``bias`` and return it."""
        output_layer = self.model.get_layer("softmax_word_output")
        word_output_kernel = output_layer.get_weights()[0]
        output_layer.set_weights([word_output_kernel, bias])
        return bias

    # Deprecated temporarily
    def train(self,
              i_w,
              i_r,
              t_w,
              t_r,
              t_w_c,
              t_r_c,
              batch_size=256,
              epochs=100,
              validation_split=0.05,
              verbose=0):
        """Fit the model on ``[i_w, i_r, t_r] -> t_w_c``.

        ``t_w`` and ``t_r_c`` are accepted for interface symmetry and are
        not used.
        """
        # Keywords are required here: positionally, fit() takes `verbose`
        # and `callbacks` before `validation_split`.
        train_result = self.model.fit([i_w, i_r, t_r],
                                      t_w_c,
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      validation_split=validation_split,
                                      verbose=verbose)
        return train_result

    def test(self,
             i_w,
             i_r,
             t_w,
             t_r,
             t_w_c,
             t_r_c,
             batch_size=256,
             verbose=0):
        """Evaluate the model on ``[i_w, i_r, t_r]`` against ``t_w_c``."""
        test_result = self.model.evaluate([i_w, i_r, t_r],
                                          t_w_c,
                                          batch_size=batch_size,
                                          verbose=verbose)
        return test_result

    def train_on_batch(self, i_w, i_r, t_w, t_r, t_w_c, t_r_c):
        """Run one gradient update on a single batch."""
        train_result = self.model.train_on_batch([i_w, i_r, t_r], t_w_c)
        return train_result

    def test_on_batch(self,
                      i_w,
                      i_r,
                      t_w,
                      t_r,
                      t_w_c,
                      t_r_c,
                      sample_weight=None):
        """Evaluate a single batch, optionally weighted per sample."""
        test_result = self.model.test_on_batch([i_w, i_r, t_r],
                                               t_w_c,
                                               sample_weight=sample_weight)
        return test_result

    def predict(self, i_w, i_r, t_r, batch_size=1, verbose=0):
        """Return the output from the softmax layer."""
        predict_result = self.model.predict([i_w, i_r, t_r],
                                            batch_size=batch_size,
                                            verbose=verbose)
        return predict_result

    def summary(self):
        """Print the Keras model summary."""
        self.model.summary()

    def predict_class(self, i_w, i_r, t_r, batch_size=1, verbose=0):
        """Return the predicted target word id for each input row."""
        predict_result = self.predict(i_w, i_r, t_r, batch_size, verbose)
        return np.argmax(predict_result, axis=1)

    def p_words(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """Return the output score of each row's given target word."""
        predict_result = self.predict(i_w, i_r, t_r, batch_size, verbose)
        # Index by the number of predicted rows; `batch_size` only controls
        # how predict() chunks the input and need not equal the row count.
        rows = np.arange(len(predict_result))
        return predict_result[rows, list(t_w)]

    def top_words(self, i_w, i_r, t_r, topN=20, batch_size=1, verbose=0):
        """Return the top N target word ids per row, best first."""
        predict_result = self.predict(i_w, i_r, t_r, batch_size, verbose)
        rank_list = np.argsort(predict_result, axis=1)
        return [r[-topN:][::-1] for r in rank_list]

    def list_top_words(self,
                       i_w,
                       i_r,
                       t_r,
                       topN=20,
                       batch_size=1,
                       verbose=0):
        """Return the decoded top N target words for every input row.

        (Only for reference, can be removed.)
        """
        top_words_lists = self.top_words(i_w, i_r, t_r, topN, batch_size,
                                         verbose)
        print(type(top_words_lists))
        # Iterate over every row rather than range(batch_size), which is
        # unrelated to the number of rows returned.
        return [[self.word_decoder[w] for w in top_words_list]
                for top_words_list in top_words_lists]
def __init__(self, n_word_vocab=50001, n_role_vocab=7, n_factors_emb=300,
             n_hidden=300, word_vocabulary=None, role_vocabulary=None,
             unk_word_id=50000, unk_role_id=7, missing_word_id=50001,
             using_dropout=False, dropout_rate=0.3, optimizer='adagrad',
             loss='sparse_categorical_crossentropy', metrics=['accuracy'],
             loss_weights=[1., 1.]):
    """Build and compile the two-headed (word + role) role-filler network.

    The first fourteen arguments are forwarded unchanged to the base-class
    constructor; the same values are then used to wire the Keras graph
    stored in ``self.model``.

    Args:
        n_word_vocab: Size of the word softmax output.
        n_role_vocab: Size of the role softmax output; the context window
            holds ``n_role_vocab - 1`` (word, role) slots.
        n_factors_emb: Passed to ``factored_embedding``.
        n_hidden: Width of the context embedding and of both hidden layers.
        word_vocabulary, role_vocabulary, unk_word_id, unk_role_id:
            Forwarded to the base class only.
        missing_word_id: Passed to ``factored_embedding`` and the base class.
        using_dropout, dropout_rate: Forwarded to every helper layer builder.
        optimizer, loss, metrics, loss_weights: Passed to ``Model.compile``.
            The list defaults are never mutated here.
    """
    super(MTRFv4, self).__init__(n_word_vocab, n_role_vocab, n_factors_emb,
                                 n_hidden, word_vocabulary, role_vocabulary,
                                 unk_word_id, unk_role_id, missing_word_id,
                                 using_dropout, dropout_rate, optimizer,
                                 loss, metrics)

    # minus 1 here because one of the roles is the target role
    input_length = n_role_vocab - 1

    # each input is a fixed window of the frame set; each word corresponds
    # to one role
    # NOTE(review): tf.uint32 was introduced by "team1-change"; confirm the
    # embedding helpers accept unsigned indices.
    input_words = Input(shape=(input_length, ), dtype=tf.uint32,
                        name='input_words')
    input_roles = Input(shape=(input_length, ), dtype=tf.uint32,
                        name='input_roles')
    target_word = Input(shape=(1, ), dtype=tf.uint32, name='target_word')
    target_role = Input(shape=(1, ), dtype=tf.uint32, name='target_role')

    # role based embedding layer
    embedding_layer = factored_embedding(input_words, input_roles,
                                         n_word_vocab, n_role_vocab,
                                         glorot_uniform(), missing_word_id,
                                         input_length, n_factors_emb,
                                         n_hidden, True, using_dropout,
                                         dropout_rate)

    # non-linear layer, using 1 to initialize
    non_linearity = PReLU(alpha_initializer='ones')(embedding_layer)

    # mean over the input_length axis; the resulting context embedding has
    # shape (batch_size, n_hidden)
    context_embedding = Lambda(lambda x: K.mean(x, axis=1),
                               name='context_embedding',
                               output_shape=(n_hidden, ))(non_linearity)

    # target word hidden layer (conditioned on the target role)
    tw_hidden = target_word_hidden(context_embedding, target_role,
                                   n_word_vocab, n_role_vocab,
                                   glorot_uniform(), n_hidden, n_hidden,
                                   using_dropout=using_dropout,
                                   dropout_rate=dropout_rate)

    # target role hidden layer (conditioned on the target word)
    tr_hidden = target_role_hidden(context_embedding, target_word,
                                   n_word_vocab, n_role_vocab,
                                   glorot_uniform(), n_hidden, n_hidden,
                                   using_dropout=using_dropout,
                                   dropout_rate=dropout_rate)

    # softmax output layers, one per task
    target_word_output = Dense(n_word_vocab, activation='softmax',
                               input_shape=(n_hidden, ),
                               name='softmax_word_output')(tw_hidden)
    target_role_output = Dense(n_role_vocab, activation='softmax',
                               input_shape=(n_hidden, ),
                               name='softmax_role_output')(tr_hidden)

    self.model = Model(
        inputs=[input_words, input_roles, target_word, target_role],
        outputs=[target_word_output, target_role_output])

    # Keywords, not positions: the relative order of `metrics` and
    # `loss_weights` in Model.compile differs between Keras generations.
    self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics,
                       loss_weights=loss_weights)
# (10) Bidirectional LSTM modelling
# `tf` is only an alias and cannot be used as a package name in an import
# statement, so the real package name `tensorflow` is spelled out.
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import SimpleRNN, Input, Dense, LSTM
from tensorflow.keras.layers import Bidirectional, TimeDistributed

# Training
from tensorflow.keras.callbacks import EarlyStopping  # early-stopping callback

xInput = Input(batch_shape=(None, right_idx3, 256))
xBiLstm = Bidirectional(LSTM(240, return_sequences=True),
                        merge_mode='concat')(xInput)
# One sigmoid unit per time step: the cost is emitted at every step and the
# error is propagated on to the next step.
xOutput = TimeDistributed(Dense(1, activation='sigmoid'))(xBiLstm)

model1 = Model(xInput, xOutput)
model1.compile(loss='binary_crossentropy', optimizer='rmsprop',
               metrics=['accuracy'])
model1.summary()

# EarlyStopping is already imported from tensorflow.keras above; it is not
# re-imported from the stand-alone `keras` package, so the callback and the
# model come from the same package.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)  # early-stopping callback

# In[24]:

########## 3gram
# Cross-validation (k-fold)
from sklearn.model_selection import KFold

# Accuracy, Precision, Recall, F1-Score
def eval_GS(model_name, experiment_name, eval_file_name, model=None,
            print_result=True, verb_baseline=False):
    """Correlate context-embedding similarity with human similarity scores.

    Every data line of the evaluation file is split on whitespace; fields
    1-4 are read as ``V1 A0 A1 V2`` (verb, subject, object, landmark verb),
    field 5 as the human score and field 6 as ``HIGH``/``LOW``. The first
    line is treated as a header. Two frames, ``(V1, A0, A1)`` and
    ``(V2, A0, A1)``, are fed through the model's ``context_embedding``
    layer and compared with cosine similarity. One row per pair is written
    to ``MODEL_PATH/<experiment_name>_<eval_file_name>``.

    Args:
        model_name: Model type name; passed to ``model_builder.build_model``
            and, when it contains ``NNRF``, selects the three-input call
            signature instead of the four-input one.
        experiment_name: Name of the stored model and prefix of the result
            file.
        eval_file_name: File name inside ``EVAL_PATH``.
        model: Optional, already loaded network; loaded from disk if falsy.
        print_result: Print the summary statistics when true.
        verb_baseline: When true only the verb slot is filled and the
            argument slots keep ``missing_word_id``.

    Returns:
        The Spearman correlation between human scores and similarities.

    Raises:
        ValueError: If the HIGH/LOW field holds any other value.
    """
    MODEL_NAME = experiment_name
    eval_file = os.path.join(EVAL_PATH, eval_file_name)
    result_file = os.path.join(MODEL_PATH, MODEL_NAME + '_' + eval_file_name)

    if model:
        net = model
    else:
        description = model_builder.load_description(MODEL_PATH, MODEL_NAME)
        net = model_builder.build_model(model_name, description)
        net.load(MODEL_PATH, MODEL_NAME, description)

    sent_layer = 'context_embedding'
    sent_model = Model(inputs=net.model.input,
                       outputs=net.model.get_layer(sent_layer).output)

    n_input_length = len(net.role_vocabulary) - 1
    print(net.role_vocabulary)

    # Loop-invariant pieces -------------------------------------------------
    V_ri = net.role_vocabulary['V']
    A0_ri = net.role_vocabulary['A0']
    A1_ri = net.role_vocabulary['A1']

    # Every role slot starts as "missing"; the slot whose id equals
    # n_input_length is dropped so that n_input_length slots remain.
    empty_frame = {r: net.missing_word_id
                   for r in net.role_vocabulary.values()}
    empty_frame.pop(n_input_length)

    # Placeholder target inputs: one for NNRF-style models, two otherwise.
    zeroA = np.array([0])
    if re.search('NNRF', model_name):
        target_inputs = [zeroA]
    else:
        target_inputs = [zeroA, zeroA]

    def embed(frame):
        """Return the flattened context embedding of one role->word frame."""
        # dict views must be materialised: np.array(dict.values()) is a 0-d
        # object array on Python 3 and cannot be reshaped.
        words = np.array(list(frame.values())).reshape((1, n_input_length))
        roles = np.array(list(frame.keys())).reshape((1, n_input_length))
        # predict() yields a (1, n_hidden) array; the distance function
        # below is given 1-D vectors.
        return np.ravel(sent_model.predict([words, roles] + target_inputs))

    scores = []
    similarities = []
    lo_similarities = []
    hi_similarities = []
    records = set()

    print("Embedding: " + experiment_name)
    print("=" * 60)
    print("\n")
    print("sentence1\tsentence2\taverage_score\tembedding_cosine")
    print("-" * 60)

    with open(eval_file, 'r') as f, \
            open(result_file, 'w') as f_out:
        next(f, None)  # skip header
        for line in f:
            s = line.split()
            if not s:
                # tolerate blank lines (e.g. a trailing newline)
                continue

            # verb subject object landmark
            # A1 - object; A0 - subject
            V1, A0, A1, V2 = s[1:5]
            score = float(s[5])
            hilo = s[6].upper()

            V1 = wnl.lemmatize(V1, wn.VERB)
            A0 = wnl.lemmatize(A0, wn.NOUN)
            A1 = wnl.lemmatize(A1, wn.NOUN)
            V2 = wnl.lemmatize(V2, wn.VERB)

            V1_i = net.word_vocabulary.get(V1, net.unk_word_id)
            A0_i = net.word_vocabulary.get(A0, net.unk_word_id)
            A1_i = net.word_vocabulary.get(A1, net.unk_word_id)
            V2_i = net.word_vocabulary.get(V2, net.unk_word_id)

            sent1_x = dict(empty_frame)
            sent2_x = dict(empty_frame)
            sent1_x[V_ri] = V1_i
            sent2_x[V_ri] = V2_i
            if not verb_baseline:
                sent1_x[A0_ri] = A0_i
                sent1_x[A1_ri] = A1_i
                sent2_x[A0_ri] = A0_i
                sent2_x[A1_ri] = A1_i

            sent1_emb = embed(sent1_x)
            sent2_emb = embed(sent2_x)

            # convert distance to similarity
            similarity = -(cosine(sent1_emb, sent2_emb) - 1.0)

            if hilo == "HIGH":
                hi_similarities.append(similarity)
            elif hilo == "LOW":
                lo_similarities.append(similarity)
            else:
                raise ValueError("Unknown hilo value %s" % hilo)

            # Only the first occurrence of a (V1, A0, A1, V2) tuple counts.
            record = (V1, A0, A1, V2)
            if record not in records:
                records.add(record)
                scores.append(score)
                similarities.append(similarity)
                f_out.write("\"%s %s %s\"\t\"%s %s %s\"\t %.2f \t %.2f \n" %
                            (A0, V1, A1, A0, V2, A1, score, similarity))

    print("-" * 60)

    correlation, pvalue = spearmanr(scores, similarities)

    if print_result:
        print("Total number of samples: %d" % len(scores))
        print("Spearman correlation: %.4f; 2-tailed p-value: %.10f" %
              (correlation, pvalue))
        print("High: %.2f; Low: %.2f" %
              (np.mean(hi_similarities), np.mean(lo_similarities)))

    return correlation
class MTRFv4(GenericModel):
    """Multi-task non-incremental role-filler.

    One shared context embedding feeds two softmax heads: a target-word
    head (conditioned on the target role) and a target-role head
    (conditioned on the target word). The Keras graph is kept in
    ``self.model`` with inputs ``[input_words, input_roles, target_word,
    target_role]`` and outputs ``[word_probs, role_probs]``.
    """

    def __init__(self, n_word_vocab=50001, n_role_vocab=7, n_factors_emb=300,
                 n_hidden=300, word_vocabulary=None, role_vocabulary=None,
                 unk_word_id=50000, unk_role_id=7, missing_word_id=50001,
                 using_dropout=False, dropout_rate=0.3, optimizer='adagrad',
                 loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                 loss_weights=[1., 1.]):
        """Build and compile the network.

        The first fourteen arguments are forwarded unchanged to the base
        class; ``optimizer``, ``loss``, ``metrics`` and ``loss_weights`` are
        passed to ``Model.compile``. The list defaults are never mutated.
        """
        super(MTRFv4, self).__init__(n_word_vocab, n_role_vocab,
                                     n_factors_emb, n_hidden, word_vocabulary,
                                     role_vocabulary, unk_word_id, unk_role_id,
                                     missing_word_id, using_dropout,
                                     dropout_rate, optimizer, loss, metrics)

        # minus 1 here because one of the roles is the target role
        input_length = n_role_vocab - 1

        # each input is a fixed window of the frame set; each word
        # corresponds to one role
        input_words = Input(shape=(input_length, ), dtype=tf.uint32,
                            name='input_words')
        input_roles = Input(shape=(input_length, ), dtype=tf.uint32,
                            name='input_roles')
        target_word = Input(shape=(1, ), dtype=tf.uint32, name='target_word')
        target_role = Input(shape=(1, ), dtype=tf.uint32, name='target_role')

        # role based embedding layer
        embedding_layer = factored_embedding(input_words, input_roles,
                                             n_word_vocab, n_role_vocab,
                                             glorot_uniform(), missing_word_id,
                                             input_length, n_factors_emb,
                                             n_hidden, True, using_dropout,
                                             dropout_rate)

        # non-linear layer, using 1 to initialize
        non_linearity = PReLU(alpha_initializer='ones')(embedding_layer)

        # mean over the input_length axis; the context embedding has shape
        # (batch_size, n_hidden)
        context_embedding = Lambda(lambda x: K.mean(x, axis=1),
                                   name='context_embedding',
                                   output_shape=(n_hidden, ))(non_linearity)

        # target word hidden layer
        tw_hidden = target_word_hidden(context_embedding, target_role,
                                       n_word_vocab, n_role_vocab,
                                       glorot_uniform(), n_hidden, n_hidden,
                                       using_dropout=using_dropout,
                                       dropout_rate=dropout_rate)

        # target role hidden layer
        tr_hidden = target_role_hidden(context_embedding, target_word,
                                       n_word_vocab, n_role_vocab,
                                       glorot_uniform(), n_hidden, n_hidden,
                                       using_dropout=using_dropout,
                                       dropout_rate=dropout_rate)

        # softmax output layers, one per task
        target_word_output = Dense(n_word_vocab, activation='softmax',
                                   input_shape=(n_hidden, ),
                                   name='softmax_word_output')(tw_hidden)
        target_role_output = Dense(n_role_vocab, activation='softmax',
                                   input_shape=(n_hidden, ),
                                   name='softmax_role_output')(tr_hidden)

        self.model = Model(
            inputs=[input_words, input_roles, target_word, target_role],
            outputs=[target_word_output, target_role_output])

        # Keywords, not positions: the relative order of `metrics` and
        # `loss_weights` in Model.compile differs between Keras generations.
        self.model.compile(optimizer=optimizer, loss=loss, metrics=metrics,
                           loss_weights=loss_weights)

    def _swap_bias(self, layer_name, new_bias=None):
        """Replace the bias of ``layer_name`` and return the previous bias.

        With ``new_bias`` left as ``None`` the bias is set to zeros of the
        same shape as the current one.
        """
        layer = self.model.get_layer(layer_name)
        weights = layer.get_weights()
        kernel, old_bias = weights[0], weights[1]
        if new_bias is None:
            new_bias = np.zeros_like(old_bias)
        layer.set_weights([kernel, new_bias])
        return old_bias

    def set_0_bias(self):
        """Zero both softmax biases.

        Returns:
            ``(word_bias, role_bias)`` as they were before zeroing, suitable
            for handing back to :meth:`set_bias`.
        """
        word_bias = self._swap_bias("softmax_word_output")
        role_bias = self._swap_bias("softmax_role_output")
        return word_bias, role_bias

    def set_bias(self, bias):
        """Install ``bias[0]`` on the word head and ``bias[1]`` on the role
        head; returns ``bias`` unchanged."""
        self._swap_bias("softmax_word_output", bias[0])
        self._swap_bias("softmax_role_output", bias[1])
        return bias

    # Train and test
    # Deprecated temporarily
    def train(self, i_w, i_r, t_w, t_r, t_w_c, t_r_c, batch_size=256,
              epochs=100, validation_split=0.05, verbose=0):
        """Fit the model and return the object produced by ``Model.fit``."""
        # Keywords are required: after (x, y, batch_size, epochs) the next
        # positional slots of Model.fit are `verbose` and `callbacks`, so
        # positional passing sent validation_split into `verbose` and
        # verbose into `callbacks`.
        train_result = self.model.fit([i_w, i_r, t_w, t_r], [t_w_c, t_r_c],
                                      batch_size=batch_size,
                                      epochs=epochs,
                                      validation_split=validation_split,
                                      verbose=verbose)
        return train_result

    def test(self, i_w, i_r, t_w, t_r, t_w_c, t_r_c, batch_size=256,
             verbose=0):
        """Evaluate the model and return the result of ``Model.evaluate``."""
        test_result = self.model.evaluate([i_w, i_r, t_w, t_r],
                                          [t_w_c, t_r_c],
                                          batch_size=batch_size,
                                          verbose=verbose)
        return test_result

    def train_on_batch(self, i_w, i_r, t_w, t_r, t_w_c, t_r_c):
        """Run one gradient update on a single batch."""
        train_result = self.model.train_on_batch([i_w, i_r, t_w, t_r],
                                                 [t_w_c, t_r_c])
        return train_result

    def test_on_batch(self, i_w, i_r, t_w, t_r, t_w_c, t_r_c,
                      sample_weight=None):
        """Evaluate a single batch."""
        test_result = self.model.test_on_batch([i_w, i_r, t_w, t_r],
                                               [t_w_c, t_r_c],
                                               sample_weight=sample_weight)
        return test_result

    def predict(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return the output from both softmax layers as
        ``[word_probs, role_probs]``.
        """
        predict_result = self.model.predict([i_w, i_r, t_w, t_r],
                                            batch_size=batch_size,
                                            verbose=verbose)
        return predict_result

    def predict_word(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return predicted target word from prediction.
        """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        return np.argmax(predict_result[0], axis=1)

    def predict_role(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return predicted target role from prediction.
        """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        return np.argmax(predict_result[1], axis=1)

    def p_words(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return the output scores given target words.
        """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        # NOTE(review): rows are selected with range(batch_size), so this
        # presumably expects batch_size to equal the number of samples --
        # confirm against callers.
        return predict_result[0][range(batch_size), list(t_w)]

    def p_roles(self, i_w, i_r, t_w, t_r, batch_size=1, verbose=0):
        """ Return the output scores given target roles.
        """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)
        # NOTE(review): same batch_size assumption as in p_words.
        return predict_result[1][range(batch_size), list(t_r)]

    def top_words(self, i_w, i_r, t_w, t_r, topN=20, batch_size=1, verbose=0):
        """ Return top N target words given context.

        Only the ranking of the FIRST sample is returned (a 1-D array of
        word ids, best first).
        """
        predict_result = self.predict(i_w, i_r, t_w, t_r, batch_size,
                                      verbose)[0]
        rank_list = np.argsort(predict_result, axis=1)[0]
        return rank_list[-topN:][::-1]
        # return [r[-topN:][::-1] for r in rank_list]

    def list_top_words(self, i_w, i_r, t_r, topN=20, batch_size=1, verbose=0,
                       t_w=None):
        """ Return a list of decoded top N target words, one list per sample.
        (Only for reference, can be removed.)

        Args:
            t_w: Optional target-word input. The word head is built from the
                context embedding and ``t_r`` only, but the model still
                requires a fourth input, so a zero placeholder shaped like
                ``t_r`` is used when this is omitted.
        """
        if t_w is None:
            t_w = np.zeros_like(t_r)
        word_scores = self.predict(i_w, i_r, t_w, t_r, batch_size, verbose)[0]
        ranked = np.argsort(word_scores, axis=1)
        # self.word_decoder is not defined in this class; it is expected to
        # be provided by the base class (id -> word mapping).
        return [[self.word_decoder[w] for w in row[-topN:][::-1]]
                for row in ranked]

    def summary(self):
        """Print the Keras model summary."""
        self.model.summary()