class JointSequentialIntentModel(IntentExtractionModel):
    """
    Joint intent classification and slot tagging model.

    The intent is classified first; its softmax output is then repeated for
    every token position and concatenated to the token encodings that feed
    the slot tagger.
    """

    def __init__(self):
        super(JointSequentialIntentModel, self).__init__()

    def build(self,
              sentence_length,
              vocab_size,
              tag_labels,
              intent_labels,
              token_emb_size=100,
              tagger_hidden=100,
              tagger_dropout=0.5,
              intent_classifier_hidden=100,
              emb_model_path=None):
        """
        Assemble and compile the two-output model, storing it in ``self.model``.

        Args:
            sentence_length (int): max length of a sentence
            vocab_size (int): vocabulary size
            tag_labels (int): number of tag labels
            intent_labels (int): number of intent labels
            token_emb_size (int): token embedding vectors size
            tagger_hidden (int): label tagger LSTM hidden size
            tagger_dropout (float): label tagger dropout rate
            intent_classifier_hidden (int): intent LSTM hidden size
            emb_model_path (str): external embedding model path; only whether
                it is ``None`` is consulted here
        """
        has_external_emb = emb_model_path is not None
        tokens_input, token_emb = self._create_input_embed(
            sentence_length, has_external_emb, token_emb_size, vocab_size)

        # Intent branch: whole-sentence encoding -> softmax over intents.
        sentence_encoder = Bidirectional(LSTM(intent_classifier_hidden))
        sentence_encoding = sentence_encoder(token_emb)
        intent_layer = Dense(intent_labels,
                             activation='softmax',
                             name='intent_classifier')
        intent_out = intent_layer(sentence_encoding)

        # Broadcast the intent distribution to every time step.
        intent_per_token = RepeatVector(sentence_length)(intent_out)

        # Slot branch: per-token encoding joined with the intent distribution.
        token_encoder = Bidirectional(LSTM(tagger_hidden, return_sequences=True))
        token_encoding = token_encoder(token_emb)
        joint_features = concatenate([token_encoding, intent_per_token], axis=-1)

        tagger_lstm = Bidirectional(LSTM(tagger_hidden, return_sequences=True))
        tagger_state = tagger_lstm(joint_features)
        tagger_state = Dropout(tagger_dropout)(tagger_state)
        tag_layer = TimeDistributed(Dense(tag_labels, activation='softmax'),
                                    name='slot_tag_classifier')
        tagger_out = tag_layer(tagger_state)

        self.model = Model(inputs=tokens_input,
                           outputs=[intent_out, tagger_out])
        self.model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy',
                           loss_weights=[1., 1.],
                           metrics=['categorical_accuracy'])
def get_model():
    """Build and compile the dense network that stacks the ensemble outputs.

    The input shape comes from ``get_ensemble_inputShape()``; the network is a
    single 128-unit ReLU layer with batch normalization and dropout, followed
    by six independent sigmoid outputs trained with binary cross-entropy.
    """
    ensemble_in = Input(shape=get_ensemble_inputShape())

    hidden = Dense(units=128, activation='relu')(ensemble_in)
    hidden = BatchNormalization()(hidden)
    hidden = Dropout(0.3)(hidden)

    predictions = Dense(6, activation='sigmoid')(hidden)

    stacker = Model(inputs=ensemble_in, outputs=predictions)
    stacker.compile(loss='binary_crossentropy',
                    optimizer='rmsprop',
                    metrics=['acc'])
    return stacker
numerator = tf.reduce_sum(onehots_true * probabilities, axis=0) denominator = tf.reduce_sum(onehots_true + probabilities, axis=0) loss = 1.0 - 2.0 * (numerator + 1) / (denominator + 1) return loss def main() with open(args.config, 'r') as f: yam = yaml.load(f) img_path = yam['img_path'] mask_path = yam['mask_path'] epochs = yam['epochs'] image_size = yam['image_size'] start_neurons = yam['start_neurons'] batch_size = yam['batch_size'] get_custom_objects().update({'lrelu': Activation(tf.keras.layers.LeakyReLU(alpha=0.3))}) train_generator = directory_to_generator(img_path , mask_path , image_size) steps_per_epoch = int( np.ceil(train_generator.shape[0] / batch_size) ) input_layer = Input((image_size, image_size, 3)) output_layer = build_model(input_layer, start_neurons) model = Model(input_layer, output_layer) model.compile(loss = dice_loss, optimizer='adam', metrics=["accuracy"]) model.fit(train_generator , epochs = epochs , steps_per_epoch = steps_per_epoch , batch_size = batch_size) if __name__ == "__main__": main()
def ctc_lambda_func( args ):
    """Compute the per-sample CTC cost inside a ``Lambda`` layer.

    ``args`` is the list ``[y_pred, labels, label_lengths]``. The raw network
    output is passed through a softmax before ``K.ctc_batch_cost``.
    """
    y_pred, labels, label_lengths = args
    # One [time_steps] entry per sample, replicated `batchSize` times.
    # `batchSize` is not defined in this block (module-level name).
    y_pred_len = [ [y_pred.shape[1] ] ] * batchSize
    # y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost( labels, K.softmax( y_pred ), y_pred_len, label_lengths )

# Training-time inputs: the image plus the label sequence and its length,
# which the CTC Lambda layer needs in addition to the prediction.
labels = Input(name='the_labels', shape=[ labelWidth ], dtype='int32')
images = Input(name='the_images', shape=[ targetH, targetW, 1 ], dtype='float32')
label_lengths = Input(name='label_lengths', shape=[1], dtype='int32')
y_pred = backBone( images )
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')( [ y_pred, labels, label_lengths ])
fullModel = Model( inputs=[ images, labels, label_lengths ], outputs=loss_out )
# plot_model(fullModel, to_file='model2.png', show_shapes=True)
# The 'ctc' layer already outputs the loss value, so the Keras loss function
# simply returns the model output and ignores y_true.
fullModel.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer, metrics=['accuracy'])
train_loader = DataGenerator( opt.traindata, batchSize=opt.batchSize, limit=opt.traindata_limit, cache=opt.traindata_cache )
test_loader = DataGenerator( opt.valdata, batchSize=opt.batchSize, limit=opt.valdata_limit, cache=opt.valdata_cache )
# import pdb; pdb.set_trace();
# import IPython as x; x.embed()

class WeightsSaver(Callback):
    # NOTE(review): only __init__ is visible in this chunk; the class
    # presumably continues below.
    def __init__(self):
        # Output file prefix: configured name plus a creation timestamp.
        self.fname = opt.outfile + '_' + datetime.now().strftime('%d%m%Y_%H%M%S')
        # Counters starting at 1; their use is not visible in this chunk.
        self.i = 1
        self.j = 1
        # Defaults to 100 unless the SAVE_INTERVAL environment variable is set.
        self.saveInterval = 100 if 'SAVE_INTERVAL' not in environ else int( environ['SAVE_INTERVAL'])
from keras import Input
from keras import layers
from keras import Model
import numpy as np

# Functional-API classifier: 64 input features, two 32-unit ReLU layers and a
# 10-way softmax output.
input_tensor = Input(shape=(64,))
x = input_tensor
for _ in range(2):
    x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=output_tensor)
# model.summary()
model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

# Random features with integer class labels in [0, 10), matching the sparse
# cross-entropy loss.
x_train = np.random.random((1000, 64))
y_train = np.random.randint(0, 10, size=1000)

model.fit(x_train, y_train, epochs=10, batch_size=128)
score = model.evaluate(x_train, y_train)
def autoencode(pipe: Pipe,
               layer_config: List[Dict],
               from_file: str,
               store_model: str,
               loss: str,
               optimiser: str,
               epochs: int,
               batch_size: int,
               shuffle: bool,
               validation_split: float,
               adjust_weights: float,
               mode: str):
    """Build and train an autoencoder.

    Each *column* of ``pipe.matrix`` is one training sample and the rows are
    its features (rows containing NaN are dropped first). The encoder is built
    from ``layer_config``; the decoder mirrors it and ends in a linear layer
    that restores the input size.

    Args:
        pipe: pipeline object; ``pipe.matrix`` is read and finally replaced,
            ``pipe.decomposition`` receives the trained encoder/decoder pair.
        layer_config: one dict per encoder layer with at least ``type`` (a
            ``keras.layers`` class name) and ``n`` (number of units). An
            optional ``kernel_regularizer`` such as ``"l1_l2(0.001+0.001)"``
            is parsed; every other key is passed through to the layer.
        from_file: ``.yaml`` / ``.json`` model description, used only when
            ``layer_config`` is empty.
        store_model: when set, the freshly built architecture is written here
            (JSON for ``.json``, YAML otherwise).
        loss: Keras loss identifier.
        optimiser: Keras optimizer identifier.
        epochs: maximum number of training epochs.
        batch_size: training batch size.
        shuffle: forwarded to ``fit``.
        validation_split: forwarded to ``fit``.
        adjust_weights: if truthy, quantile ``q``: first-encoder-layer weights
            between the ``q`` and ``1 - q`` quantiles are zeroed after
            training and the reconstruction error is printed again.
        mode: what ends up in ``pipe.matrix``: ``'weights'`` (first encoder
            layer kernel), ``'nodes'`` (encoded samples) or ``'discard'``
            (original matrix with rows zeroed whose first-layer weights are
            all zero).

    Raises:
        ValueError: no model was specified, the ``from_file`` extension is
            unsupported, or ``mode`` is unknown.
        NotImplementedError: the model came from ``from_file``; the separate
            encoder/decoder required afterwards cannot be recovered yet.
    """
    import keras
    from keras import regularizers, Sequential, Input, Model
    from keras.callbacks import EarlyStopping, TensorBoard
    from keras.engine import InputLayer
    from keras.engine.saving import model_from_yaml, model_from_json
    from keras.layers import Dense
    from numpy.random.mtrand import seed
    from tensorflow import set_random_seed
    from lyner.keras_extras import SignalHandler

    # Fixed seeds for NumPy and TensorFlow.
    seed(1)
    set_random_seed(2)

    matrix = pipe.matrix.copy()
    if matrix.isnull().values.any():
        LOGGER.warning("Dropping rows containing nan values")
        matrix.dropna(how='any', inplace=True)

    def parse_layout(layer_conf):
        """Turn one config dict into ``(layer_class, n_units, extra_kwargs)``."""
        regdict = {'l1_l2': regularizers.l1_l2,
                   'l1': regularizers.l1,
                   'l2': regularizers.l2}
        lc = layer_conf.copy()
        layer_type = lc.get('type', None)
        if layer_type:
            lc['type'] = getattr(keras.layers, layer_type, None)
        # TODO parse regularizers
        kernel_reg_type = lc.get('kernel_regularizer', None)
        if kernel_reg_type and '(' in kernel_reg_type and ')' in kernel_reg_type:
            # "name(p1+p2)" -> regdict[name](p1, p2)
            open_idx = kernel_reg_type.index('(')
            close_idx = kernel_reg_type.index(')')
            params = [float(p)
                      for p in kernel_reg_type[open_idx + 1:close_idx].split('+')]
            lc['kernel_regularizer'] = regdict[kernel_reg_type[:open_idx]](*params)
        return lc.pop('type'), int(lc.pop('n')), lc

    # `layer_config` may be missing when the model is read from a file.
    layout = [parse_layout(layer_conf) for layer_conf in (layer_config or [])]
    labels = matrix.columns.values.tolist()
    data = matrix.values
    shape = (data.shape[0],)  # features per sample == number of matrix rows
    data = data.transpose()   # samples become rows

    encoder = None
    decoder = None
    if layout:
        encoding_dim = layout[-1][1]
        encoder = Sequential(name="encoder")
        encoder.add(InputLayer(shape, name="encoder_input"))
        for layer_num, (Layer, n_nodes, extra_args) in enumerate(layout):
            encoder.add(Layer(n_nodes, name=f"encoder_{layer_num}_{n_nodes}", **extra_args))
            # kernel_regularizer=regularizers.l1_l2(0.001, 0.001),
            # kernel_regularizer=regularizers.l1(0.0001),

        # Mirror the encoder (minus its bottleneck layer), then map back to
        # the input size with a linear layer.
        decoder = Sequential(name="decoder")
        decoder.add(InputLayer((encoding_dim,), name="decoder_input"))
        for layer_num, (Layer, n_nodes, _) in enumerate(layout[::-1][1:]):
            decoder.add(Layer(n_nodes, name=f"decoder_{layer_num}_{n_nodes}"))
        decoder.add(Dense(shape[0], activation='linear', name="decoder_output"))

        input_layer = Input(shape=shape, name="autoencoder_input")
        encode_layer = encoder(input_layer)
        decode_layer = decoder(encode_layer)
        autoencoder = Model(input_layer, decode_layer)

        if store_model:
            if store_model.endswith('.json'):
                model_string = autoencoder.to_json()
            else:  # '.yaml' and every other extension
                model_string = autoencoder.to_yaml()
            with open(store_model, 'wt') as writer:
                writer.write(model_string)
    elif from_file:
        with open(from_file, 'rt') as reader:
            # read() keeps the file as written; joining readlines() with '\n'
            # would double every line break.
            model_string = reader.read()
        if from_file.endswith('.yaml'):
            autoencoder = model_from_yaml(model_string)
        elif from_file.endswith('.json'):
            autoencoder = model_from_json(model_string)
        else:
            raise ValueError(f"Unsupported model file extension: {from_file}")
        # TODO set encoder and decoder correctly
    else:
        raise ValueError("No model specified. Use either of --layer-config or --from-file.")

    # from pprint import pprint
    # pprint(autoencoder.get_config())
    autoencoder.compile(optimizer=optimiser, loss=loss, metrics=['mse'])
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0.0000001, patience=50)
    sh = SignalHandler()
    autoencoder.fit(np.vsplit(data, 1), np.vsplit(data, 1),
                    callbacks=[TensorBoard(log_dir='/tmp/autoencoder'), sh, early_stopping],
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_split=validation_split,
                    shuffle=shuffle)
    sh.uninit()

    if encoder is None or decoder is None:
        # Everything below needs the two halves separately; without this check
        # the from-file path ended in a NameError.
        raise NotImplementedError(
            "Models loaded with --from-file cannot be split into encoder and decoder yet.")

    class Autoencoder:
        """Pairs encoder and decoder behind a transform / inverse_transform API."""

        def __init__(self, encoder=None, decoder=None):
            self._encoder = encoder
            self._decoder = decoder

        def inverse_transform(self, data):
            return self._decoder.predict(data).transpose()

        def transform(self, data):
            return self._encoder.predict(data).transpose()

    pipe.decomposition = Autoencoder(encoder, decoder)
    encoded_data = pipe.decomposition.transform(data)
    decoded_data = pipe.decomposition.inverse_transform(encoded_data.T)
    pre_error = ((data.T - decoded_data) ** 2).mean(axis=None)
    print(f"MSE: {pre_error}")
    pipe._index = pipe.matrix.index
    pipe._columns = pipe.matrix.columns

    if adjust_weights:
        quant = float(adjust_weights)
        # Only the first encoder layer is pruned: keep the weights in both
        # tails of the distribution and zero everything in between.
        first_layer = encoder.layers[0]
        W, b = first_layer.get_weights()
        low, median, high = np.quantile(W.flatten(), [quant, 0.5, 1 - quant])
        W_low = W * (W < low)
        W_high = W * (W > high)
        selected_weights = W_low + W_high
        # oplot([Histogram(x=W.flatten()), Histogram(x=W[W < low].flatten()), Histogram(x=W[W > high].flatten())])
        first_layer.set_weights([selected_weights, b])

        encoded_data = pipe.decomposition.transform(data)
        decoded_data = pipe.decomposition.inverse_transform(encoded_data.T)
        post_error = ((data.T - decoded_data) ** 2).mean(axis=None)
        print(f"MSE: {post_error}")

    if 'weights' == mode:
        layer = encoder.layers[0]
        # get_weights() returns the kernel first; a trailing bias is ignored.
        new_data = layer.get_weights()[0]
        index = [f'Weight_{i}' for i in range(new_data.shape[0])]
        num_nodes = new_data.shape[1]
        columns = [f"{layer.name}_{i}" for i in range(num_nodes)]
    elif 'nodes' == mode:
        new_data = encoder.predict(np.vsplit(data, 1)).transpose()
        columns = labels
        index = [f"{mode}_{i}" for i in range(encoding_dim)]
    elif 'discard' == mode:
        W, b = encoder.layers[0].get_weights()
        # 1 for every input feature that still has a non-zero weight, else 0.
        W = np.sum(np.abs(W), axis=1)
        W[W != 0] = 1
        print(f"Kept {np.sum(W)} weights")
        # NOTE(review): this uses pipe.matrix, not the NaN-filtered copy the
        # network was trained on; the row counts differ if rows were dropped.
        v: np.array = pipe.matrix.values
        new_data = (v.T * W).T
        columns = pipe.matrix.columns
        index = pipe.matrix.index
    else:
        raise ValueError(f"Unknown mode {mode}")

    pipe.matrix = pd.DataFrame(data=new_data, columns=columns, index=index)
    return
def main():
    """Train a colorization decoder on top of frozen Mask R-CNN FPN features.

    Images are converted to grayscale and run through the Mask R-CNN to obtain
    the ``fpn_p2`` .. ``fpn_p5`` feature maps; a small decoder is trained to
    predict the two Lab colour channels (scaled by 1/128) from them. Weights
    are saved every 300 batches; the architecture and the final weights are
    saved at the end.
    """
    config = InferenceConfig()
    # config.display()

    # Create Mask Model
    mask_model = modellib.MaskRCNN(mode='inference',
                                   model_dir=MASK_LOGS_DIR,
                                   config=config)
    mask_model.load_weights(COCO_MODEL_PATH, by_name=True)
    # Freeze model
    mask_model.keras_model.trainable = False

    class_names = CLASS_NAMES
    file_names = next(os.walk(IMAGE_DIR))[2]
    random.shuffle(file_names)
    if SHOW is True:
        test_file_names = next(os.walk(TESTING_DIR))[2][0:1]
    else:
        test_file_names = next(os.walk(TESTING_DIR))[2][0:10]

    def log_auto_color(s):
        """Append ``s`` to the loss history file; logging is best effort."""
        # `with` closes the file even when write() fails. The previous
        # try/finally called close() on an unbound name when open() itself
        # failed.
        try:
            with open("loss_hist_bc_lab.txt", 'a') as log_file:
                log_file.write('{}\n'.format(s))
                # print("{}: {}".format(str(datetime.now()), s))  # For debugging
        except OSError:
            pass  # a broken log file must not interrupt training

    def report_loss(filenames=test_file_names):
        """Evaluate the decoder on the test files, then print and log the loss."""
        X_batch, Y_batch, _ = generate_training_datum(filenames,
                                                      image_dir=TESTING_DIR)
        loss = model.evaluate(X_batch, Y_batch)
        print("Loss(bachnorm): {}".format(loss))
        log_auto_color(loss)

    def colorize(filename='000000000643.jpg'):
        """Colorize One picture"""
        X_batch, Y_batch, images = generate_training_datum(
            [filename], image_dir=TESTING_DIR)
        # plt.imshow(images[0])
        # plt.show()
        preds = model.predict(X_batch)
        # print("predict shape: {}".format(preds.shape))
        lab = rgb2lab(images[0])
        # Keep the original lightness channel, fill in the predicted colours.
        pred_image = np.zeros(lab.shape)
        pred_image[:, :, 0] = lab[:, :, 0]
        pred_image[:, :, 1:] = preds[0] * 128
        # pred_image = np.concatenate((images[0], lab2rgb(pred_image)), axis=1)  # Demage orignal image
        pred_image = lab2rgb(pred_image)
        # print(preds)
        if SHOW is True:
            plt.imshow(pred_image)
            plt.show()
        return pred_image

    def get_feature_map(images):
        """ Get the feature map from the trained mask_rcnn """
        result = mask_model.run_graph(
            images,
            [
                ('P2', mask_model.keras_model.get_layer('fpn_p2').output
                 ),  # -> shape: (2, 256, 256, 256)
                ('P3', mask_model.keras_model.get_layer('fpn_p3').output
                 ),  # -> shape: (2, 128, 128, 256)
                ('P4', mask_model.keras_model.get_layer('fpn_p4').output
                 ),  # -> shape: (2, 64, 64, 256)
                ('P5', mask_model.keras_model.get_layer('fpn_p5').output
                 ),  # -> shape: (2, 32, 32, 256)
            ])
        return result

    def generate_training_datum(filenames, image_dir=IMAGE_DIR):
        """Return ``(feature_maps, Y_batch, images)`` for the given files.

        ``Y_batch`` holds the two Lab colour channels divided by 128; images
        whose crop raises ``IndexError`` are skipped.
        """
        images = []
        grayscaled_rgbs = []
        Y_batch = []
        for filename in filenames:
            image = skimage.io.imread(os.path.join(image_dir, filename))
            image, _, _, _ = utils.resize_image(image,
                                                min_dim=config.IMAGE_MAX_DIM)
            try:
                image = image[:config.IMAGE_SHAPE[0], :config.IMAGE_SHAPE[1], :]
            except IndexError:
                continue
            images.append(image)
            lab = rgb2lab(image)
            grayscaled_rgb = gray2rgb(rgb2gray(image))
            grayscaled_rgbs.append(grayscaled_rgb)
            Y_batch.append(lab[:, :, 1:] / 128)
        feature_maps = get_feature_map(grayscaled_rgbs)
        # print(feature_maps['P2'].shape)  # -> (batch_size, pool_size, pool_size, filter_num)
        grayscaled_rgbs = np.asarray(grayscaled_rgbs)
        Y_batch = np.asarray(Y_batch)
        # print(grayscaled_rgbs.shape)  # -> (batch_size, height, width, channels)
        return feature_maps, Y_batch, images

    # generate_training_datum(file_names[0:2])

    # ========= Building the network ========= #
    # Input: https://stackoverflow.com/questions/44747343/keras-input-explanation-input-shape-units-batch-size-dim-etc
    P5 = Input(shape=(32, 32, 256, ), name='P5')
    P4 = Input(shape=(64, 64, 256, ), name='P4')
    P3 = Input(shape=(128, 128, 256, ), name='P3')
    P2 = Input(shape=(256, 256, 256, ), name='P2')

    initer = keras.initializers.RandomUniform(minval=-0.5, maxval=0.5)
    # activer = 'relu'
    activer = 'sigmoid'
    # error = 'mse'
    error = 'mean_absolute_error'

    # Decode
    decode_p5 = KL.Conv2D(128, (3, 3),
                          padding='same',
                          bias_initializer=initer,
                          activation=activer,
                          name='decode_p5')(P5)
    # decode_p5 = KL.TimeDistributed(BatchNorm(axis=3), name='p5_bn')(decode_p5)
    # decode_p5 = KL.Activation('relu')(decode_p5)
    decode_p5 = UpSampling2D((2, 2))(decode_p5)
    decode_p4 = Conv2D(128, (1, 1), padding='same',
                       bias_initializer=initer)(P4)
    decode_p4_5 = Add()([decode_p5, decode_p4])
    decode_p4_5 = BatchNorm(axis=3, name='p45_bn')(decode_p4_5)
    decode_p4_5 = KL.Activation(activer)(decode_p4_5)

    decode2_p4_5 = Conv2D(64, (3, 3),
                          activation=activer,
                          padding='same',
                          bias_initializer=initer)(decode_p4_5)
    decode2_p4_5 = UpSampling2D((2, 2))(decode2_p4_5)
    decode2_p3 = Conv2D(64, (3, 3), padding='same',
                        bias_initializer=initer)(P3)
    decode2_p3_4_5 = Add()([decode2_p4_5, decode2_p3])
    decode2_p3_4_5 = BatchNorm(axis=3, name='p345_bn')(decode2_p3_4_5)
    decode2_p3_4_5 = KL.Activation(activer)(decode2_p3_4_5)

    decode3_p345 = Conv2D(32, (3, 3), activation=activer,
                          padding='same')(decode2_p3_4_5)
    decode3_p345 = UpSampling2D((2, 2))(decode3_p345)
    decode3_p2 = Conv2D(32, (1, 1), padding='same',
                        bias_initializer=initer)(P2)
    decode3_p2345 = Add()([decode3_p345, decode3_p2])
    decode3_p2345 = BatchNorm(axis=3, name='p2345_bn')(decode3_p2345)
    decode3_p2345 = KL.Activation(activer)(decode3_p2345)

    decode_out = Conv2D(16, (3, 3),
                        activation=activer,
                        padding='same',
                        bias_initializer=initer)(decode3_p2345)
    decode_out = UpSampling2D((2, 2))(decode_out)
    decode_out = Conv2D(4, (3, 3),
                        activation=activer,
                        padding='same',
                        bias_initializer=initer)(decode_out)
    decode_out = UpSampling2D((2, 2))(decode_out)
    # tanh keeps the output in [-1, 1], the range of the Lab targets / 128.
    decode_out = Conv2D(2, (3, 3),
                        activation='tanh',
                        padding='same',
                        bias_initializer=initer)(decode_out)

    # build
    tensorboard = TensorBoard(log_dir=TB_LOG_DIR)
    model = Model(inputs=[P5, P4, P3, P2], outputs=decode_out)
    if os.path.isfile('auto_color_batch_norm_lab.h5'):
        print('Found weights')
        model.load_weights('auto_color_batch_norm_lab.h5')
    sgd = optimizers.SGD(lr=0.005, momentum=0.1, decay=0.0, nesterov=False)
    model.compile(optimizer=sgd, loss=error)

    # ========= Training =========== #
    batch_size = BATCH_SIZE
    for i in range(int(len(file_names) / batch_size - 1)):
        # for i in range(30):
        print('(batchnorm) Training on batch {}'.format(i))
        X_batch, Y_batch, _ = generate_training_datum(
            file_names[i * batch_size:(i + 1) * batch_size])
        model.train_on_batch(X_batch, Y_batch)
        if SHOW is True:
            colored = colorize()
            report_loss()
            # color_files = random.choice(test_file_names)
        if i % 10 == 0:
            report_loss()
            colored = colorize()
            skimage.io.imsave(os.path.join(
                TESTING_RESULT_DIR,
                '{}_test_batchnorm_lab_'.format(i) + "00000643.jpg"),
                arr=colored)
        if i % 300 == 299:
            model.save_weights("{}_color_batchnorm_mrcnn_lab.h5".format(i))

    # ===== Store Model ===== #
    # Save model
    model_json = model.to_json()
    with open("batchnorm_model.json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("auto_color_batchnorm_final_lab.h5")
# Two-input question-answering model: the reference text and the question are
# embedded and encoded by separate LSTMs, concatenated, and classified over
# the answer vocabulary.
text_input = Input(shape=(None,), dtype='int32', name='text')
embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input)
encoded_text = layers.LSTM(32)(embedded_text)

question_input = Input(shape=(None,), dtype='int32', name='question')
embedded_question = layers.Embedding(question_vocabulary_size, 32)(question_input)
encoded_question = layers.LSTM(16)(embedded_question)

concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)
answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)

model = Model(inputs=[text_input, question_input], outputs=answer)
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# %%
# Random integer sequences for both inputs and one-hot encoded random answers.
num_samples = 1000
max_length = 100

text = np.random.randint(
    1, text_vocabulary_size, size=(num_samples, max_length))
question = np.random.randint(
    1, question_vocabulary_size, size=(num_samples, max_length))
answers = np.random.randint(1, answer_vocabulary_size, size=(num_samples,))
answers = keras.utils.to_categorical(answers, answer_vocabulary_size)

# The dict keys match the `name` given to each Input layer.
model.fit(
    {'text': text, 'question': question},
    answers,
    epochs=10,
    batch_size=128,
)
# Time branch: 5-unit dense layer followed by a PReLU activation.
time_dense = PReLU(name='time_dense_prelu')(Dense(units=5, name='time_dense')(time_bn))
# Merge the time, member-feature and question branches into one vector.
inv_con = concatenate([time_dense, member_feat_dense, question_dense], name='invite_concatenate')
inv_con_bn = BatchNormalization(name='inv_con_bn')(inv_con)
inv_dense_1 = PReLU(name='inv_dense_1_prelu')(Dense(
    units=512, name='inv_dense_1')(inv_con_bn))
# NOTE(review): here the Dense layer carries the name 'inv_dense_2_prelu' and
# the PReLU is unnamed, unlike the pattern used above. Renaming would change
# how weights are matched by name, so it is left untouched -- confirm intent.
inv_dense_2 = PReLU()(Dense(units=128, name='inv_dense_2_prelu')(inv_dense_1))
# Single sigmoid output, trained with binary cross-entropy below.
inv_out = Dense(units=1, activation='sigmoid', name='inv_out')(inv_dense_2)
model = Model(inputs=[time_input, ques_input, member_feat_input], outputs=inv_out)
model.compile(optimizer='adam', loss=keras.losses.binary_crossentropy, metrics=['acc'])
model.summary()
keras.utils.plot_model(model, './model.png', show_shapes=True)
#%%
class MGene(keras.utils.Sequence):
    # NOTE(review): only the start of this class is visible in this chunk.
    def __init__(self, batch_size: int, invite: pd.DataFrame):
        self.batch_size = batch_size
        self.invite = invite
        # Number of rows in `invite`.
        self.length = len(invite)
        # Fraction of rows whose 'is_answer' column equals 1.
        # NOTE(review): divides by zero when `invite` is empty.
        self.true_len_rate = len(
            invite[invite['is_answer'] == 1]) / self.length
def transform_1D_data_to_reverse_dist(
        data,
        new_sample_ratio=False,
        return_same_sized_combined_dist=True,
        bins=30,
        imba_f=1.2,
        visualization=True):
    """Oversample rare regions of ``data`` through a 1-D autoencoder latent space.

    An autoencoder with a single latent unit is trained on ``data``. The
    latent values are histogrammed and new latent points are drawn
    preferentially from sparsely populated bins, then decoded back to feature
    space. Instead of merely giving rare events the same standing as frequent
    ones, rare events are made even more common than the norm; ``imba_f``
    controls how strongly.

    Args:
        data: 2-D sequence/array of samples. NOTE: it is shuffled in place.
        new_sample_ratio: number of synthetic samples as a fraction of
            ``len(data)``. ``False`` (default) derives the number from the
            area missing for the histogram to become uniform. An explicit
            ``0`` means "no new samples" and returns ``data`` unchanged.
        return_same_sized_combined_dist: if truthy, return a random subset of
            ``data`` + synthetic samples with ``len(data)`` rows.
        bins: number of histogram bins (reduced to half the number of samples
            when larger than it).
        imba_f: exponent applied to the per-bin sampling probability; ``0``
            returns ``data`` unchanged.
        visualization: plot latent-value histograms with matplotlib.

    Returns:
        ``data`` unchanged when nothing can or should be generated; otherwise
        the resampled array, or the list of new latent values when
        ``return_same_sized_combined_dist`` is falsy.
    """
    # `0 == False` in Python, so the old test
    # `new_sample_ratio == 0 and new_sample_ratio != False` could never be
    # true. Identity tells the default `False` apart from an explicit zero.
    # Doing this before the imports keeps the no-op path free of keras and
    # matplotlib.
    if (new_sample_ratio is not False and new_sample_ratio == 0) or imba_f == 0:
        return data

    from keras import Input, Sequential, Model
    from keras.layers import Dense
    from keras.optimizers import Adam
    from keras.callbacks import EarlyStopping
    import matplotlib.pyplot as plt

    latent_dim = 1
    feature_count = len(data[0])

    enc_input = Input(shape=(feature_count, ))
    encoder = Sequential()
    encoder.add(Dense(100, input_shape=(feature_count, )))
    encoder.add(Dense(latent_dim))
    decoder = Sequential()
    decoder.add(Dense(100, input_shape=(latent_dim, )))
    decoder.add(Dense(feature_count))
    final = Model(enc_input, decoder(encoder(enc_input)))
    final.compile(optimizer=Adam(lr=1e-4), loss="mean_squared_error")

    np.random.shuffle(data)
    final.fit(
        x=np.asarray(data),
        y=np.asarray(data),
        # at least 1: fewer than 10 samples used to give batch_size == 0
        batch_size=max(1, len(data) // 10),
        callbacks=[EarlyStopping(monitor='loss', min_delta=0.00001)],
        epochs=500)

    latent_values = encoder.predict(data)
    if visualization:
        plt.figure('Original latent values histogram')
        plt.hist(latent_values, bins=bins)

    if bins > len(latent_values):
        bins = int(len(latent_values) / 2)
    count, ranges = np.histogram(latent_values, bins=bins)

    if not new_sample_ratio:
        # Area missing for the histogram to become a full rectangle.
        no_of_new_samples = np.sum(np.max(count) - count)
    else:
        no_of_new_samples = int(len(data) * new_sample_ratio)

    # Emptier bins get a higher sampling probability, sharpened by imba_f.
    max_count = max(count)
    bins_probability_table = np.power(
        np.rint(max_count - count) / max_count, imba_f)
    peak = np.max(bins_probability_table)
    if peak == 0:
        # Every bin is equally full: there is nothing to rebalance, and
        # normalising would give 0/0 probabilities and an endless loop below.
        return data
    bins_probability_table = bins_probability_table / peak

    new_latent_values = []
    while len(new_latent_values) < no_of_new_samples:
        for i, bins_prob in enumerate(bins_probability_table):
            if np.random.rand() < bins_prob:
                # Uniform draw inside the chosen bin.
                bin_lo, bin_hi = ranges[i], ranges[i + 1]
                new_synth_latent = np.random.rand() * (bin_hi - bin_lo) + bin_lo
                new_latent_values.append([new_synth_latent])
                if len(new_latent_values) >= no_of_new_samples:
                    break

    # for debugging
    if len(new_latent_values) == 0:
        return data

    new_synth_data = decoder.predict(np.asarray(new_latent_values))

    if visualization:
        plt.figure('New latent values histogram')
        plt.hist(np.asarray(new_latent_values), bins=bins)
        plt.figure('Combined latent values histogram')
        combined_latent_values = np.concatenate(
            (np.asarray(new_latent_values), latent_values))
        plt.hist(combined_latent_values, bins=bins)
        plt.show()
        # count_, ranges_ = np.histogram(new_latent_values, bins=bins)

    if return_same_sized_combined_dist:
        resampled_data = np.concatenate((data, new_synth_data))
        np.random.shuffle(resampled_data)
        resampled_data = resampled_data[:len(data)]
        # for debugging
        # debugging_latent_v = encoder.predict(resampled_data)
        # plt.hist(debugging_latent_v, bins=bins)
        # plt.show()
        return resampled_data

    # NOTE(review): this returns the *latent* values, not the decoded samples
    # in `new_synth_data`; kept for backward compatibility -- confirm intent.
    return new_latent_values
def transform_1D_samples_using_DOPE(
        data,
        return_same_sized_combined_dist=True,
        new_sample_ratio=0.3,
        no_of_std=3,
        visualization=False):
    """Create synthetic outlying samples in a 1-D autoencoder latent space.

    An autoencoder with one latent unit is trained on ``data``. Latent points
    are weighted by their distance from the latent mean: distances beyond
    ``no_of_std`` chi standard deviations are discarded, then only the largest
    ~10 % of the remaining distances keep a non-zero weight. Each synthetic
    point is a random interpolation between a point drawn with those weights
    and its nearest latent neighbour; the result is decoded to feature space.

    Args:
        data: 2-D sequence/array of samples. NOTE: it is shuffled in place.
        return_same_sized_combined_dist: if truthy, return a random subset of
            ``data`` + synthetic samples with ``len(data)`` rows; otherwise
            return only the decoded synthetic samples.
        new_sample_ratio: number of synthetic samples as a fraction of
            ``len(data)``.
        no_of_std: distance cut-off in units of the chi standard deviation.
        visualization: plot the intermediate distributions with matplotlib.

    Returns:
        ``data`` unchanged when no sample would be generated, otherwise the
        resampled array or the synthetic samples (see above).
    """
    if new_sample_ratio == 0 or no_of_std == 0:
        return data
    no_of_new_samples = int(len(data) * new_sample_ratio)
    if no_of_new_samples <= 0:
        # Nothing to synthesise; previously the model was trained and the
        # decoder then failed on an empty input.
        return data

    # Heavy imports only after the no-op checks.
    from keras import Input, Sequential, Model
    from keras.layers import Dense
    from keras.optimizers import Adam
    from keras.callbacks import EarlyStopping
    import matplotlib.pyplot as plt
    from scipy.stats import chi

    latent_dim = 1
    feature_count = len(data[0])

    enc_input = Input(shape=(feature_count, ))
    encoder = Sequential()
    encoder.add(Dense(100, input_shape=(feature_count, )))
    encoder.add(Dense(latent_dim))
    decoder = Sequential()
    decoder.add(Dense(100, input_shape=(latent_dim, )))
    decoder.add(Dense(feature_count))
    final = Model(enc_input, decoder(encoder(enc_input)))
    final.compile(optimizer=Adam(lr=1e-4), loss="mean_squared_error")

    np.random.shuffle(data)
    final.fit(
        x=np.asarray(data),
        y=np.asarray(data),
        # at least 1: fewer than 10 samples used to give batch_size == 0
        batch_size=max(1, len(data) // 10),
        callbacks=[EarlyStopping(monitor='loss', min_delta=0.00001)],
        epochs=500)

    latent_values = encoder.predict(data)
    if visualization:
        # for debugging of distribution of latent_values
        plt.figure('Latent value distribution')
        plt.hist(latent_values, bins=30)
        plt.show()

    center = np.mean(latent_values, axis=0)
    std = np.std(latent_values, axis=0)
    chi_std = chi.std(2, 0, np.linalg.norm(std))

    # x-mean
    # I have a problem with the following line, he assumes that the latent
    # values are already gaussian distributed hence using it directly
    dist = np.linalg.norm(latent_values - center, axis=1)  # Frobenius norm
    if visualization:
        # for debugging of distribution
        plt.figure('L1 norm distribution')
        plt.hist(dist, bins=30)
        plt.show()

    # Drop extreme outliers (beyond no_of_std chi standard deviations).
    dist[dist > no_of_std * chi_std] = 0.
    if visualization:
        # for debugging of distribution
        plt.figure('L1 norm distribution after std filtering')
        plt.hist(dist[dist > 0], bins=30)
        plt.show()

    # Keep only the largest ~10 % of the remaining distances.
    threshold = np.sort(dist)[int(len(dist) * 0.9)]  # this is cutting too much
    dist = np.where(dist < threshold, 0., dist)
    if visualization:
        # for debugging of distribution
        plt.figure('L1 norm distribution after std & threshold filtering')
        plt.hist(dist[dist > 0], bins=30)
        plt.show()

    # Turn the surviving distances into sampling probabilities.
    dist = dist / np.sum(dist)

    synth_latent = []
    for _ in range(no_of_new_samples):
        # choose an ele from 1st argv, given that 1st argv has prob dist in p
        choice = np.random.choice(np.arange(len(dist)), p=dist)
        a = latent_values[choice]
        # Nearest neighbour of `a` among all *other* latent points.
        latent_copy = np.concatenate(
            (latent_values[:choice], latent_values[choice + 1:]))
        latent_copy -= a
        latent_copy = np.linalg.norm(latent_copy, axis=1)  # Frobenius norm
        b = np.argmin(latent_copy)
        if b >= choice:
            b += 1  # undo the index shift caused by removing `choice`
        b = latent_values[b]
        # Random point on the segment between the neighbour and `a`.
        scale = np.random.rand()
        c = scale * (a - b) + b
        synth_latent.append(c)

    new_data = decoder.predict(np.asarray(synth_latent))

    if return_same_sized_combined_dist:
        resampled_data = np.concatenate((data, new_data))
        np.random.shuffle(resampled_data)
        return resampled_data[:len(data)]
    return new_data
class CharacterTagger:
    """
    A class for character-based neural morphological tagger.

    Words are encoded from their characters (one-hot -> dense projection ->
    convolutions -> max over characters -> highway layers) and the resulting
    word vectors are fed to stacked bidirectional LSTMs with a per-word
    softmax over tags.

    Attributes set by training (trailing underscore): ``symbols_`` and
    ``tags_`` (vocabularies), ``model_`` (the compiled Keras model) and
    ``char_output_dim_`` (total number of convolution filters).
    """

    def __init__(self,
                 reverse=False,
                 word_rnn="cnn",
                 min_char_count=1,
                 char_embeddings_size=16,
                 char_conv_layers=1,
                 char_window_size=5,
                 char_filters=None,
                 char_filter_multiple=25,
                 char_highway_layers=1,
                 conv_dropout=0.0,
                 highway_dropout=0.0,
                 intermediate_dropout=0.0,
                 lstm_dropout=0.0,
                 word_lstm_layers=1,
                 word_lstm_units=128,
                 word_dropout=0.0,
                 regularizer=None,
                 batch_size=16,
                 validation_split=0.2,
                 nepochs=25,
                 min_prob=0.01,
                 max_diff=2.0,
                 callbacks=None,
                 verbose=1):
        """Store the hyperparameters and normalize them via ``initialize``."""
        self.reverse = reverse
        self.word_rnn = word_rnn
        self.min_char_count = min_char_count
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        self.regularizer = regularizer
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.nepochs = nepochs
        self.min_prob = min_prob
        self.max_diff = max_diff
        self.callbacks = callbacks
        self.verbose = verbose
        self.initialize()

    def initialize(self):
        """Broadcast scalar settings to per-layer lists and validate them.

        Raises:
            ValueError: if the number of window sizes differs from the number
                of filter sizes, or the number of LSTM unit sizes differs
                from ``word_lstm_layers``.
        """
        if isinstance(self.char_window_size, int):
            self.char_window_size = [self.char_window_size]
        if self.char_filters is None or isinstance(self.char_filters, int):
            self.char_filters = [self.char_filters] * len(
                self.char_window_size)
        if len(self.char_window_size) != len(self.char_filters):
            raise ValueError(
                "There should be the same number of window sizes and filter sizes"
            )
        if isinstance(self.word_lstm_units, int):
            self.word_lstm_units = [self.word_lstm_units
                                    ] * self.word_lstm_layers
        if len(self.word_lstm_units) != self.word_lstm_layers:
            raise ValueError(
                "There should be the same number of lstm layer units and lstm layers"
            )
        if self.regularizer is not None:
            # A numeric coefficient is turned into an L2 regularizer object.
            self.regularizer = kreg.l2(self.regularizer)

    def to_json(self, outfile, model_file, lm_file=None):
        """Dump the tagger configuration to ``outfile`` as JSON.

        Plain attributes are stored as-is, vocabularies through
        ``jsonize()``, numpy arrays as lists, selected callback settings
        under dedicated keys and the regularizer as its L2 coefficient.  If
        the model has been built, its weights are saved to ``model_file``
        and that path is recorded under ``"dump_file"``.

        Args:
            outfile: path of the JSON file to write.
            model_file: path where the model weights are written.
            lm_file: optional path recorded under ``"lm_file"``.
        """
        info = dict()
        if lm_file is not None:
            info["lm_file"] = lm_file
        for (attr, val) in inspect.getmembers(self):
            if not (attr.startswith("__") or inspect.ismethod(val)
                    or isinstance(getattr(CharacterTagger, attr, None),
                                  property) or isinstance(val, np.ndarray)
                    or isinstance(val, Vocabulary) or attr.isupper()
                    or attr in ["callbacks", "model_", "regularizer"]):
                info[attr] = val
            elif isinstance(val, Vocabulary):
                info[attr] = val.jsonize()
            elif isinstance(val, np.ndarray):
                val = val.tolist()
                info[attr] = val
            elif attr == "model_":
                info["dump_file"] = model_file
                self.model_.save_weights(model_file)
            elif attr == "callbacks":
                # `callbacks` defaults to None; iterating None raised a
                # TypeError whenever the config was saved before training.
                for callback in (val or []):
                    if isinstance(callback, EarlyStopping):
                        info["early_stopping_callback"] = {
                            "patience": callback.patience,
                            "monitor": callback.monitor
                        }
                    elif isinstance(callback, ModelCheckpoint):
                        info["model_checkpoint_callback"] =\
                            {key: getattr(callback, key) for key in ["monitor", "filepath"]}
                    elif isinstance(callback, ReduceLROnPlateau):
                        info["LR_callback"] =\
                            {key: getattr(callback, key) for key in
                             ["monitor", "factor", "patience", "cooldown", "epsilon"]}
            elif attr.endswith("regularizer"):
                if val is not None:
                    info[attr] = float(val.l2)
        with open(outfile, "w", encoding="utf8") as fout:
            json.dump(info, fout)

    @property
    def symbols_number_(self):
        """Size of the character vocabulary (available after training)."""
        return self.symbols_.symbols_number_

    @property
    def tags_number_(self):
        """Size of the tag vocabulary (available after training)."""
        return self.tags_.symbols_number_

    def transform(self,
                  data,
                  labels=None,
                  pad=True,
                  return_indexes=True,
                  buckets_number=None,
                  bucket_size=None,
                  join_buckets=True):
        """Convert sentences (and optionally tags) to index arrays.

        Args:
            data: list of sentences, each a sequence of words.
            labels: optional list of tag sequences parallel to ``data``.
            pad: if True, group sentences into buckets with
                ``make_bucket_indexes`` and pad to the bucket length;
                otherwise every sentence forms its own bucket.
            return_indexes: whether to also return the bucket indexes.
            buckets_number, bucket_size, join_buckets: forwarded to
                ``make_bucket_indexes``.

        Returns:
            ``X`` where ``X[i]`` is ``[sentence_matrix]`` or
            ``[sentence_matrix, tag_vector]``; together with the list of
            bucket index lists when ``return_indexes`` is True.
        """
        # Bucketing works with sentence length + 2.
        lengths = [len(x) + 2 for x in data]
        if pad:
            indexes, level_lengths = make_bucket_indexes(
                lengths,
                buckets_number=buckets_number,
                bucket_size=bucket_size,
                join_buckets=join_buckets)
        else:
            indexes = [[i] for i in range(len(data))]
            level_lengths = lengths
        X = [None] * len(data)
        for bucket_indexes, bucket_length in zip(indexes, level_lengths):
            for i in bucket_indexes:
                sent = data[i] if not self.reverse else data[i][::-1]
                X[i] = [
                    self._make_sent_vector(sent, bucket_length=bucket_length)
                ]
                if labels is not None:
                    tags = labels[i] if not self.reverse else labels[i][::-1]
                    X[i].append(
                        self._make_tags_vector(tags,
                                               bucket_length=bucket_length))
        if return_indexes:
            return X, indexes
        else:
            return X

    def _make_sent_vector(self, sent, bucket_length=None):
        """Encode a sentence as a (bucket_length, MAX_WORD_LENGTH + 2) matrix.

        Each row is BEGIN, the indexes of the word's last MAX_WORD_LENGTH
        characters, END, then PAD up to the row width.  Rows beyond the
        sentence length stay zero.
        """
        if bucket_length is None:
            bucket_length = len(sent)
        answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH + 2),
                          dtype=np.int32)
        for i, word in enumerate(sent):
            answer[i, 0] = BEGIN
            m = min(len(word), MAX_WORD_LENGTH)
            for j, x in enumerate(word[-m:]):
                answer[i, j + 1] = self.symbols_.toidx(x)
            answer[i, m + 1] = END
            answer[i, m + 2:] = PAD
        return answer

    def _make_tags_vector(self, tags, bucket_length=None, func=None):
        """Encode tags as an int32 vector of length ``bucket_length``.

        Tags are mapped with ``func`` when given, otherwise with the tag
        vocabulary; positions past ``len(tags)`` stay zero.
        """
        m = len(tags)
        if bucket_length is None:
            bucket_length = m
        answer = np.zeros(shape=(bucket_length, ), dtype=np.int32)
        for i, tag in enumerate(tags):
            answer[i] = self.tags_.toidx(tag) if func is None else func(tag)
        return answer

    def train(self,
              data,
              labels,
              dev_data=None,
              dev_labels=None,
              symbol_vocabulary_file=None,
              tags_vocabulary_file=None,
              lm_file=None,
              model_file=None,
              save_file=None):
        """
        Trains the tagger on data :data: with labels :labels:

        data: list of lists of sequences, a list of sentences
        labels: list of lists of strs,
            a list of sequences of tags, each tag is a feature-value structure
        dev_data, dev_labels: optional held-out set; when omitted, a share
            of every training bucket (``validation_split``) is held out
        symbol_vocabulary_file, tags_vocabulary_file: optional vocabulary
            files to load instead of building vocabularies from the data
        lm_file: optional path, only recorded in the saved configuration
        model_file: optional path for the best-weights checkpoint
        save_file: optional path for the JSON configuration (written only
            when ``model_file`` is given as well)
        :return: self
        """
        if symbol_vocabulary_file is None:
            self.symbols_ = Vocabulary(
                character=True, min_count=self.min_char_count).train(data)
        else:
            # NOTE(review): here a file name is passed, while the tags
            # branch below passes parsed JSON - confirm which one
            # vocabulary_from_json expects.
            self.symbols_ = vocabulary_from_json(symbol_vocabulary_file,
                                                 use_features=False)
        if tags_vocabulary_file is None:
            self.tags_ = FeatureVocabulary(character=False).train(labels)
        else:
            with open(tags_vocabulary_file, "r", encoding="utf8") as fin:
                tags_info = json.load(fin)
            self.tags_ = vocabulary_from_json(tags_info, use_features=True)
        if self.verbose > 0:
            print("{} characters, {} tags".format(self.symbols_number_,
                                                  self.tags_number_))
        X_train, indexes_by_buckets = self.transform(data,
                                                     labels,
                                                     buckets_number=10)
        if dev_data is not None:
            X_dev, dev_indexes_by_buckets =\
                self.transform(dev_data, dev_labels, bucket_size=BUCKET_SIZE)
        else:
            X_dev, dev_indexes_by_buckets = [None] * 2
        self.build()
        if save_file is not None and model_file is not None:
            self.to_json(save_file, model_file, lm_file)
        self._train_on_data(X_train,
                            indexes_by_buckets,
                            X_dev,
                            dev_indexes_by_buckets,
                            model_file=model_file)
        return self

    def _train_on_data(self,
                       X,
                       indexes_by_buckets,
                       X_dev=None,
                       dev_indexes_by_buckets=None,
                       model_file=None):
        """Fit ``model_`` on bucketed data and reload the best checkpoint.

        Without a dev set, the last ``validation_split`` share of every
        (shuffled) bucket is used for validation.  When ``model_file`` is
        given, a best-only ``ModelCheckpoint`` is added to
        ``self.callbacks`` and its weights are loaded back after training.
        """
        if X_dev is None:
            X_dev, dev_indexes_by_buckets = X, []
            validation_split = self.validation_split
        else:
            validation_split = 0.0
        train_indexes_by_buckets = []
        for curr_indexes in indexes_by_buckets:
            np.random.shuffle(curr_indexes)
            if validation_split != 0.0:
                train_bucket_size = int(
                    (1.0 - self.validation_split) * len(curr_indexes))
                train_indexes_by_buckets.append(
                    curr_indexes[:train_bucket_size])
                dev_indexes_by_buckets.append(curr_indexes[train_bucket_size:])
            else:
                train_indexes_by_buckets.append(curr_indexes)
        if model_file is not None:
            callback = ModelCheckpoint(model_file,
                                       monitor="val_acc",
                                       save_weights_only=True,
                                       save_best_only=True)
            if self.callbacks is not None:
                self.callbacks.append(callback)
            else:
                self.callbacks = [callback]
        # Number of batches per epoch: ceil(len / batch_size) per bucket.
        train_steps = sum((1 + (len(x) - 1) // self.batch_size)
                          for x in train_indexes_by_buckets)
        # One validation step per dev bucket.
        dev_steps = len(dev_indexes_by_buckets)
        train_gen = generate_data(X,
                                  train_indexes_by_buckets,
                                  self.tags_number_,
                                  self.batch_size,
                                  use_last=False)
        dev_gen = generate_data(X_dev,
                                dev_indexes_by_buckets,
                                self.tags_number_,
                                use_last=False,
                                shuffle=False)
        self.model_.fit_generator(train_gen,
                                  steps_per_epoch=train_steps,
                                  epochs=self.nepochs,
                                  callbacks=self.callbacks,
                                  validation_data=dev_gen,
                                  validation_steps=dev_steps,
                                  verbose=1)
        if model_file is not None:
            self.model_.load_weights(model_file)
        return self

    def predict(self, data, labels=None, return_probs=False):
        """Tag every sentence of ``data``.

        Args:
            data: list of sentences.
            labels: optional gold tags; they are only passed to
                ``transform`` and excluded from the model inputs.
            return_probs: if True, also return the per-word probability
                rows cut to the sentence length.

        Returns:
            A list of tag lists, or ``(tags, probs)`` when ``return_probs``.
        """
        X_test, indexes_by_buckets =\
            self.transform(data, labels=labels, bucket_size=BUCKET_SIZE)
        answer, probs = [None] * len(data), [None] * len(data)
        # Buckets are processed in reverse order; the former zip with X_test
        # only supplied a value that was overwritten immediately.
        for bucket_indexes in indexes_by_buckets[::-1]:
            X_curr = [
                np.array([X_test[i][j] for i in bucket_indexes])
                for j in range(len(X_test[0]) - int(labels is not None))
            ]
            bucket_probs = self.model_.predict(X_curr, batch_size=256)
            bucket_labels = np.argmax(bucket_probs, axis=-1)
            for curr_labels, curr_probs, index in\
                    zip(bucket_labels, bucket_probs, bucket_indexes):
                curr_labels = curr_labels[:len(data[index])]
                curr_labels = [
                    self.tags_.symbols_[label] for label in curr_labels
                ]
                answer[index], probs[
                    index] = curr_labels, curr_probs[:len(data[index])]
        return (answer, probs) if return_probs else answer

    def score(self, data, labels):
        """Return, per sentence, the model probability of each gold tag."""
        X_test, indexes_by_buckets = self.transform(data,
                                                    labels,
                                                    bucket_size=BUCKET_SIZE)
        probs = [None] * len(data)
        for bucket_indexes in indexes_by_buckets[::-1]:
            # Every element of X_test[i] but the last (the tags) is an input.
            X_curr = [
                np.array([X_test[i][j] for i in bucket_indexes])
                for j in range(len(X_test[0]) - 1)
            ]
            y_curr = [np.array(X_test[i][-1]) for i in bucket_indexes]
            bucket_probs = self.model_.predict(X_curr, batch_size=256)
            for curr_labels, curr_probs, index in zip(y_curr, bucket_probs,
                                                      bucket_indexes):
                L = len(data[index])
                probs[index] = curr_probs[np.arange(L), curr_labels[:L]]
        return probs

    def build(self):
        """Create and compile ``self.model_``; returns ``self``."""
        word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH + 2),
                               dtype="int32")
        inputs = [word_inputs]
        word_outputs = self.build_word_cnn(word_inputs)
        outputs, lstm_outputs = self.build_basic_network(word_outputs)
        compile_args = {
            "optimizer": ko.nadam(lr=0.002, clipnorm=5.0),
            "loss": "categorical_crossentropy",
            "metrics": ["accuracy"]
        }
        self.model_ = Model(inputs, outputs)
        self.model_.compile(**compile_args)
        if self.verbose > 0:
            # summary() prints by itself and returns None, so it must not be
            # wrapped in print().
            self.model_.summary()
        return self

    def build_word_cnn(self, inputs):
        """Build the character-level word encoder.

        One-hot encodes the character indexes, projects them with a
        bias-free dense layer, applies one convolution stack per window
        size, concatenates the stacks, takes the maximum over axis -2 and
        finishes with highway layers.  Also sets ``char_output_dim_`` to the
        total number of filters.
        """
        inputs = kl.Lambda(kb.one_hot,
                           arguments={"num_classes": self.symbols_number_},
                           output_shape=lambda x: tuple(x) +
                           (self.symbols_number_, ))(inputs)
        char_embeddings = kl.Dense(self.char_embeddings_size,
                                   use_bias=False)(inputs)
        conv_outputs = []
        self.char_output_dim_ = 0
        for window_size, filters_number in zip(self.char_window_size,
                                               self.char_filters):
            curr_output = char_embeddings
            # Default filter count grows with the window size, capped at 200.
            curr_filters_number = (min(self.char_filter_multiple *
                                       window_size, 200)
                                   if filters_number is None else
                                   filters_number)
            for _ in range(self.char_conv_layers - 1):
                curr_output = kl.Conv2D(
                    curr_filters_number, (1, window_size),
                    padding="same",
                    activation="relu",
                    data_format="channels_last")(curr_output)
                if self.conv_dropout > 0.0:
                    curr_output = kl.Dropout(self.conv_dropout)(curr_output)
            curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                    padding="same",
                                    activation="relu",
                                    data_format="channels_last")(curr_output)
            conv_outputs.append(curr_output)
            self.char_output_dim_ += curr_filters_number
        if len(conv_outputs) > 1:
            conv_output = kl.Concatenate(axis=-1)(conv_outputs)
        else:
            conv_output = conv_outputs[0]
        highway_input = kl.Lambda(kb.max, arguments={"axis": -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            highway_input = kl.Dropout(
                self.intermediate_dropout)(highway_input)
        for i in range(self.char_highway_layers - 1):
            highway_input = Highway(activation="relu")(highway_input)
            if self.highway_dropout > 0.0:
                highway_input = kl.Dropout(self.highway_dropout)(highway_input)
        highway_output = Highway(activation="relu")(highway_input)
        return highway_output

    def build_basic_network(self, word_outputs):
        """
        Creates the basic network architecture,
        transforming word embeddings to intermediate outputs

        Returns a pair: the per-word softmax tag distribution (layer "p")
        and the output of the last bidirectional LSTM.
        """
        if self.word_dropout > 0.0:
            lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs)
        else:
            lstm_outputs = word_outputs
        for j in range(self.word_lstm_layers - 1):
            lstm_outputs = kl.Bidirectional(
                kl.LSTM(self.word_lstm_units[j],
                        return_sequences=True,
                        dropout=self.lstm_dropout))(lstm_outputs)
        lstm_outputs = kl.Bidirectional(
            kl.LSTM(self.word_lstm_units[-1],
                    return_sequences=True,
                    dropout=self.lstm_dropout))(lstm_outputs)
        pre_outputs = kl.TimeDistributed(kl.Dense(
            self.tags_number_,
            activation="softmax",
            activity_regularizer=self.regularizer),
                                         name="p")(lstm_outputs)
        return pre_outputs, lstm_outputs
# Flatten feature map to a 1-dim tensor x = layers.Flatten()(x) # Create a fully connected layer with ReLU activation and 512 hidden units x = layers.Dense(512, activation='relu')(x) # Add a dropout rate of 0.5 x = layers.Dropout(0.5)(x) # Create output layer with a single node and sigmoid activation output = layers.Dense(1, activation='sigmoid')(x) # Configure and compile the model model = Model(img_input, output) model.compile(loss='binary_crossentropy', optimizer=RMSprop(lr=0.001), metrics=['acc']) history = model.fit_generator(train_generator, steps_per_epoch=100, epochs=30, validation_data=validation_generator, validation_steps=50, verbose=2) # Retrieve a list of accuracy results on training and test data # sets for each training epoch acc = history.history['acc'] val_acc = history.history['val_acc'] # Retrieve a list of list results on training and test data
# Make both of the discriminator networks non-trainable discriminatorA.trainable = False discriminatorB.trainable = False probsA = discriminatorA(generatedA) probsB = discriminatorB(generatedB) adversarial_model = Model(inputs=[inputA, inputB], outputs=[ probsA, probsB, reconstructedA, reconstructedB, generatedAId, generatedBId ]) adversarial_model.compile( loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'], loss_weights=[1, 1, 10.0, 10.0, 1.0, 1.0], optimizer=common_optimizer) tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()), write_images=True, write_grads=True, write_graph=True) tensorboard.set_model(generatorAToB) tensorboard.set_model(generatorBToA) tensorboard.set_model(discriminatorA) tensorboard.set_model(discriminatorB) real_labels = np.ones((batch_size, 7, 7, 1)) fake_labels = np.zeros((batch_size, 7, 7, 1)) for epoch in range(epochs):
import logging

import keras
from keras import models
from keras.layers import Conv2D, Flatten, MaxPooling2D, Dense, GlobalAveragePooling2D, Dropout
from keras import layers
from keras import Model
from keras.applications.inception_v3 import InceptionV3
# The script uses `ImageDataGenerator` and `keras.optimizers` below; neither
# name was imported before (`from keras import ...` does not bind `keras`).
from keras.preprocessing.image import ImageDataGenerator

# Input resolution fed to the network (width x height, 3 channels).
X_size = 75
Y_size = 75

# InceptionV3 backbone without its classification top.
base_model = InceptionV3(include_top=False,
                         input_shape=(X_size, Y_size, 3),
                         classes=52)

# New head: global average pooling -> heavy dropout -> 52-way softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.7)(x)
predictions = Dense(52, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Stream training images from disk, rescaling pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = datagen.flow_from_directory('./trafficSignsHW/trainFULL',
                                              target_size=(X_size, Y_size),
                                              batch_size=32,
                                              class_mode='categorical')

model.compile(keras.optimizers.Adam(),
              'categorical_crossentropy',
              metrics=['accuracy'])

# 32 epochs of 20 batches each.
model.fit_generator(train_generator, steps_per_epoch=20, epochs=32)
model.save("my_model.h5")
def build(self,
          sentence_length,
          word_length,
          target_label_dims,
          word_vocab,
          word_vocab_size,
          char_vocab_size,
          word_embedding_dims=100,
          char_embedding_dims=25,
          word_lstm_dims=25,
          tagger_lstm_dims=100,
          tagger_fc_dims=100,
          dropout=0.2,
          external_embedding_model=None):
    """
    Assemble and compile the NER-CRF network and store it in ``self.model``

    The model takes word ids and per-word character ids, concatenates word
    embeddings with character Bi-LSTM features, encodes the sequence with a
    Bi-LSTM followed by a dense layer, and classifies with a CRF.

    Args:
        sentence_length (int): max sentence length
        word_length (int): max word length in characters
        target_label_dims (int): number of entity labels (for classification)
        word_vocab (dict): word to int dictionary
        word_vocab_size (int): word vocabulary size
        char_vocab_size (int): character vocabulary size
        word_embedding_dims (int): word embedding dimensions (used only
            when no external embedding model is given)
        char_embedding_dims (int): character embedding dimensions
        word_lstm_dims (int): character LSTM feature extractor output dimensions
        tagger_lstm_dims (int): word tagger LSTM output dimensions
        tagger_fc_dims (int): output fully-connected layer size
        dropout (float): dropout rate
        external_embedding_model (str): path to external word embedding model
    """
    # ---- word-level input ----
    words_input = Input(shape=(sentence_length,), name='words_input')

    if external_embedding_model is None:
        # no pre-trained vectors: the embedding is learned from scratch
        word_embedder = Embedding(word_vocab_size,
                                  word_embedding_dims,
                                  input_length=sentence_length)
    else:
        pretrained, pretrained_dims = load_word_embeddings(external_embedding_model)
        # rows of words missing from the external model remain all-zero
        weight_matrix = np.zeros((word_vocab_size, pretrained_dims))
        for token, row in word_vocab.items():
            vector = pretrained.get(token.lower())
            if vector is None:
                continue
            weight_matrix[row] = vector
        # frozen layer (trainable=False) initialised with the external vectors
        word_embedder = Embedding(word_vocab_size,
                                  pretrained_dims,
                                  weights=[weight_matrix],
                                  input_length=sentence_length,
                                  trainable=False)

    word_features = word_embedder(words_input)
    word_features = Dropout(dropout)(word_features)

    # ---- character-level input: one Bi-LSTM summary per word ----
    word_chars_input = Input(shape=(sentence_length, word_length),
                             name='word_chars_input')
    char_embedder = Embedding(char_vocab_size,
                              char_embedding_dims,
                              input_length=word_length)
    char_features = TimeDistributed(char_embedder)(word_chars_input)
    char_features = TimeDistributed(Bidirectional(LSTM(word_lstm_dims)))(char_features)
    char_features = Dropout(dropout)(char_features)

    # ---- joint token representation ----
    token_features = concatenate([word_features, char_features], axis=-1)

    # ---- sequence encoder ----
    encoded = Bidirectional(LSTM(tagger_lstm_dims, return_sequences=True))(token_features)
    encoded = Dropout(dropout)(encoded)
    projected = Dense(tagger_fc_dims)(encoded)

    # ---- CRF classifier on top of the dense projection ----
    crf_layer = CRF(target_label_dims, sparse_target=False)
    label_scores = crf_layer(projected)

    # ---- compile with the CRF's own loss and accuracy ----
    network = Model(inputs=[words_input, word_chars_input], outputs=label_scores)
    network.compile(loss=crf_layer.loss_function,
                    optimizer='adam',
                    metrics=[crf_layer.accuracy])
    self.model = network
def train(hps, epochs, save_interval=200):
    """Train the capsule GAN and save loss plots.

    Each epoch trains the discriminator on half a batch of real images
    (smoothed label 0.9) and half a batch of generated images (label 0),
    then trains the generator through the combined model with the
    discriminator frozen.  Losses are appended to the history lists of the
    ``CapsuleGANModel`` instance.

    Args:
        hps: hyperparameter object; the fields read here are ``batch_size``,
            ``learning_rate``, ``beta_1``, ``beta_2``, ``epsilon``,
            ``module`` and ``model_dir``.
        epochs (int): number of training iterations.
        save_interval (int): sample images are saved every
            ``5 * save_interval`` epochs and both networks every
            ``10 * save_interval`` epochs.
    """
    half_batch = hps.batch_size // 2
    dataset, shape = data.load_dataset(hps)

    # the model object also holds the loss histories used for plotting
    model = mb.CapsuleGANModel(hps, shape)
    discriminator = model.build_discriminator()
    generator = model.build_generator()

    discriminator.compile(loss='binary_crossentropy',
                          optimizer=Adam(hps.learning_rate, hps.beta_1,
                                         hps.beta_2, hps.epsilon),
                          metrics=['accuracy'])
    generator.compile(loss='binary_crossentropy',
                      optimizer=Adam(hps.learning_rate, hps.beta_1,
                                     hps.beta_2, hps.epsilon))

    # Combined model: noise -> generator -> (frozen) discriminator.
    z = Input(shape=(100, ))
    img = generator(z)
    discriminator.trainable = False
    valid = discriminator(img)
    combined = Model(z, valid)
    combined.compile(loss='binary_crossentropy',
                     optimizer=Adam(hps.learning_rate, hps.beta_1, hps.beta_2,
                                    hps.epsilon))

    # Target labels never change, so build them once.
    real_labels = np.ones((half_batch, 1)) * 0.9  # 0.9 for label smoothing
    fake_labels = np.zeros((half_batch, 1))
    generator_targets = np.ones((hps.batch_size, 1))

    for epoch in range(epochs):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        # select a random half batch of images
        idx = np.random.randint(0, dataset.shape[0], half_batch)
        imgs = dataset[idx]
        noise = np.random.normal(0, 1, (half_batch, 100))
        # generate a half batch of new images
        gen_imgs = generator.predict(noise)
        # train the discriminator on real and generated images separately
        d_loss_real = discriminator.train_on_batch(imgs, real_labels)
        d_loss_fake = discriminator.train_on_batch(gen_imgs, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------
        noise = np.random.normal(0, 1, (hps.batch_size, 100))
        # the generator wants the discriminator to label the generated
        # samples as valid (ones)
        g_loss = combined.train_on_batch(noise, generator_targets)

        # Plot the progress
        print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
              (epoch, d_loss[0], 100 * d_loss[1], g_loss))
        model.D_L_REAL.append(d_loss_real)
        model.D_L_FAKE.append(d_loss_fake)
        model.D_L.append(d_loss)
        model.D_ACC.append(d_loss[1])
        model.G_L.append(g_loss)

        # if at save interval => save generated image samples
        if epoch % (5 * save_interval) == 0:
            su.save_imgs(hps.module, generator, epoch, hps)
        if epoch % (10 * save_interval) == 0:
            generator.save(
                os.path.join(hps.model_dir,
                             hps.module + '_gen_model_{}.h5'.format(epoch)))
            discriminator.save(
                os.path.join(hps.model_dir,
                             hps.module + '_dis_model_{}.h5'.format(epoch)))

    # Each D_L entry is a [loss, accuracy] pair, so this draws two curves.
    plt.plot(model.D_L)
    plt.title('Discriminator results')
    plt.xlabel('Epochs')
    plt.ylabel('Discriminator Loss (blue), Discriminator Accuracy (orange)')
    plt.legend(['Discriminator Loss', 'Discriminator Accuracy'])
    su.save_fig("{}_DL".format(hps.module))

    # NOTE(review): if su.save_fig does not start a new figure, this curve
    # is drawn on top of the discriminator plot - confirm.
    plt.plot(model.G_L)
    plt.title('Generator results')
    plt.xlabel('Epochs')
    plt.ylabel('Generator Loss (blue)')
    # legend() expects a sequence of labels; a bare string is iterated
    # character by character and would label the curve just "G".
    plt.legend(['Generator Loss'])
    su.save_fig("{}_GL".format(hps.module))
def build_model(vectors, shape, settings):
    """Build and compile the two-sentence attend / compare / aggregate model.

    Args:
        vectors: embedding vectors handed to ``create_embedding``.
        shape: ``(max_length, nr_hidden, nr_class)``.
        settings: mapping with ``"entail_dir"`` (``"both"``, ``"left"``, or
            anything else for the remaining direction) and ``"lr"`` (Adam
            learning rate).

    Returns:
        The compiled Keras model taking the inputs ``words1`` and ``words2``.
    """
    max_length, nr_hidden, nr_class = shape

    words_a = layers.Input(shape=(max_length,), dtype="int32", name="words1")
    words_b = layers.Input(shape=(max_length,), dtype="int32", name="words2")

    # Both sentences go through the same (projected) embedding.
    shared_embedding = create_embedding(vectors, max_length, nr_hidden)
    a = shared_embedding(words_a)
    b = shared_embedding(words_b)

    # Step 1 (attend): unnormalised attention weights between the sentences.
    attend_net = create_feedforward(nr_hidden)
    raw_attention = layers.dot([attend_net(a), attend_net(b)], axes=-1)

    compare_net = create_feedforward(nr_hidden)

    direction = settings["entail_dir"]
    if direction == "both":
        weights_for_a = layers.Lambda(normalizer(1))(raw_attention)
        weights_for_b = layers.Lambda(normalizer(2))(raw_attention)
        alpha = layers.dot([weights_for_a, a], axes=1)
        beta = layers.dot([weights_for_b, b], axes=1)

        # Step 2 (compare)
        a_with_beta = layers.concatenate([a, beta])
        b_with_alpha = layers.concatenate([b, alpha])
        compared_a = layers.TimeDistributed(compare_net)(a_with_beta)
        compared_b = layers.TimeDistributed(compare_net)(b_with_alpha)

        # Step 3 (aggregate)
        summed_a = layers.Lambda(sum_word)(compared_a)
        summed_b = layers.Lambda(sum_word)(compared_b)
        aggregated = layers.concatenate([summed_a, summed_b])
    elif direction == "left":
        weights_for_a = layers.Lambda(normalizer(1))(raw_attention)
        alpha = layers.dot([weights_for_a, a], axes=1)
        b_with_alpha = layers.concatenate([b, alpha])
        compared_b = layers.TimeDistributed(compare_net)(b_with_alpha)
        aggregated = layers.Lambda(sum_word)(compared_b)
    else:
        weights_for_b = layers.Lambda(normalizer(2))(raw_attention)
        beta = layers.dot([weights_for_b, b], axes=1)
        a_with_beta = layers.concatenate([a, beta])
        compared_a = layers.TimeDistributed(compare_net)(a_with_beta)
        aggregated = layers.Lambda(sum_word)(compared_a)

    # Final feed-forward block and softmax over the classes.
    output_net = create_feedforward(nr_hidden)
    hidden = output_net(aggregated)
    class_probs = layers.Dense(nr_class, activation="softmax")(hidden)

    network = Model([words_a, words_b], class_probs)
    network.compile(
        optimizer=optimizers.Adam(lr=settings["lr"]),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network
def construct_model(self):
    r"""
    Construct the :math:`1`-st order and :math:`0`-th order models, which are used to approximate the
    :math:`U_1(x, C(x))` and the :math:`U_0(x)` utilities respectively. For each pair of objects in
    :math:`x_i, x_j \in Q` :math:`U_1(x, C(x))` we construct :class:`CmpNetCore` with weight sharing to
    approximate a pairwise-matrix. A pairwise matrix with index (i,j) corresponds to the
    :math:`U_1(x_i,x_j)` is a measure of how favorable it is to choose :math:`x_i` over :math:`x_j`.
    Using this matrix we calculate the borda score for each object to calculate :math:`U_1(x, C(x))`.
    For `0`-th order model we construct :math:`\lvert Q \rvert` sequential networks whose weights are
    shared to evaluate the :math:`U_0(x)` for each object in the query set :math:`Q`. The output mode
    is using linear activation.

    Returns
    -------
    model: keras :class:`Model`
        Neural network to learn the FETA utility score
    """

    def create_input_lambda(i):
        # A factory so that each Lambda captures its own index `i`.
        return Lambda(lambda x: x[:, i])

    def mean_over_pairs(s):
        # Despite the "sum" wording used elsewhere, the score of an object is
        # the MEAN of its row of pairwise outputs.
        return K.mean(s, axis=1, keepdims=True)

    # Slice every object out of the input once and reuse the slices for both
    # sub-models instead of creating new slice layers for every pair.
    inputs = [
        create_input_lambda(i)(self.input_layer)
        for i in range(self.n_objects)
    ]

    if self._use_zeroth_model:
        self.logger.debug('Create 0th order model')
        zeroth_order_outputs = []
        for x in inputs:
            for hidden in self.hidden_layers_zeroth:
                x = hidden(x)
            zeroth_order_outputs.append(self.output_node_zeroth(x))
        zeroth_order_scores = concatenate(zeroth_order_outputs)
        self.logger.debug('0th order model finished')

    self.logger.debug('Create 1st order model')
    # outputs[i] collects the pairwise outputs of object i against the others
    outputs = [list() for _ in range(self.n_objects)]
    for i, j in combinations(range(self.n_objects), 2):
        x1 = inputs[i]
        x2 = inputs[j]
        # Feed the pair in both orders through the shared hidden layers.
        x1x2 = concatenate([x1, x2])
        x2x1 = concatenate([x2, x1])
        for hidden in self.hidden_layers:
            x1x2 = hidden(x1x2)
            x2x1 = hidden(x2x1)
        merged_left = concatenate([x1x2, x2x1])
        merged_right = concatenate([x2x1, x1x2])
        n_g = self.output_node(merged_left)
        n_l = self.output_node(merged_right)
        outputs[i].append(n_g)
        outputs[j].append(n_l)

    # convert rows of pairwise matrix to keras layers:
    outputs = [concatenate(x) for x in outputs]

    # compute utility scores:
    scores = [Lambda(mean_over_pairs)(x) for x in outputs]
    scores = concatenate(scores)
    self.logger.debug('1st order model finished')

    if self._use_zeroth_model:
        scores = add([scores, zeroth_order_scores])

    model = Model(inputs=self.input_layer, outputs=scores)
    self.logger.debug('Compiling complete model...')
    model.compile(loss=self.loss_function,
                  optimizer=self.optimizer,
                  metrics=self.metrics)
    return model
# Multi-output model: one shared 1-D convolutional encoder over token ids
# with three heads (age, income group, gender).
vocabulary_size = 50000
num_income_groups = 10

# Variable-length sequences of integer token ids.
posts_input = Input(shape=(None,), dtype='int32', name='posts')
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)

# Shared convolutional encoder.
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

# Heads: the output layer names are the keys used in compile() below.
age_prediction = layers.Dense(1, name='age')(x)  # no activation (linear)
income_prediction = layers.Dense(num_income_groups,
                                 activation='softmax',
                                 name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input,
              [age_prediction, income_prediction, gender_prediction])

# Per-output losses, combined with the given weights.
model.compile(optimizer='rmsprop',
              loss={'age': 'mse',
                    'income': 'categorical_crossentropy',
                    'gender': 'binary_crossentropy'},
              loss_weights={'age': 0.25,
                            'income': 1.0,
                            'gender': 10.0})
# Experiment: compare the hand-written `mae` helper (plain, masked and
# weighted) with Keras' own evaluation.  `n_samples`, `dx`, `dy`, `dout`,
# `mae` and `set_model_weights_to_unity` are defined before this fragment.

# Random integer inputs in [0, 10) and all-ones targets.
X = np.random.randint(10, size=(n_samples, dx, dy))
y_true = np.ones((n_samples, dx, dout))
# X[2, 0] = mask_value
# X[3, 1] = mask_value

# Weights shaped like the targets; only position [0, 0] is switched off.
sample_weight = np.ones_like(y_true)
# sample_weight[2, 0] = 0
# sample_weight[3, 1] = 0
sample_weight[0, 0] = 0

# The same Dense(dout) layer applied at every position of the second axis.
inp = Input(shape=(dx, dy))
dense = TimeDistributed(Dense(dout))(inp)
model = Model(inputs=inp, outputs=dense)
model.summary()
# "temporal" mode: sample weights are given per (sample, step).
model.compile(optimizer="rmsprop", loss="mae", sample_weight_mode="temporal")
set_model_weights_to_unity(model)

y_pred = model.predict(X, verbose=0)

# Reference losses from the helper.
unmasked_loss = mae(y_true, y_pred, mask=False)
masked_loss = mae(y_true, y_pred, mask=True)
weighted_loss = mae(y_true, y_pred, mask=False, weights=sample_weight)

# Losses as computed by Keras; the weight array is reduced to 2-D by taking
# index 0 of its last axis.
keras_loss = model.evaluate(X, y_true, verbose=0)
keras_loss_weighted = model.evaluate(X,
                                     y_true,
                                     sample_weight=sample_weight[..., 0],
                                     verbose=0)

print(f"unmasked loss: {unmasked_loss}")
print(f"masked loss: {masked_loss}")
print(f"weighted loss: {weighted_loss}")
print(f"evaluate with Keras: {keras_loss}")
model = Dropout(droprate)( model ) # To forget ordrop the few pixels from the layer to avoid the learnign the noise of the parameter. #Fully connected final layer model = Dense(num_classes)( model) # To connect all the layers as fully connected layers. model = Activation('softmax')( model ) # As its a multi class classfication, softmax is used. This will add the model probability output from each output nodes to 1. val = Model(inputs, model) #compile model using accuracy to measure model performance val.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.RMSprop(), metrics=['accuracy']) #describe the layers val.summary() # define path to save model model_path = Path + 'fm_cnn_BN16.h5' # prepare callbacks callbacks = [ EarlyStopping(monitor='val_acc', patience=10, mode='max', verbose=1), ModelCheckpoint(model_path, monitor='val_acc', save_best_only=True, mode='max',
# Classification head: three stacked Dense layers (no activation between
# them) followed by a softmax over 4 classes.
model = layers.Dense(128)(model)
model = layers.Dense(64)(model)
model = layers.Dense(4)(model)
output = layers.Activation('softmax')(model)

model = Model(input, output)
model.summary()

# Stop after 5 epochs without `val_acc` improvement; keep the checkpoint
# with the best `val_loss`.
callback_list = [
    keras.callbacks.EarlyStopping(monitor='val_acc', patience=5),
    keras.callbacks.ModelCheckpoint(filepath='ResNet18.h5',
                                    monitor='val_loss',
                                    save_best_only=True),
]
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

import time

# The elapsed time is stored in `elapsed` rather than `time`: rebinding
# `time` shadowed the module and forced a re-import before every run.
start = time.time()
history = model.fit(train_image,
                    train_label,
                    epochs=100,
                    callbacks=callback_list,
                    validation_data=(test_image, test_label))
elapsed = time.time() - start

# First message: elapsed time in seconds (measured around model.fit).
# Second message: total number of parameters.
print("테스트 시 소요 시간(초) : {}".format(elapsed))
print("전체 파라미터 수 : {}".format(sum(arr.size for arr in model.get_weights())))

# Collect the training curves to check graphically how training went.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
# NOTE: this fragment is Python 2 (print statement; `input()` evaluates the
# typed text, which is how `epoch` and `batch` become numbers).

# Two alternative output heads on top of `gru_kata`: a CRF layer and a
# plain softmax Dense layer, both with len(label.index) + 1 classes.
crf = CRF(len(label.index) + 1, learn_mode='marginal')(gru_kata)
preds = Dense(len(label.index) + 1, activation='softmax')(gru_kata)

print "Model Choice:"
model_choice = 1  # input('Enter 1 for CRF or 2 for Dense layer: ')

# The CRF model is the default; it is replaced by the Dense model only when
# model_choice == 2 (currently hard-coded to 1).
model = Model(inputs=[sequence_input, sequence_input_c], outputs=[crf])
if model_choice == 2:
    model = Model(inputs=[sequence_input, sequence_input_c], outputs=[preds])

optimizer = 'adam'  # raw_input('Enter optimizer (default rmsprop): ')
loss = 'binary_crossentropy'  # raw_input('Enter loss function (default categorical_crossentropy): ')
model.summary()
model.compile(loss=loss, optimizer=optimizer, metrics=['acc'])

# Not read anywhere in this fragment.
load_m = False

"""
Training
"""
# Epoch count and batch size are read interactively.
epoch = input('Enter number of epochs: ')
batch = input('Enter number of batch size: ')
model.fit([np.array(x_train.padded), np.array(x_train_char)],
          [np.array(y_encoded)],
          epochs=epoch,
          batch_size=batch)

"""
Converting text data to int using index
true_positives = K.cast(true_ones, K.floatx()) true_positive_count = K.sum(true_positives) label_positive_count = K.sum(y_true) recall = true_positive_count / label_positive_count return recall def precision_m(y_true, y_pred): y_true = K.round(y_true) y_pred = K.round(y_pred) pair_sum = tf.add(y_true, y_pred) true_ones = K.equal(pair_sum, 2.) true_positives = K.cast(true_ones, K.floatx()) true_positive_count = K.sum(true_positives) pred_positive_count = K.sum(y_pred) precision = true_positive_count / pred_positive_count return precision def f1_m(y_true, y_pred): precision = precision_m(y_true, y_pred) recall = recall_m(y_true, y_pred) return 2 * ((precision * recall) / (precision + recall)) # Another way to define your optimizer adam = Adam(lr=0.001) # We add metrics to get more results you want to see model = Model(inputs="your inputs") model.compile(optimizer=adam, loss="mean_squared_error", metrics=['categorical_accuracy', recall_m, precision_m, f1_m])
D_out_11, D_out_12, D_out_13, D_out_14,
D_out_21, D_out_22, D_out_23, D_out_24,
D_out_31, D_out_32, D_out_33, D_out_34,
D_out_41, D_out_42, D_out_43, D_out_44
])

# Combined model: returns the generated image and `GAN_output`.
GAN = Model(inputs=[G_input],
            outputs=[generated_image, GAN_output],
            name="GAN")

# One loss per output, in output order, weighted 1 : 0.005.
GAN_loss = [laplacian_loss, 'binary_crossentropy']
opt_GAN = Adam(lr=lr_schedule(0, G_inital_lr, G_decay_factor, G_decay_period),
               beta_1=0.9,
               beta_2=0.999,
               epsilon=1e-08)
loss_weights = [1, 0.005]
# NOTE(review): the metrics key 'model_1' presumably is the auto-generated
# name of the generator sub-model - confirm it matches the output name.
GAN.compile(loss=GAN_loss,
            loss_weights=loss_weights,
            optimizer=opt_GAN,
            metrics={'model_1': mae_on_first_channel})
GAN.summary()

# training start here
real_val = 1.0
fake_val = 0.0

# can load pretrained models here, not necessary
# D.load_weights('save/pre_D.hdf5')
G.load_weights('save/formalin_g_G_500.hdf5')

for iteration in range(0, num_iters, 1):
    # train D until D can distinguish real and generated images
    # The discriminator learning rate is recomputed from the schedule and
    # written into the optimizer at every iteration.
    lr_D = lr_schedule(iteration, D_inital_lr, D_decay_factor, D_decay_period)
    K.set_value(D.optimizer.lr, lr_D)
# Shared embedding layer: the target word and the context word are looked up
# in the same table, initialised from `model_mat_skip_gram`.
# NOTE(review): Keras expects `weights` to be a list of arrays -- confirm that
# `model_mat_skip_gram` is already wrapped in a list.
embedding = Embedding(vocab_size,
                      embedding_vector_size,
                      input_length=1,
                      name='embedding',
                      weights=model_mat_skip_gram)
target = embedding(input_target)
target = Reshape((embedding_vector_size, 1))(target)
context = embedding(input_context)
context = Reshape((embedding_vector_size, 1))(context)

# setup a cosine similarity operation which will be output in a secondary model
# (Keras 1 spelling was: merge([target, context], mode='cos', dot_axes=0))
similarity = dot([target, context], axes=1, normalize=True)

# now perform the dot product operation to get a similarity measure
dot_product = dot([target, context], axes=1)
dot_product = Reshape((1, ))(dot_product)
# add the sigmoid output layer
output = Dense(1, activation='sigmoid')(dot_product)

# create the primary training model
# `inputs=` / `outputs=` are the Keras 2 keyword names; the Keras 1 spellings
# `input=` / `output=` are deprecated and rejected by newer releases.
model = Model(inputs=[input_target, input_context], outputs=output)
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
model.summary()

# create a secondary validation model to run our similarity checks during training
validation_model = Model(inputs=[input_target, input_context],
                         outputs=similarity)

callb = SimilarityCallback(t)
callb.run_sim()
xtest = sequence.pad_sequences(xtest, maxlen=maxima_longitud) #Creacion de modelo entrada = Input(shape=(maxima_longitud, )) x = Embedding(maximas_caracteristicas, tamano_embedding)(entrada) #Capa especial para texto. x = LSTM(tamano_embedding, return_sequences=True, activation='relu')( x) #returns_sequences devuelde los estados obtenidos por embedding. x = Flatten()(x) #Llevar a una dimension x = Dense(1, activation="sigmoid", kernel_initializer='zeros', bias_initializer='zeros')(x) modelo = Model(inputs=entrada, outputs=x) modelo.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy']) modelo.summary() #Clasificacion binaria, positivo o negativo #entrenamiento #Callback para guardar el mejor modelo de las mejores epocas. checkpoint = ModelCheckpoint('deteccion_texto.h5', monitor='val_binary_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto') history = modelo.fit(xentrenamiento, yentrenamiento,
descriptionEmbeddings, #input_length=MAX_DESC_SEQUENCE_LENGTH, mask_zero=True)(descriptionBranchI) descriptionBranch = SpatialDropout1D(rate=0.2)( descriptionBranch) #Masks the same embedding element for all tokens descriptionBranch = BatchNormalization()(descriptionBranch) descriptionBranch = Dropout(0.2)(descriptionBranch) descriptionBranch = LSTM(units=30)(descriptionBranch) descriptionBranch = BatchNormalization()(descriptionBranch) descriptionBranch = Dropout(0.2, name="description")(descriptionBranch) descriptionBranchO = Dense(len(set(classes)), activation='softmax')(descriptionBranch) descriptionModel = Model(inputs=descriptionBranchI, outputs=descriptionBranchO) descriptionModel.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy']) start = time.time() descriptionHistory = descriptionModel.fit(trainDescription, classes, epochs=nb_epoch, batch_size=batch_size, verbose=verbosity, validation_split=validation_split, callbacks=callbacks) print("descriptionBranch finished after " + str(datetime.timedelta(seconds=round(time.time() - start)))) descriptionModel.save(modelPath + 'descriptionBranchNorm.h5') ##################### #2a.) Link Model for Domain
model.summary()

# Earlier Sequential variant, kept for reference:
# model = Sequential()
# model.add(xception)
# model.add(layers.Dense(1000, activation='relu'))
# model.add(layers.Dense(1000, activation='relu'))
# model.add(Dropout(0.5))
# model.add(layers.Dense(num_classes, activation='softmax'))

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train in `loops` rounds of `nb_epochs` epochs each.
loops = 1
for i in range(loops):
    print('\n\nEPOCH SET {}'.format(i))
    nb_epochs = 2
    # Gather the generator-driven fit arguments first, then run the round.
    fit_options = dict(
        steps_per_epoch=train_generator.samples // batch_size,
        validation_data=validation_generator,
        validation_steps=validation_generator.samples // batch_size,
        epochs=nb_epochs,
    )
    history = model.fit_generator(train_generator, **fit_options)
    name = 'Xception_places_200_FE'
val_dir, target_size=(IM_WIDTH, IM_HEIGHT), batch_size=batch_size, class_mode='categorical') inception_model = InceptionResNetV2(include_top=False) x = GlobalAveragePooling2D(name='avg_pool')(inception_model.output) x = Dense(nb_classes, activation='softmax', name='predictions')(x) model = Model(inception_model.input, x) # model = load_model('boxes.h5') for layer in model.layers: layer.trainable = False model.layers[-1].trainable = True model.layers[-2].trainable = True checkpoint = ModelCheckpoint("boxes_trained_epoch_{epoch}.h5", monitor='val_loss', save_weights_only=False, save_best_only=True) model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy']) model.fit_generator(train_generator, validation_data=validation_generator, epochs=nb_epoch, callbacks=[checkpoint])
#conv2 = Conv1D(128, 3, activation='tanh')(max_1)
#max_2 = MaxPooling1D(3)(conv2)
question_out_1 = Flatten()(question_dmax_2)
#out_1 = LSTM(128)(max_1)

# Concatenate the relation branch and the question branch, then classify the
# pair with a small fully connected stack ending in one sigmoid unit.
merged_vector = merge([relation_out_1, question_out_1], mode='concat')  # good
dense_1 = Dense(128, activation='relu')(merged_vector)
dense_2 = Dense(128, activation='relu')(dense_1)
dense_3 = Dense(128, activation='relu')(dense_2)
predictions = Dense(1, activation='sigmoid')(dense_3)
#predictions = Dense(len(labels_index), activation='softmax')(merged_vector)

# This snippet uses the Keras 1 API throughout (`merge`, `input=`/`output=`,
# `nb_epoch`), so those spellings are kept as they are.
model = Model(input=[tweet_relation, tweet_ques], output=predictions)
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit([rela_train, ques_train],
          label_train,
          nb_epoch=10,
          batch_size=20,
          verbose=1,
          shuffle=True)

# Persist the architecture (JSON) and the weights (HDF5) separately.
json_string = model.to_json()
# json_string = model.get_config()
# Use a context manager so the file is flushed and closed deterministically
# instead of relying on garbage collection of an anonymous file object.
with open('my_model_architecture.json', 'w') as architecture_file:
    architecture_file.write(json_string)
model.save_weights('my_model_weights.h5')

# Evaluated on the training data itself, so these are training metrics.
score = model.evaluate([rela_train, ques_train], label_train, verbose=0)
print('train score:', score[0])
print('train accuracy:', score[1])
# Tail of the convolutional feature extractor: pooling followed by three
# 1x1 convolutions (128 -> 64 -> 32 filters) with ReLU / LeakyReLU in between.
z = layers.MaxPooling2D((3, 3))(z)
z = layers.Conv2D(128, (1, 1), padding='same')(z)
z = layers.ReLU()(z)
z = layers.Conv2D(64, (1, 1))(z)
z = layers.LeakyReLU(alpha=0.3)(z)
z = layers.Conv2D(32, (1, 1))(z)
z = layers.ReLU()(z)
z = layers.Flatten()(z)
# The weights of the following layer are the ones being captured
z = layers.Dense(32, kernel_regularizer=l1(0.001))(z)
z = layers.ReLU()(z)
# Single sigmoid unit: binary classification output.
model_output_1 = layers.Dense(1, activation='sigmoid')(z)
model = Model(input_1, model_output_1)
model.summary()
model.compile(loss=['binary_crossentropy'],
              optimizer=optimizers.Nadam(lr=1e-2),
              metrics=['acc'])
path = os.path.join(os.getcwd(), 'logs/')
# Multiply the learning rate by 0.2 when val_loss has not improved for 5 epochs.
callbacks_list = [keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                    factor=0.2,
                                                    patience=5)]
# Whole batches per pass for each generator (any partial last batch is dropped
# by the floor division).
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size
STEP_SIZE_TEST = test_generator.n // test_generator.batch_size
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=40,
    callbacks=callbacks_list,
    validation_data=validation_generator,
class PolicyValueNetwork:
    """ AlphaZero Residual-CNN

    Wraps a two-headed Keras model: a shared convolutional trunk feeding a
    value head (single tanh output) and a policy head (softmax over
    ``Game["board_size"]`` outputs). The model's outputs are ordered
    ``[value, policy]``.
    """

    def __init__(self, model_file=None):
        """Build the network and optionally restore saved weights.

        Args:
            model_file: base name (without directory or ``.h5`` extension) of
                a weights file to load via :meth:`restore_model`, or ``None``
                to keep freshly initialised weights.
        """
        # Build Network Architecture
        input_shape = Board().encoded_states().shape  # (6, 15, 15)
        inputs = Input(input_shape)

        # ConvBlock(...) yields several layers, unpacked into each Sequential.
        shared_net = Sequential([
            *ConvBlock(32, input_shape=input_shape),
            *ConvBlock(64),
            *ConvBlock(128)
        ], "shared_net")

        # Both heads embed the same `shared_net` object, so the trunk weights
        # are shared between them.
        policy_head = Sequential([
            shared_net,
            *ConvBlock(4, (1, 1), "relu"),
            Flatten(),
            Dense(Game["board_size"], kernel_regularizer=l2()),
            Activation("softmax")
        ], "policy_head")

        value_head = Sequential([
            shared_net,
            *ConvBlock(2, (1, 1), "relu"),
            Flatten(),
            Dense(64, activation="relu", kernel_regularizer=l2()),
            Dense(1, kernel_regularizer=l2()),
            Activation("tanh")
        ], "value_head")

        self.model = Model(
            inputs,
            [value_head(inputs), policy_head(inputs)]
        )

        if model_file is not None:
            self.restore_model(model_file)

    def compile(self, opt):
        """ Optimization and Loss definition

        Args:
            opt: currently unused. A default ``sgd()`` optimizer is always
                created here; its learning rate and momentum are set later
                by :meth:`train_step`.
        """
        # Loss order matches the model outputs: value (mse), then policy.
        self.model.compile(
            optimizer=sgd(),
            loss=["mse", "categorical_crossentropy"]
        )

    def eval_state(self, state):
        """ Evaluate a board state.

        Args:
            state: object exposing ``encoded_states()``, whose result is
                fed to the network as a batch of one.

        Returns:
            A ``(value, policy)`` pair: the scalar from the value head and
            the policy head's output vector for that single state.
        """
        vp = self.model.predict_on_batch(state.encoded_states()[np.newaxis, :])
        # format to (float, np.array((255,1),dtype=float)) structure
        # vp[0] is the value batch, vp[1] the policy batch; take row 0 of each.
        return vp[0][0][0], vp[1][0]

    def train_step(self, optimizer):
        """ One Network Tranning step.

        At present this only pushes the hyper-parameters into the compiled
        optimizer; the actual ``train_on_batch`` call is still disabled.

        Args:
            optimizer: mapping with ``"lr"`` and ``"momentum"`` entries.
        """
        opt = self.model.optimizer
        K.set_value(opt.lr, optimizer["lr"])
        K.set_value(opt.momentum, optimizer["momentum"])
        # loss = self.model.train_on_batch(inputs, [winner, probs])
        # return loss

    def save_model(self, filename):
        """Save the model weights to ``<model_path>/keras/<filename>.h5``.

        Args:
            filename: base name of the weights file, without extension.
        """
        base_path = "{}/keras".format(TRAINING_CONFIG["model_path"])
        # makedirs also creates missing parent directories and, with
        # exist_ok=True, does not race with a concurrent creation the way an
        # exists()-then-mkdir() sequence does.
        os.makedirs(base_path, exist_ok=True)
        self.model.save_weights("{}/{}.h5".format(base_path, filename))

    def restore_model(self, filename):
        """Load weights from ``<model_path>/keras/<filename>.h5`` if present.

        A missing file is silently ignored, leaving the current weights
        untouched.

        Args:
            filename: base name of the weights file, without extension.
        """
        base_path = "{}/keras".format(TRAINING_CONFIG["model_path"])
        weights_path = "{}/{}.h5".format(base_path, filename)
        if os.path.exists(weights_path):
            self.model.load_weights(weights_path)
# Cut the pre-trained network at 'mixed7' and treat that layer's output as
# the input of our own classification head.
last_layer = pre_trained_model.get_layer('mixed7')
print("last later output shape: ", last_layer.output_shape)
last_output = last_layer.output

# Classification head stacked on top of last_layer: flatten to 1-dim, one
# hidden dense layer, dropout, and a single sigmoid unit.
x = last_output
for head_layer in (
        layers.Flatten(),
        layers.Dense(units=1024, activation='relu'),
        layers.Dropout(rate=0.2),
        layers.Dense(units=1, activation='sigmoid'),
):
    x = head_layer(x)

model = Model(pre_trained_model.input, x)
model.compile(loss='binary_crossentropy',
              optimizer=RMSprop(lr=0.0001),
              metrics=['acc'])

# Dataset layout: <base_dir>/{train,validation}/{cats,dogs}
base_dir = 'utils/cats_and_dogs_filtered'
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation'))
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, animal) for animal in ('cats', 'dogs'))
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, animal) for animal in ('cats', 'dogs'))

# File names of the training images, per class.
train_cats_filenames, train_dogs_filenames = map(
    os.listdir, (train_cats_dir, train_dogs_dir))
class CharacterTagger:
    """A class for character-based neural morphological tagger

    The constructor stores the hyper-parameters, normalises them in
    ``_initialize`` and immediately builds and compiles the Keras model
    (available afterwards as ``self.model_``).

    Parameters:
        symbols: character vocabulary
        tags: morphological tags vocabulary
        word_rnn: the type of character-level network (only `cnn` implemented)
        char_embeddings_size: the size of character embeddings
        char_conv_layers: the number of convolutional layers on character level
        char_window_size: the width of convolutional filter (filters).
            It can be a list if several parallel filters are applied, for example, [2, 3, 4, 5].
        char_filters: the number of convolutional filters for each window width.
            It can be a number, a list (when there are several windows of different width
            on a single convolution layer), a list of lists, if there
            are more than 1 convolution layers, or **None**.
            If **None**, a layer with width **width** contains
            min(**char_filter_multiple** * **width**, 200) filters.
        char_filter_multiple: the ratio between filters number and window width
        char_highway_layers: the number of highway layers on character level
        conv_dropout: the ratio of dropout between convolutional layers
        highway_dropout: the ratio of dropout between highway layers
        intermediate_dropout: the ratio of dropout between convolutional
            and highway layers on character level
        lstm_dropout: dropout ratio in word-level LSTM
        word_vectorizers: list of parameters for additional word-level vectorizers,
            for each vectorizer it stores a pair of vectorizer dimension and
            the dimension of the corresponding word embedding
        word_lstm_layers: the number of word-level LSTM layers
        word_lstm_units: hidden dimensions of word-level LSTMs
        word_dropout: the ratio of dropout before word level (it is applied to word embeddings)
        regularizer: l2 regularization parameter
        verbose: the level of verbosity
    """
    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 word_rnn: str = "cnn",
                 char_embeddings_size: int = 16,
                 char_conv_layers: int = 1,
                 char_window_size: Union[int, List[int]] = 5,
                 char_filters: Union[int, List[int]] = None,
                 char_filter_multiple: int = 25,
                 char_highway_layers: int = 1,
                 conv_dropout: float = 0.0,
                 highway_dropout: float = 0.0,
                 intermediate_dropout: float = 0.0,
                 lstm_dropout: float = 0.0,
                 word_vectorizers: List[Tuple[int, int]] = None,
                 word_lstm_layers: int = 1,
                 word_lstm_units: Union[int, List[int]] = 128,
                 word_dropout: float = 0.0,
                 regularizer: float = None,
                 verbose: int = 1):
        self.symbols = symbols
        self.tags = tags
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        self.word_vectorizers = word_vectorizers  # a list of additional vectorizer dimensions
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        self.regularizer = regularizer
        self.verbose = verbose
        # Normalise the raw arguments first; build() relies on the list forms.
        self._initialize()
        self.build()

    def _initialize(self):
        """Normalises scalar hyper-parameters to lists and validates their lengths.

        Raises:
            ValueError: if the numbers of window sizes and filter counts
                differ, or if the number of LSTM unit sizes does not match
                ``word_lstm_layers``.
        """
        if isinstance(self.char_window_size, int):
            self.char_window_size = [self.char_window_size]
        # A single value (or None) is repeated once per window size.
        if self.char_filters is None or isinstance(self.char_filters, int):
            self.char_filters = [self.char_filters] * len(self.char_window_size)
        if len(self.char_window_size) != len(self.char_filters):
            raise ValueError("There should be the same number of window sizes and filter sizes")
        if isinstance(self.word_lstm_units, int):
            self.word_lstm_units = [self.word_lstm_units] * self.word_lstm_layers
        if len(self.word_lstm_units) != self.word_lstm_layers:
            raise ValueError("There should be the same number of lstm layer units and lstm layers")
        if self.word_vectorizers is None:
            self.word_vectorizers = []
        # The numeric coefficient is replaced by a Keras l2 regularizer object.
        if self.regularizer is not None:
            self.regularizer = kreg.l2(self.regularizer)
        if self.verbose > 0:
            log.info("{} symbols, {} tags in CharacterTagger".format(self.symbols_number_, self.tags_number_))

    @property
    def symbols_number_(self) -> int:
        """Character vocabulary size
        """
        return len(self.symbols)

    @property
    def tags_number_(self) -> int:
        """Tag vocabulary size
        """
        return len(self.tags)

    def build(self):
        """Builds the network using Keras.

        Creates the character-level word encoder, optionally concatenates
        dense projections of the additional word vectorizer inputs, stacks
        the word-level network on top and compiles the result into
        ``self.model_``.

        Returns:
            the tagger itself
        """
        word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32")
        inputs = [word_inputs]
        word_outputs = self._build_word_cnn(word_inputs)
        if len(self.word_vectorizers) > 0:
            # One extra float input per vectorizer, each projected by its own Dense layer.
            additional_word_inputs = [kl.Input(shape=(None, input_dim), dtype="float32")
                                      for input_dim, dense_dim in self.word_vectorizers]
            inputs.extend(additional_word_inputs)
            additional_word_embeddings = [kl.Dense(dense_dim)(additional_word_inputs[i])
                                          for i, (_, dense_dim) in enumerate(self.word_vectorizers)]
            word_outputs = kl.Concatenate()([word_outputs] + additional_word_embeddings)
        outputs, lstm_outputs = self._build_basic_network(word_outputs)
        compile_args = {"optimizer": ko.nadam(lr=0.002, clipnorm=5.0),
                        "loss": "categorical_crossentropy", "metrics": ["accuracy"]}
        self.model_ = Model(inputs, outputs)
        self.model_.compile(**compile_args)
        if self.verbose > 0:
            self.model_.summary(print_fn=log.info)
        return self

    def _build_word_cnn(self, inputs):
        """Builds word-level network

        Characters are one-hot encoded and linearly embedded, passed through
        one convolutional stack per window size, max-pooled over the
        character axis and finally through the highway layers.

        Args:
            inputs: the integer character-index input tensor created in ``build``

        Returns:
            the output tensor of the last highway layer
        """
        inputs = kl.Lambda(kb.one_hot, arguments={"num_classes": self.symbols_number_},
                           output_shape=lambda x: tuple(x) + (self.symbols_number_,))(inputs)
        # A bias-free Dense over one-hot vectors acts as an embedding lookup.
        char_embeddings = kl.Dense(self.char_embeddings_size, use_bias=False)(inputs)
        conv_outputs = []
        self.char_output_dim_ = 0
        for window_size, filters_number in zip(self.char_window_size, self.char_filters):
            curr_output = char_embeddings
            # Default filter count grows with the window width, capped at 200.
            curr_filters_number = (min(self.char_filter_multiple * window_size, 200)
                                   if filters_number is None else filters_number)
            # All but the last convolution are followed by optional dropout.
            for _ in range(self.char_conv_layers - 1):
                curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                        padding="same", activation="relu",
                                        data_format="channels_last")(curr_output)
                if self.conv_dropout > 0.0:
                    curr_output = kl.Dropout(self.conv_dropout)(curr_output)
            curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                    padding="same", activation="relu",
                                    data_format="channels_last")(curr_output)
            conv_outputs.append(curr_output)
            self.char_output_dim_ += curr_filters_number
        if len(conv_outputs) > 1:
            conv_output = kl.Concatenate(axis=-1)(conv_outputs)
        else:
            conv_output = conv_outputs[0]
        # Max over axis -2 (the character position axis of the conv output).
        highway_input = kl.Lambda(kb.max, arguments={"axis": -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            highway_input = kl.Dropout(self.intermediate_dropout)(highway_input)
        # All but the last highway layer are followed by optional dropout.
        for i in range(self.char_highway_layers - 1):
            highway_input = Highway(activation="relu")(highway_input)
            if self.highway_dropout > 0.0:
                highway_input = kl.Dropout(self.highway_dropout)(highway_input)
        highway_output = Highway(activation="relu")(highway_input)
        return highway_output

    def _build_basic_network(self, word_outputs):
        """
        Creates the basic network architecture,
        transforming word embeddings to intermediate outputs

        Args:
            word_outputs: the word representation tensor

        Returns:
            a pair of the per-word softmax tag distribution tensor and
            the output tensor of the last bidirectional LSTM
        """
        if self.word_dropout > 0.0:
            lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs)
        else:
            lstm_outputs = word_outputs
        for j in range(self.word_lstm_layers-1):
            lstm_outputs = kl.Bidirectional(
                kl.LSTM(self.word_lstm_units[j], return_sequences=True,
                        dropout=self.lstm_dropout))(lstm_outputs)
        lstm_outputs = kl.Bidirectional(
            kl.LSTM(self.word_lstm_units[-1], return_sequences=True,
                    dropout=self.lstm_dropout))(lstm_outputs)
        pre_outputs = kl.TimeDistributed(
            kl.Dense(self.tags_number_, activation="softmax",
                     activity_regularizer=self.regularizer),
            name="p")(lstm_outputs)
        return pre_outputs, lstm_outputs

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        """Converts a raw batch into the arrays fed to the Keras model.

        Args:
            data: a sequence whose first element is the batch of sentences;
                any further elements are additional inputs converted with
                ``np.array``
            labels: a batch of tag sequences, or None at prediction time
            transform_to_one_hot: whether to one-hot encode the tag indexes

        Returns:
            the list of input arrays ``X``, or the pair ``(X, Y)``
            when ``labels`` is given
        """
        data, additional_data = data[0], data[1:]
        # All sentences of the batch are padded to the longest one.
        L = max(len(x) for x in data)
        X = np.array([self._make_sent_vector(x, L) for x in data])
        X = [X] + [np.array(x) for x in additional_data]
        if labels is not None:
            Y = np.array([self._make_tags_vector(y, L) for y in labels])
            if transform_to_one_hot:
                Y = to_one_hot(Y, len(self.tags))
            return X, Y
        else:
            return X

    def train_on_batch(self, data: List[Iterable], labels: Iterable[list]) -> None:
        """Trains model on a single batch

        Args:
            data: a batch of word sequences
            labels: a batch of correct tag sequences

        Returns:
            None, the model is updated in place
        """
        X, Y = self._transform_batch(data, labels)
        self.model_.train_on_batch(X, Y)

    def predict_on_batch(self, data: Union[list, tuple],
                         return_indexes: bool = False) -> List[List[str]]:
        """
        Makes predictions on a single batch

        Args:
            data: a batch of word sequences together with additional inputs
            return_indexes: whether to return tag indexes in vocabulary or tags themselves

        Returns:
            a batch of label sequences
        """
        X = self._transform_batch(data)
        objects_number, lengths = len(X[0]), [len(elem) for elem in data[0]]
        Y = self.model_.predict_on_batch(X)
        labels = np.argmax(Y, axis=-1)
        answer: List[List[str]] = [None] * objects_number
        for i, (elem, length) in enumerate(zip(labels, lengths)):
            # Drop the predictions made for the padding positions.
            elem = elem[:length]
            answer[i] = elem if return_indexes else self.tags.idxs2toks(elem)
        return answer

    def _make_sent_vector(self, sent: List, bucket_length: int =None) -> np.ndarray:
        """Transforms a sentence to Numpy array, which will be the network input.

        Args:
            sent: input sentence
            bucket_length: the width of the bucket

        Returns:
            A 2d array of shape (bucket_length, MAX_WORD_LENGTH+2);
            answer[i][0] is the BEGIN marker and answer[i][j+1] contains
            the index of j-th kept letter in i-th word of the sentence.
        """
        bucket_length = bucket_length or len(sent)
        answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH+2), dtype=np.int32)
        # NOTE(review): the BEGIN/END/PAD markers are looked up in the tag
        # vocabulary, not the symbol vocabulary -- presumably both share the
        # indexes of these special tokens; confirm.
        for i, word in enumerate(sent):
            answer[i, 0] = self.tags.tok2idx("BEGIN")
            m = min(len(word), MAX_WORD_LENGTH)
            # Words longer than MAX_WORD_LENGTH keep only their last m letters.
            for j, x in enumerate(word[-m:]):
                answer[i, j+1] = self.symbols.tok2idx(x)
            answer[i, m+1] = self.tags.tok2idx("END")
            answer[i, m+2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray:
        """Transforms a sentence of tags to Numpy array, which will be the network target.

        Args:
            tags: input sentence of tags
            bucket_length: the width of the bucket

        Returns:
            A 1d array of length bucket_length, answer[i] contains the index
            of i-th tag of the sentence; the remaining positions stay zero.
        """
        bucket_length = bucket_length or len(tags)
        answer = np.zeros(shape=(bucket_length,), dtype=np.int32)
        for i, tag in enumerate(tags):
            answer[i] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile) -> None:
        """Saves model weights to a file

        Args:
            outfile: file with model weights (other model components should be given in config)
        """
        self.model_.save_weights(outfile)

    def load(self, infile) -> None:
        """Loads model weights from a file

        Args:
            infile: file to load model weights from
        """
        self.model_.load_weights(infile)
# Two inception modules, each preceded and followed by 2x2 max pooling with
# stride 2 ('same' padding).
max_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding='same')(multi)
incept_1 = inception_module(max_pool, 72, 64)
max_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding='same')(incept_1)
incept_2 = inception_module(max_pool, 128, 96)
max_pool = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding='same')(incept_2)

# Classification head: flatten, one hidden layer, 20-way softmax.
output = Flatten()(max_pool)
output = Dense(512, activation='relu')(output)
output = Dense(20, activation='softmax')(output)

model = Model(inputs=input_spectrum, outputs=output)
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

adam = Adam()
model.compile(adam, loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(train_x, train_y,
                    epochs=1,
                    batch_size=256,
                    validation_data=(val_x, val_y))

plot_accuracy(history)
plot_confusion_matrix(model, val_x, labeled_val_y)

# Store the predicted class index (argmax over the softmax axis) per test sample.
pred_test_y = np.argmax(model.predict(test_x), axis=1)
np.save('results.npy', pred_test_y)
def build(self,
          sentence_length,
          word_length,
          num_labels,
          num_intent_labels,
          word_vocab_size,
          char_vocab_size,
          word_emb_dims=100,
          char_emb_dims=25,
          char_lstm_dims=25,
          tagger_lstm_dims=100,
          dropout=0.2,
          embedding_matrix=None):
    """
    Build a model

    The model takes word indices and per-word character indices and
    produces two outputs: an intent distribution (from the final states of
    the first BiLSTM) and slot labels (from a CRF on top of a second
    BiLSTM). The compiled model is stored in ``self.model``.

    Args:
        sentence_length (int): max sentence length
        word_length (int): max word length (in characters)
        num_labels (int): number of slot labels
        num_intent_labels (int): number of intent classes
        word_vocab_size (int): word vocabulary size
        char_vocab_size (int): character vocabulary size
        word_emb_dims (int, optional): word embedding dimensions
        char_emb_dims (int, optional): character embedding dimensions
        char_lstm_dims (int, optional): character feature LSTM hidden size
        tagger_lstm_dims (int, optional): tagger LSTM hidden size
        dropout (float, optional): dropout rate
        embedding_matrix (optional): pre-trained word embedding weights,
            handed to the Embedding layer as ``weights=[embedding_matrix]``
            (presumably an array of shape (word_vocab_size, word_emb_dims)
            -- confirm against the caller)
    """
    if embedding_matrix is not None:
        # load pre-trained word embeddings into an Embedding layer
        # note that trainable=True, so the pre-trained embeddings are
        # fine-tuned during training rather than kept fixed
        embedding_layer = Embedding(word_vocab_size,
                                    word_emb_dims,
                                    weights=[embedding_matrix],
                                    input_length=sentence_length,
                                    trainable=True,
                                    name='word_embedding_layer')
    else:
        # learn embeddings ourselves
        embedding_layer = Embedding(word_vocab_size,
                                    word_emb_dims,
                                    input_length=sentence_length,
                                    name='word_embedding_layer')

    # create word embedding input and embedding layer
    words_input = Input(shape=(sentence_length,), name='words_input')
    word_embeddings = embedding_layer(words_input)
    word_embeddings = Dropout(dropout)(word_embeddings)

    # create word character input and embeddings layer
    word_chars_input = Input(shape=(sentence_length, word_length),
                             name='word_chars_input')
    char_embedding_layer = Embedding(char_vocab_size,
                                     char_emb_dims,
                                     input_length=word_length,
                                     name='char_embedding_layer')
    # apply embedding to each word
    char_embeddings = TimeDistributed(char_embedding_layer)(word_chars_input)
    # feed dense char vectors into BiLSTM
    char_embeddings = TimeDistributed(
        Bidirectional(LSTM(char_lstm_dims)))(char_embeddings)
    char_embeddings = Dropout(dropout)(char_embeddings)

    # first BiLSTM layer (used for intent classification)
    first_bilstm_layer = Bidirectional(
        LSTM(tagger_lstm_dims, return_sequences=True, return_state=True))
    first_lstm_out = first_bilstm_layer(word_embeddings)

    # with return_state=True the layer returns the output sequence followed
    # by the forward (h, c) and backward (h, c) states
    lstm_y_sequence = first_lstm_out[0]
    hf, cf, hb, cb = first_lstm_out[1:]
    # the intent is classified from the last hidden states of both directions
    h_state = concatenate([hf, hb], axis=-1)
    intent_out = Dense(num_intent_labels,
                       activation='softmax',
                       name='intent_classifier_output')(h_state)

    # create the 2nd feature vectors
    combined_features = concatenate([lstm_y_sequence, char_embeddings],
                                    axis=-1)

    # 2nd BiLSTM layer for label classification
    second_bilstm_layer = Bidirectional(
        LSTM(tagger_lstm_dims, return_sequences=True))(combined_features)
    second_bilstm_layer = Dropout(dropout)(second_bilstm_layer)

    # feed BiLSTM vectors into CRF
    crf = CRF(num_labels, sparse_target=False)
    labels_out = crf(second_bilstm_layer)

    # compile the model
    model = Model(inputs=[words_input, word_chars_input],
                  outputs=[intent_out, labels_out])

    # define losses and metrics
    # Key the CRF entries by the layer's actual name: the auto-generated name
    # is only 'crf_1' for the first CRF layer created in a session, so a
    # hard-coded key breaks as soon as the model is built a second time.
    loss_f = {'intent_classifier_output': 'categorical_crossentropy',
              crf.name: crf.loss_function}
    metrics = {'intent_classifier_output': 'categorical_accuracy',
               crf.name: crf.accuracy}

    model.compile(loss=loss_f, optimizer='adam', metrics=metrics)
    self.model = model
class NNClassifier(ClassifierMixin): """ Neural Network classifier, implements the same methods as the sklearn models to make it simple to add """ # noinspection PyTypeChecker def __init__(self, **kwargs: Dict[str, Union[int, str, float]]): """initializes the Neural Network classifier :param kwargs: configuration containing the predictive_model parameters, encoding and training parameters """ self._n_hidden_layers = int(kwargs['n_hidden_layers']) self._n_hidden_units = int(kwargs['n_hidden_units']) self._activation = str(kwargs['activation']) self._n_epochs = int(kwargs['n_epochs']) self._encoding = str(kwargs['encoding']) self._dropout_rate = float(kwargs['dropout_rate']) self._is_binary_classifier = bool(kwargs['is_binary_classifier']) self._encoding_parser = EncodingParser(self._encoding, self._is_binary_classifier, task=PredictiveModels.CLASSIFICATION.value) self._model = None def fit(self, train_data: DataFrame, targets: ndarray) -> None: """creates and fits the predictive_model first the encoded data is parsed, then the predictive_model created and then trained :param train_data: encoded training dataset :param targets: encoded target dataset """ targets = DataFrame(targets, columns=['label']) train_data = self._encoding_parser.parse_training_dataset(train_data) targets = self._encoding_parser.parse_targets(targets) model_inputs = Input(train_data.shape[1:]) predicted = model_inputs if self._encoding in ['simpleIndex', 'complex', 'lastPayload']: predicted = Flatten()(predicted) for _ in range(self._n_hidden_layers): predicted = Dense(self._n_hidden_units, activation=self._activation)(predicted) predicted = Dropout(self._dropout_rate)(predicted) if self._is_binary_classifier: predicted = Dense(1, activation='sigmoid')(predicted) else: predicted = Dense(targets.shape[1], activation='softmax')(predicted) self._model = Model(model_inputs, predicted) if self._is_binary_classifier: self._model.compile(loss='binary_crossentropy', optimizer='adam') else: 
self._model.compile(loss='categorical_crossentropy', optimizer='adam') self._model.fit(train_data, targets, epochs=self._n_epochs) def predict(self, test_data: DataFrame) -> ndarray: """returns predictive_model predictions parses the encoded test dataset, then returns the predictive_model predictions :param test_data: encoded test dataset :return: predictive_model predictions """ test_data = self._encoding_parser.parse_testing_dataset(test_data) predictions = self._model.predict(test_data) if self._is_binary_classifier: predictions = predictions.astype(bool) else: predictions = np.argmax(predictions, -1) return predictions def predict_proba(self, test_data: DataFrame) -> ndarray: """returns the classification probability parses the test dataset and returns the raw prediction probabilities of the predictive_model :param test_data: encoded test dataset :return: predictive_model prediction probabilities """ test_data = self._encoding_parser.parse_testing_dataset(test_data) predictions = self._model.predict(test_data) if self._is_binary_classifier: predictions = np.max(predictions, -1) predictions = np.vstack((1 - predictions, predictions)).T return predictions def reset(self) -> None: """
class EncDecIntentModel(IntentExtractionModel):
    """
    Encoder Decoder Deep LSTM Tagger Model
    """

    def __init__(self):
        super(EncDecIntentModel, self).__init__()

    def build(self,
              sentence_length,
              vocab_size,
              tag_labels,
              token_emb_size=100,
              encoder_depth=1,
              decoder_depth=1,
              lstm_hidden_size=100,
              encoder_dropout=0.5,
              decoder_dropout=0.5,
              emb_model_path=None):
        """
        Build the model

        A stack of backwards-running encoder LSTMs reads the token
        embeddings; the final states of the last encoder layer initialise
        every decoder LSTM, which reads the same embeddings and feeds a
        per-token softmax classifier. The compiled model is stored in
        ``self.model``.

        Args:
            sentence_length (int): max sentence length
            vocab_size (int): vocabulary size
            tag_labels (int): number of tag labels
            token_emb_size (int, optional): token embedding vector size
            encoder_depth (int, optional): number of encoder LSTM layers
            decoder_depth (int, optional): number of decoder LSTM layers
            lstm_hidden_size (int, optional): LSTM layers hidden size
            encoder_dropout (float, optional): encoder dropout
            decoder_dropout (float, optional): decoder dropout
            emb_model_path (str, optional): external embedding model path
        """
        use_external_emb = emb_model_path is not None
        tokens_input, token_emb = self._create_input_embed(sentence_length,
                                                           use_external_emb,
                                                           token_emb_size,
                                                           vocab_size)

        # Encoder stack: each layer consumes the previous layer's sequence;
        # the (h, c) states left after the loop are those of the last layer.
        assert encoder_depth > 0, 'Encoder depth must be > 0'
        encoder_seq = token_emb
        for depth in range(encoder_depth):
            encoder_layer = LSTM(lstm_hidden_size,
                                 return_sequences=True,
                                 return_state=True,
                                 go_backwards=True,
                                 dropout=encoder_dropout,
                                 name='encoder_blstm_{}'.format(depth))
            encoder_seq, state_h, state_c = encoder_layer(encoder_seq)

        # Decoder stack: every layer starts from the encoder's final states.
        assert decoder_depth > 0, 'Decoder depth must be > 0'
        decoder_seq = token_emb
        for depth in range(decoder_depth):
            decoder_layer = LSTM(lstm_hidden_size,
                                 return_sequences=True,
                                 name='decoder_lstm_{}'.format(depth))
            decoder_seq = decoder_layer(decoder_seq,
                                        initial_state=[state_h, state_c])

        # Per-token tag distribution on top of the (dropped-out) decoder output.
        dropped_seq = Dropout(decoder_dropout)(decoder_seq)
        tag_classifier = TimeDistributed(
            Dense(tag_labels, activation='softmax'),
            name='decoder_classifier')
        tag_probabilities = tag_classifier(dropped_seq)

        self.model = Model(tokens_input, tag_probabilities)
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='rmsprop',
                           metrics=['categorical_accuracy'])
# %%
from keras import Input, Model, layers

# Functional-API model: 64 inputs -> two 32-unit ReLU layers -> 10-way softmax.
input_tensor = Input(shape=(64,))
x = input_tensor
for _ in range(2):
    x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)

model = Model(inputs=input_tensor, outputs=output_tensor)
model.summary()

# %%
import numpy as np

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Random inputs and random targets: this only exercises the training and
# evaluation calls, there is nothing meaningful to learn.
x_train = np.random.random((1000, 64))
y_train = np.random.random((1000, 10))

model.fit(x_train, y_train, batch_size=128, epochs=10)
score = model.evaluate(x_train, y_train)