def resnet_50(input_shape):
    """Build a ResNet-50 feature extractor and load pretrained no-top weights.

    The 7x7 stem convolution is named ``conv1`` when the input has at most
    three channels, so the by-name weight load fills it.  With more than
    three channels the stem is created as ``conv1_changed`` (the by-name load
    therefore skips it); afterwards its first three input channels are copied
    from a stock ``ResNet50`` and the remaining channels keep their freshly
    initialised values.  The layer is finally renamed to ``conv1``.

    Args:
        input_shape: Shape handed to ``Input``.  Its last entry is used as
            the channel count.

    Returns:
        The Keras ``Model`` from the image input to the stage-5 output.
    """
    has_extra_channels = input_shape[-1] > 3
    img_input = Input(input_shape)

    # Build the stem exactly once.  Previously a 'conv1' layer was always
    # created and then thrown away whenever 'conv1_changed' replaced it.
    stem_name = 'conv1_changed' if has_extra_channels else 'conv1'
    x = Conv2D(64, (7, 7), strides=(2, 2), padding='same',
               name=stem_name)(img_input)
    x = BatchNormalization(name='bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)

    # (stage, filters, identity-block suffixes, extra kwargs for conv_block)
    stage_plan = (
        (2, [64, 64, 256], 'bc', {'strides': (1, 1)}),
        (3, [128, 128, 512], 'bcd', {}),
        (4, [256, 256, 1024], 'bcdef', {}),
        (5, [512, 512, 2048], 'bc', {}),
    )
    for stage, filters, suffixes, conv_kwargs in stage_plan:
        x = conv_block(x, 3, filters, stage=stage, block='a', **conv_kwargs)
        for suffix in suffixes:
            x = identity_block(x, 3, filters, stage=stage, block=suffix)

    print("Loading pretrained weights for Resnet50...")
    weights_path = get_file(
        'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
        resnet50_padding.WEIGHTS_PATH_NO_TOP,
        cache_subdir='models',
        md5_hash='a268eb855778b3df3c7506639542a6af')
    model = Model(img_input, x)
    model.load_weights(weights_path, by_name=True)

    if has_extra_channels:
        print("Loading weights for conv1 layer separately for the first 3 channels")
        stem = model.get_layer('conv1_changed')
        resnet_ori = ResNet50(include_top=False, input_shape=(224, 224, 3))
        pretrained = resnet_ori.get_layer("conv1").get_weights()

        conv1_weights = np.zeros((7, 7, input_shape[-1], 64), dtype="float32")
        # First three input channels: pretrained RGB kernel.
        conv1_weights[:, :, :3, :] = pretrained[0][:, :, :, :]
        # Remaining input channels: keep the stem's random initialisation.
        conv1_weights[:, :, 3:, :] = stem.get_weights()[0][:, :, 3:, :]
        bias = pretrained[1]
        stem.set_weights((conv1_weights, bias))
        # NOTE(review): assigning ``layer.name`` is kept from the original;
        # confirm the Keras version in use allows it.
        stem.name = 'conv1'
    return model
Validation_file_names = get_image_file_names(Validation_dir, 3650)

# Callbacks: early stopping watches 'val_acc'; the checkpoint is written
# every epoch because save_best_only is False.
early_stopping = EarlyStopping(
    monitor='val_acc',
    patience=EarlyStopping_patience,
    mode='auto',
)
checkpoint = ModelCheckpoint(
    Models_filepath,
    monitor='val_acc',
    verbose=1,
    save_best_only=False,
)

# Resume from a previously saved weight file when it exists on disk.
if os.path.exists("./Models/weights-resnet-network-01-0.44.hdf5"):
    model.load_weights("./Models/weights-resnet-network-01-0.44.hdf5")
    print("Check point loaded!")

# Start training.
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
# keras.backend.get_session().run(tf.global_variables_initializer())
history = model.fit_generator(
    generator=get_train_batch(Trainning_file_names, Batch_size, img_W, img_H),
    epochs=Epochs,
    steps_per_epoch=Steps_per_epoch,
    verbose=1,
    validation_data=get_train_batch(Validation_file_names, Batch_size,
                                    img_W, img_H),
    callbacks=[checkpoint, early_stopping],
    validation_steps=Val_Steps_per_epoch,
)
def VGGUnet(n_classes, input_height=416, input_width=608, vgg16NoTopWeights=None):
    """Build a U-Net style segmentation model on a VGG16 encoder.

    The encoder is the five VGG16 convolution blocks.  The decoder starts
    from the block-4 pooled features and applies three 2x up-samplings, each
    followed by a concatenation with the block-3, block-2 and block-1 pooled
    features respectively.  The final feature map is reshaped to
    ``(outputHeight * outputWidth, n_classes)`` and passed through softmax.

    Args:
        n_classes: Number of output classes (filters of the last conv).
        input_height: Input height; asserted to be a multiple of 32.
        input_width: Input width; asserted to be a multiple of 32.
        vgg16NoTopWeights: Optional weight file loaded into the encoder
            (input through ``block5_pool``) before the decoder is built.

    Returns:
        The Keras ``Model``; ``outputWidth`` and ``outputHeight`` are attached
        to it as attributes.
    """
    assert input_height % 32 == 0
    assert input_width % 32 == 0

    IMAGE_ORDERING = 'channels_first'
    if IMAGE_ORDERING == 'channels_first':
        concat_axis = 1
        input_shape = 3, input_height, input_width
    elif IMAGE_ORDERING == 'channels_last':
        concat_axis = 3
        input_shape = input_height, input_width, 3
    else:
        raise Exception('Unexpected IMAGE_ORDERING')

    # https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
    img_input = Input(shape=input_shape)

    # ---- Encoder: (block number, filters, number of 3x3 convs) ----
    encoder_plan = ((1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3),
                    (5, 512, 3))
    pooled_features = []
    x = img_input
    for block_no, n_filters, n_convs in encoder_plan:
        for conv_no in range(1, n_convs + 1):
            x = Conv2D(n_filters, (3, 3), activation='relu', padding='same',
                       name='block%d_conv%d' % (block_no, conv_no),
                       data_format=IMAGE_ORDERING)(x)
        x = MaxPooling2D((2, 2), strides=(2, 2),
                         name='block%d_pool' % block_no,
                         data_format=IMAGE_ORDERING)(x)
        pooled_features.append(x)
    f1, f2, f3, f4 = pooled_features[:4]

    if vgg16NoTopWeights:
        vgg = Model(img_input, x)
        vgg.load_weights(vgg16NoTopWeights)

    # ---- Decoder: (filters, skip tensor merged before the conv) ----
    # NOTE(review): BatchNormalization is used with its default axis while the
    # tensors are channels_first; the Keras docs recommend axis=1 after a
    # channels_first Conv2D.  Left unchanged because altering it changes the
    # layer's weight shapes -- confirm before fixing.
    decoder_plan = ((512, None), (256, f3), (128, f2), (64, f1))
    o = f4
    for n_filters, skip in decoder_plan:
        if skip is not None:
            o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o)
            o = (concatenate([o, skip], axis=concat_axis))
        o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o)
        o = (Conv2D(n_filters, (3, 3), padding='valid',
                    data_format=IMAGE_ORDERING))(o)
        o = (BatchNormalization())(o)

    o = Conv2D(n_classes, (3, 3), padding='same',
               data_format=IMAGE_ORDERING)(o)

    # A throw-away Model is used only to read the static output shape.
    o_shape = Model(img_input, o).output_shape
    outputHeight = o_shape[2]
    outputWidth = o_shape[3]

    # (n_classes, H, W) -> (n_classes, H*W) -> (H*W, n_classes) -> softmax.
    o = (Reshape((n_classes, outputHeight * outputWidth)))(o)
    o = (Permute((2, 1)))(o)
    o = (Activation('softmax'))(o)

    model = Model(img_input, o)
    model.outputWidth = outputWidth
    model.outputHeight = outputHeight
    return model
def vgg16_2d(image_rows=256, image_cols=256, input_channels=3, train_encoder=True):
    """Build the VGG16 convolutional base and load its pretrained weights.

    Args:
        image_rows: Input height.
        image_cols: Input width.
        input_channels: Number of input channels.
        train_encoder: When False, every layer of the returned model is
            marked non-trainable.  (The parameter was previously accepted
            but ignored.)

    Returns:
        A Keras ``Model`` named ``'vgg16'`` from the input to ``block5_pool``,
        with the downloaded no-top weights loaded.
    """
    inputs = layers.Input((image_rows, image_cols, input_channels))

    # (block number, filters, number of 3x3 convs)
    block_plan = ((1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3),
                  (5, 512, 3))
    x = inputs
    for block_no, n_filters, n_convs in block_plan:
        for conv_no in range(1, n_convs + 1):
            x = layers.Conv2D(n_filters, (3, 3), activation='relu',
                              padding='same',
                              name='block%d_conv%d' % (block_no, conv_no))(x)
        x = layers.MaxPooling2D((2, 2), strides=(2, 2),
                                name='block%d_pool' % block_no)(x)
    x_output = x

    model = Model(inputs=[inputs], outputs=[x_output], name='vgg16')
    model.summary()

    weights_path = utils.get_file(
        'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
        WEIGHTS_PATH_NO_TOP,
        cache_subdir='models',
        file_hash='6d6bbae143d832006294945121d1f1fc')
    model.load_weights(weights_path)

    # Honour the train_encoder flag: freeze after loading so the weight
    # assignment itself is unaffected.
    if not train_encoder:
        for layer in model.layers:
            layer.trainable = False
    return model
# Env
env = Environment(max_steps=1000)

# Model Definition
if agent_type == "keras-rl":
    # Fully connected network over the flattened rendering of the
    # environment, with a leading axis of length 1 on the input.
    the_input = Input((1, ) + env.render().shape)
    flatten = Flatten()(the_input)
    x = Dense(256, activation='relu')(flatten)
    for _ in range(3):
        x = Dense(1024, activation='relu')(x)
    x = Dense(196, activation='linear')(x)

    model = Model(inputs=[the_input], outputs=[x])
    model.load_weights('pretrained.h5')
    model.compile(optimizer='adam', loss='mse')
    agent_spec["model"] = model

# Agent Init
agent = agent_class(**agent_spec)
print("Starting experiment for %s." % name)

# Agent Train
agent.compile(Adam(lr=1e-2), metrics=['mse'])
history = agent.fit(
    env,
    nb_steps=EPISODES*150,
    nb_max_episode_steps=1000,
    visualize=False,
    verbose=2,
)

# Fetch Train Summary: step counts of the first EPISODES episodes.
summary_step = history.history["nb_episode_steps"][:EPISODES]
# Capsule-network text classifier: embedding -> spatial dropout -> capsules.
input = Input(shape=(max_word_length,))
embedding = Embedding(len(tokenizer.word_index) + 1, 128)(input)
embedding = SpatialDropout1D(0.2)(embedding)
capsule = Capsule(num_classes, 8, 10, True)(embedding)
# The score of each class is the L2 norm of its capsule vector (axis 2).
output = Lambda(lambda x : K.sqrt(K.sum(K.square(x), 2)),
                output_shape=(num_classes, ))(capsule)

model = Model(inputs=input, outputs=output)
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line to the output.
model.summary()

model_weight_file = './model_capsule.h5'
model_file = './model_capsule.model'
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model_checkpoint = ModelCheckpoint(model_weight_file, save_best_only=True,
                                   save_weights_only=True)
model.fit(x_train_word_index, y_train_index,
          batch_size=8, epochs=1000, verbose=2,
          callbacks=[early_stopping, model_checkpoint],
          validation_data=(x_dev_word_index, y_dev_index),
          shuffle=True)

# Restore the best checkpointed weights before saving and evaluating.
model.load_weights(model_weight_file)
model.save(model_file)
evaluate = model.evaluate(x_test_word_index, y_test_index, batch_size=8,
                          verbose=2)
print('loss value=' + str(evaluate[0]))
print('metrics value=' + str(evaluate[1]))
# Recorded result of a previous run:
# loss value=0.7480950128464472
# metrics value=0.7619047609586564
concatenation = concatenate([abs_x_minus_y, x_mult_y]) fcnn_input = Reshape((600, ))(concatenation) fcnn_layer_one = Dense(len(scores[0]), input_shape=(600, ), activation='softmax')(fcnn_input) model = Model(inputs=[sent1_input, sent2_input], outputs=[fcnn_layer_one]) print(model.summary()) filepath = path + 'lstm_weights.last.hdf5' exists = os.path.isfile(filepath) if exists: model.load_weights(filepath) model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=False, save_weights_only=True, mode='auto') model.fit([data1, data2], scores, validation_data=([valid1, valid2], valid_scores),
# Classifier head that sits on top of the VGG16 'block5_pool' output.
dense_input = Input(shape=(7, 7, 512))
dense_output = Flatten(name='flatten')(dense_input)
for _fc_name, _fc_units in (('fc1', dense_layer_1), ('fc2', dense_layer_2)):
    dense_output = Dense(_fc_units, activation='relu',
                         name=_fc_name)(dense_output)
dense_output = Dense(num_classes, activation='softmax',
                     name='predictions')(dense_output)
top_model = Model(inputs=dense_input, outputs=dense_output, name='top_model')

# from: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
# Fine-tuning needs a fully-trained classifier to start from, including the
# top classifier, so its trained weights are loaded here.
top_model.load_weights(top_model_weights_path)

# Chain the convolutional base and the head into one model.
block5_pool = vgg16.get_layer('block5_pool').output
full_output = top_model(block5_pool)
full_model = Model(inputs=vgg16.input, outputs=full_output)

# Freeze the first 15 layers (up to the last conv block) so their weights
# are not updated.
# WARNING: this may not be applicable for Inception V3
for layer in full_model.layers[:15]:
    layer.trainable = False

# Print the combined architecture for a visual sanity check.
full_model.summary()
import cv2
from keras.applications import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras import Model

IMG_PATH = ['./chest_xray_images/normal/15268.jpg', '0']
IMG_SHAPE = (320, 320, 3)

# Load one grayscale sample, resize it, scale it to [0, 1] and replicate the
# single channel three times to match the backbone's input.
test_img = load_img(path=IMG_PATH[0], color_mode='grayscale')
test_img = img_to_array(img=test_img, data_format='channels_last')
test_img = cv2.resize(test_img, dsize=IMG_SHAPE[:2],
                      interpolation=cv2.INTER_NEAREST)
test_img = np.expand_dims(test_img, axis=-1)
test_img = test_img.astype(np.uint8)
test_img = test_img / 255.
test_img = np.concatenate((test_img, test_img, test_img), axis=-1)
print('external image(s) shape:', test_img.shape)

# DenseNet121 backbone + global average pooling + 14-unit output layer.
backbone = DenseNet121(include_top=False, weights=None,
                       input_shape=(320, 320, 3))
backbone_out = backbone.output
gap = GlobalAveragePooling2D(name='pooling_layer')(backbone_out)
# The model is compiled with binary_crossentropy (independent per-class
# probabilities), so the output activation is sigmoid rather than softmax.
# The activation carries no weights, so the checkpoint loads unchanged.
output = Dense(units=14, activation='sigmoid', name='output_layer')(gap)

chexnet_model = Model(inputs=backbone.input, outputs=output)
chexnet_model.summary()
chexnet_model.load_weights('C:/Users/Arman/Desktop/Covid19-Detection/checkpoints/CheXNet/CheXNet_v0.3.0.h5')
chexnet_model.compile(optimizer='adam', loss='binary_crossentropy')
chexnet_model.save(filepath='./checkpoints/CheXNet/CheXNet_model.hdf5')
print('sample prediction: \n',
      chexnet_model.predict(np.expand_dims(test_img, axis=0)))
batch_size=64, epochs=50, verbose=2, callbacks=[checkpoint]) # get the most recent file in the job directory which happens to be the last best model best_model_file = get_most_recent_file(job_dir) # save current model so that the training can be resumed later vae.save(job_dir + '/model.h5') else: # get the most recent file in the job directory which happens to be the last best model best_model_file = get_most_recent_file(job_dir) # load weights from the best model vae.load_weights(best_model_file) print('loaded weights from', best_model_file) encoder = Model(in_, z_mu) train_xhat = encoder.predict(train_x) test_xhat = encoder.predict(test_x) if latent_dim in [1, 2]: test_xhat_nonfraud = test_xhat[test_y == 0] test_xhat_fraud = test_xhat[test_y == 1] if latent_dim == 1: plt.scatter(test_xhat_nonfraud, np.zeros_like(test_xhat_nonfraud), color='b', alpha=0.25,
def train_model(data, topic, PROCESSED_DIR, SEED_FOLDER, **kwargs):
    """Train the knowledge-enriched BiLSTM classifier and predict labels.

    Each token's knowledge paths are reduced to one vector by a stack of
    three LSTMs, attended over with ``attention_knowledge``, concatenated
    with the word embeddings and fed to a bidirectional LSTM followed by a
    softmax layer.  The checkpointed weights (best value of ``monitor``) are
    reloaded before predicting.

    Args:
        data: Mapping with ``X_*``, ``kX_*`` and ``y_*`` arrays for the
            ``train``, ``dev`` and ``test`` splits.
        topic: Topic name; part of the checkpoint file name.
        PROCESSED_DIR: Path prefix of the ``.npy`` embedding matrices.
        SEED_FOLDER: Path prefix of the checkpoint file.
        **kwargs: Must contain ``model_settings`` with the hyper-parameters
            read below.

    Returns:
        Tuple ``(test_labels, dev_labels)``: two lists of arg-max class
        indices, one entry per sample.
    """
    settings = kwargs['model_settings']

    dropout = settings["dropout"]
    lstm_size = settings["lstm_size"]
    monitor = settings["monitor"]
    batch_size = settings["batch_size"]
    epochs = settings["epochs"]
    learning_rate = settings["learning_rate"]
    train_embeddings = settings["train_embeddings"]

    # model file eg: 'results/only_sub_and_inst/model_runs/EvLSTM/seed_0/death_penalty_threelabel_crossdomain_monitor-f1_macro_do-0.3_lsize-32_bs-32_epochs-20_lr-0.001_trainemb-False_kl-only_sub_and_inst'
    model_file = SEED_FOLDER + topic + "_" + settings["model_file_suffix"]
    seed = settings['current_seed']

    # set configs for memory usage and reproducibility: https://stackoverflow.com/questions/38469632/tensorflow-non-repeatable-results
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    rn.seed(seed)
    # NOTE(review): this config is built but never handed to a session in
    # this function, so the GPU options have no visible effect here --
    # confirm whether a session was meant to be created with it.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.3

    # load embeddings
    emb_sents = np.load(PROCESSED_DIR + "index_to_vec_we" +
                        settings['word_embeddings'][1] + ".npy")
    emb_knowledge = np.load(PROCESSED_DIR + "index_to_vec_kge" +
                            settings['kg_embeddings'][1] + ".npy")

    # load data
    X_train, X_dev, X_test = data["X_train"], data["X_dev"], data["X_test"]
    kX_train, kX_dev, kX_test = (data["kX_train"], data["kX_dev"],
                                 data["kX_test"])
    y_train, y_dev, y_test = data["y_train"], data["y_dev"], data["y_test"]

    # some constants
    sent_len = X_train.shape[1]
    max_paths = kX_train.shape[2]
    max_path_len = kX_train.shape[3]
    num_labels = y_train.shape[1]
    attention_size = settings.get('attention_size', emb_sents.shape[1])

    ############################
    #   KNOWLEDGE PROCESSING   #
    ############################
    sentence_inputs = Input(shape=(sent_len, ), dtype='int32',
                            name="sentence_inputs")
    knowledge_inputs = Input(shape=(sent_len, max_paths, max_path_len, ),
                             dtype='int32', name="knowledge_inputs")

    emb_knowledge_ids = Embedding(
        emb_knowledge.shape[0], emb_knowledge.shape[1],
        mask_zero=True, weights=[emb_knowledge],
        trainable=train_embeddings)(knowledge_inputs)
    embedded_word_ids = Embedding(
        emb_sents.shape[0], emb_sents.shape[1],
        mask_zero=True, weights=[emb_sents],
        trainable=train_embeddings,
        input_length=sent_len)(sentence_inputs)

    # LSTM stack that reduces every knowledge path to a single vector.
    paths_lstm_1 = LSTM(lstm_size, return_sequences=True)
    paths_lstm_2 = LSTM(lstm_size, return_sequences=True)
    paths_lstm_last = LSTM(lstm_size)

    def _reduce_paths(x):
        """Run the path LSTM stack per sentence position and re-stack."""
        per_position = []
        for i in range(sent_len):
            reduced = TimeDistributed(paths_lstm_1)(x[:, i, :, :, :])
            reduced = TimeDistributed(paths_lstm_2)(reduced)
            reduced = TimeDistributed(paths_lstm_last)(reduced)
            per_position.append(reduced)
        return K.stack(per_position, axis=1)

    # NOTE(review): the LSTM layers are invoked inside a Lambda; confirm the
    # Keras version in use tracks (and trains) their weights in that case.
    reduce_paths_to_vector = Lambda(
        _reduce_paths,
        output_shape=(sent_len, max_paths, lstm_size))(emb_knowledge_ids)

    attended_knowledge = attention_knowledge(
        embedded_word_ids, None, attention_size,
        return_alphas=False, summed_up=True)(reduce_paths_to_vector)

    concat_sequences = Lambda(lambda x: tf.concat([x[0], x[1]], axis=-1))(
        [embedded_word_ids, attended_knowledge])

    # define bilstm + dropout
    sent_bilstm = Bidirectional(LSTM(lstm_size))(concat_sequences)
    sent_bilstm_dropout = Dropout(dropout)(sent_bilstm)
    output_layer = Dense(num_labels,
                         activation='softmax')(sent_bilstm_dropout)

    model = Model(inputs=[sentence_inputs, knowledge_inputs],
                  outputs=output_layer)
    adam = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])

    # Keep only the weights of the epoch with the best monitored value.
    e = ModelCheckpoint(model_file, monitor=monitor, verbose=0,
                        save_best_only=True, save_weights_only=True,
                        mode='auto', period=1)
    model.fit([X_train, kX_train], y_train,
              batch_size=batch_size, epochs=epochs,
              validation_data=([X_dev, kX_dev], y_dev),
              callbacks=[e], verbose=1)

    model.load_weights(model_file)
    y_pred_test = model.predict([X_test, kX_test], verbose=False)
    y_pred_dev = model.predict([X_dev, kX_dev], verbose=False)

    return ([np.argmax(pred) for pred in y_pred_test],
            [np.argmax(pred) for pred in y_pred_dev])
def execute(self, kf: datasets.KFoldedDataSet, model: keras.Model, ec: ExecutionConfig):
    """Run this training stage on one fold.

    Optionally unfreezes and recompiles the model, loads initial or resumed
    weights, assembles the callback list (CSV logger, best-weights
    checkpoint, result drawing) and calls ``kf.trainOnFold``.

    Args:
        kf: Folded dataset that performs the actual training.
        model: Model to train; wrapped with ``multi_gpu_model`` when
            ``self.cfg.gpus > 1``.
        ec: Execution config providing fold, stage, subsample and the
            metrics / weights paths.
    """
    if 'unfreeze_encoder' in self.dict and self.dict['unfreeze_encoder']:
        set_trainable(model)
    if self.loss or self.lr:
        self.cfg.compile(model, self.cfg.createOptimizer(self.lr), self.loss)

    cb = [] + self.cfg.callbacks
    if self.initial_weights is not None:
        model.load_weights(self.initial_weights)
    # 'callbacks' overrides the configured callbacks ...
    if 'callbacks' in self.dict:
        cb = configloader.parse("callbacks", self.dict['callbacks'])
    # ... while 'extra_callbacks' adds to whatever is active.  Previously it
    # replaced the list, silently dropping the other callbacks.
    if 'extra_callbacks' in self.dict:
        extra = configloader.parse("callbacks", self.dict['extra_callbacks'])
        cb = list(cb) + list(extra)

    kepoch = -1
    if self.cfg.resume:
        kepoch = maxEpoch(ec.metricsPath())
        if kepoch != -1:
            # NOTE(review): self.epochs is reduced here and kepoch is
            # subtracted again below; looks like a double subtraction on
            # resume -- confirm against trainOnFold before changing.
            self.epochs = self.epochs - kepoch
            if os.path.exists(ec.weightsPath()):
                model.load_weights(ec.weightsPath())
            cb.append(CSVLogger(ec.metricsPath(), append=True, start=kepoch))
        else:
            cb.append(CSVLogger(ec.metricsPath()))
            kepoch = 0
    else:
        kepoch = 0
        cb.append(CSVLogger(ec.metricsPath()))

    md = self.cfg.primary_metric_mode
    if self.cfg.gpus > 1:
        checkpoint_cls = alt.AltModelCheckpoint
    else:
        checkpoint_cls = keras.callbacks.ModelCheckpoint
    cb.append(
        checkpoint_cls(ec.weightsPath(),
                       save_best_only=True,
                       monitor=self.cfg.primary_metric,
                       mode=md,
                       verbose=1))

    cb.append(
        DrawResults(self.cfg, kf, ec.fold, ec.stage,
                    negatives=self.negatives))
    if self.cfg.showDataExamples:
        cb.append(
            DrawResults(self.cfg, kf, ec.fold, ec.stage,
                        negatives=self.negatives, train=True))

    # Nothing left to train.
    if self.epochs - kepoch == 0:
        return

    if self.cfg.gpus > 1:
        model = multi_gpu_model(model, self.cfg.gpus, True, True)
    kf.trainOnFold(ec.fold, model, cb, self.epochs - kepoch, self.negatives,
                   subsample=ec.subsample,
                   validation_negatives=self.validation_negatives)
from keras.applications import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras import Model

IMG_PATH = ['./chest_xray_images/normal/15268.jpg', '0']
IMG_SHAPE = (320, 320, 3)

# Load one grayscale sample, resize it, scale it to [0, 1] and replicate the
# single channel three times to match the backbone's input.
test_img = load_img(path=IMG_PATH[0], color_mode='grayscale')
test_img = img_to_array(img=test_img, data_format='channels_last')
test_img = cv2.resize(test_img, dsize=IMG_SHAPE[:2],
                      interpolation=cv2.INTER_NEAREST)
test_img = np.expand_dims(test_img, axis=-1)
test_img = test_img.astype(np.uint8)
test_img = test_img / 255.
test_img = np.concatenate((test_img, test_img, test_img), axis=-1)
print('external image(s) shape:', test_img.shape)

# DenseNet121 backbone + global average pooling + 14 sigmoid outputs.
backbone = DenseNet121(include_top=False, weights=None,
                       input_shape=(320, 320, 3))
backbone_out = backbone.output
gap = GlobalAveragePooling2D(name='pooling_layer')(backbone_out)
output = Dense(units=14, activation='sigmoid', name='output_layer')(gap)

predictor = Model(inputs=backbone.input, outputs=output)
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line to the output.
predictor.summary()
predictor.load_weights(
    'C:/Users/Arman/Desktop/Covid19-Detection/checkpoints/CheXNet/CheXNet_v0.3.0.h5'
)
print(predictor.predict(np.expand_dims(test_img, axis=0)))
class HybridModel(object):
    """Text classifier combining a char CNN, a term CNN and dense features.

    Two token-sequence branches (characters and terms) each apply parallel
    1-D convolutions with global max and average pooling; their outputs are
    concatenated with a small dense projection of hand-crafted features and
    classified by a softmax layer named ``'y'``.

    With ``use_multi_task=True`` an additional ``train_model`` is built that
    also predicts the classes from each branch alone (outputs ``y1``, ``y2``,
    ``y3``); ``model`` (single output) is always used for inference.
    """

    # Class names indexed by predicted class id.  These are runtime strings
    # written to the output files and must not be altered.
    _LABELS = ['人类作者', '自动摘要', '机器作者', '机器翻译']

    def __init__(self,
                 C=4,
                 V=40000,
                 MAX_LEN=600,
                 MAX_LEN_TERM=300,
                 NUM_FEAT=8,
                 char_embed_matrix=None,
                 term_embed_matrix=None,
                 use_multi_task=False,
                 name='hybridmodel.h5',
                 PE=False):
        """Build and compile the model(s).

        Args:
            C: Number of classes.
            V: Vocabulary size used when no embedding matrix is supplied.
            MAX_LEN: Length of the character-id sequence.
            MAX_LEN_TERM: Length of the term-id sequence.
            NUM_FEAT: Number of hand-crafted float features.
            char_embed_matrix: Optional pretrained character embeddings.
            term_embed_matrix: Optional pretrained term embeddings.
            use_multi_task: Also build the four-output ``train_model``.
            name: Default path for saving / loading weights.
            PE: Add learned position-embedding inputs to both branches.
        """
        # Experiment log kept from the original author:
        # +bn2 0.975  +bn1 0.986
        # +bn1, max+avg pool 0.987
        # squeeze embedding (128) 0.985  (64+conv64) 0.983
        # remove the dense layer of the sub-networks 0.987
        # squeeze embedding+relu 0.985
        # conv 64 0.987  conv 128 0.988
        self.name = name
        self.use_multi_task = use_multi_task

        kss = [2, 3, 4, 5]

        # CNNs do not support masking, so mask_zero=True is not used.
        char_in, char_pe_in, hs = self._text_branch(
            MAX_LEN, V, char_embed_matrix, kss, 'char' if PE else None)
        if self.use_multi_task:
            y1 = Dense(C, activation='softmax', name='y1')(hs)

        term_in, term_pe_in, hsterm = self._text_branch(
            MAX_LEN_TERM, V, term_embed_matrix, kss, 'term' if PE else None)

        input_feat = Input(shape=(NUM_FEAT, ), dtype='float32')
        hfeat = Dense(8, activation='relu')(input_feat)

        hs = Concatenate()([hs, hsterm, hfeat])
        hs = BatchNormalization()(hs)
        z = Dense(128, activation='relu')(hs)
        z = Dense(C, activation='softmax', name='y')(z)

        model_inputs = [char_in, term_in, input_feat]
        if PE:
            model_inputs = model_inputs + [char_pe_in, term_pe_in]

        model = Model(model_inputs, z)
        opt = Adagrad(lr=0.005)
        model.compile(opt, 'categorical_crossentropy', metrics=['acc'])
        self.model = model

        if self.use_multi_task:
            y2 = Dense(C, activation='softmax', name='y2')(hsterm)
            y3 = Dense(C, activation='softmax', name='y3')(hfeat)
            self.train_model = Model(model_inputs, [z, y1, y2, y3])
            self.train_model.compile(opt,
                                     'categorical_crossentropy',
                                     metrics=['acc'])

    @staticmethod
    def _text_branch(max_len, vocab_size, embed_matrix, kernel_sizes,
                     pe_tag=None):
        """Build one embedding + multi-kernel CNN branch.

        Args:
            max_len: Sequence length of the id input.
            vocab_size: Vocabulary size when ``embed_matrix`` is None.
            embed_matrix: Optional pretrained matrix; used twice, once frozen
                and once trainable, and the two embeddings are concatenated.
            kernel_sizes: Kernel widths of the parallel Conv1D layers.
            pe_tag: ``'char'`` / ``'term'`` to add a position-embedding input
                named ``PE_<tag>_in``; None to omit it.

        Returns:
            Tuple ``(id_input, position_input_or_None, pooled_features)``.
        """
        token_input = Input(shape=(max_len, ), dtype='int32')
        if embed_matrix is None:
            x = Embedding(vocab_size, 32)(token_input)
        else:
            frozen = Embedding(embed_matrix.shape[0],
                               embed_matrix.shape[1],
                               weights=[embed_matrix],
                               trainable=False)
            tuned = Embedding(embed_matrix.shape[0],
                              embed_matrix.shape[1],
                              weights=[embed_matrix],
                              trainable=True)
            x_frozen = frozen(token_input)
            x_tuned = tuned(token_input)
            x = Concatenate()([x_frozen, x_tuned])

        position_input = None
        if pe_tag is not None:
            position_input = Input(shape=(max_len, ),
                                   dtype='int32',
                                   name='PE_{}_in'.format(pe_tag))
            positions = Embedding(max_len, 32,
                                  name='PE{}'.format(pe_tag))(position_input)
            x = Concatenate()([x, positions])

        pooled = []
        for ks in kernel_sizes:
            h = Conv1D(128, ks, activation='relu', padding='same')(x)
            pooled.append(GlobalMaxPool1D()(h))
            pooled.append(GlobalAveragePooling1D()(h))
        return token_input, position_input, Concatenate()(pooled)

    def load_weights(self, name=None):
        """Load weights from ``name`` (default: ``self.name``).

        The weights are loaded into ``train_model`` in multi-task mode and
        into ``model`` otherwise.
        """
        save_path = self.name if name is None else name
        if self.use_multi_task:
            self.train_model.load_weights(save_path)
        else:
            self.model.load_weights(save_path)

    def train(self, x, y, x_val, y_val, x_ts, y_ts):
        """Fit with early stopping, then evaluate on the test split.

        The best checkpoint (``save_best_only=True``) is written to
        ``self.name`` and reloaded before the final evaluation; test metrics
        are printed before and after the reload, followed by the confusion
        matrix.
        """
        early_stop = EarlyStopping(min_delta=0.01, patience=2)
        save_path = self.name
        save_best = ModelCheckpoint(save_path, save_best_only=True)
        callbacks = [early_stop, save_best]

        # validation_data is passed as a tuple, the documented form; newer
        # Keras versions do not unpack a list here.
        if self.use_multi_task:
            self.train_model.fit(
                x, [y, y, y, y],
                validation_data=(x_val, [y_val, y_val, y_val, y_val]),
                batch_size=128,
                epochs=20,
                callbacks=callbacks)
        else:
            self.model.fit(x,
                           y,
                           validation_data=(x_val, y_val),
                           batch_size=128,
                           epochs=20,
                           callbacks=callbacks)

        metric = self.model.evaluate(x_ts, y_ts)
        print(metric)
        self.load_weights()
        metric = self.model.evaluate(x_ts, y_ts, batch_size=512)
        print(metric)
        y_pred = self.model.predict(x_ts, batch_size=512)
        cnf_matrix = confusion_matrix(convert_y(y_ts), convert_y(y_pred))
        print(cnf_matrix)

    def test(self, x, ids, out_file):
        """Predict ``x`` and write one ``id,label`` line per sample."""
        labels = self._LABELS
        y_pred = convert_y(self.model.predict(x, batch_size=512))
        with open(out_file, 'w', encoding='utf-8') as fout:
            for sample_id, yi in zip(ids, y_pred):
                fout.write('{},{}\n'.format(sample_id, labels[yi]))
        print('done.')

    def predict(self, x):
        """Return the raw model predictions for ``x``."""
        y_pred = self.model.predict(x, batch_size=512)
        return y_pred

    def error_analysis(self, x_ts, y_ts, texts, start_index):
        """Write every misclassified sample to ``error.txt``.

        ``texts[start_index + i]`` is taken as the text of sample ``i``.
        """
        labels = self._LABELS
        y_pred = self.model.predict(x_ts, batch_size=512)
        y_ts, y_pred = convert_y(y_ts), convert_y(y_pred)
        # Explicit UTF-8: the report contains non-ASCII text, and the
        # platform default encoding may be unable to encode it.
        with open('error.txt', 'w', encoding='utf-8') as fout:
            for i in range(y_ts.shape[0]):
                if y_ts[i] != y_pred[i]:
                    fout.write('*****\n{}\n正确标签:{} 分类标签:{}\n'.format(
                        texts[start_index + i], labels[y_ts[i]],
                        labels[y_pred[i]]))
        print('output error done.')
def vgg_16_cbcnn(input_shape,
                 no_classes,
                 bilinear_output_dim,
                 sum_pool=True,
                 weight_decay_constant=5e-4,
                 multi_label=False,
                 weights_path=None):
    """Build a VGG16-based compact bilinear CNN.

    The VGG16 convolutional stack (through ``block5_conv3``, without the last
    pooling) feeds ``compact_bilinear`` with the same feature map twice.  The
    result is optionally sum-pooled over the two spatial axes, passed through
    a signed square root and L2 normalisation, and classified by one dense
    layer.

    Args:
        input_shape: Shape of the input tensor (without batch axis).
        no_classes: Number of output units.
        bilinear_output_dim: Channel size declared for the bilinear output.
        sum_pool: Sum over axes 1 and 2 after the bilinear merge.
        weight_decay_constant: L2 factor applied to every conv / dense kernel.
        multi_label: Use a sigmoid instead of a softmax output activation.
        weights_path: Optional weight file, loaded by layer name.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    weights_regularizer = regularizers.l2(weight_decay_constant)

    # Input layer
    img_input = Input(shape=input_shape, name='spectr_input')

    # VGG16 blocks: (block number, filters, convs, pool afterwards?)
    block_plan = ((1, 64, 2, True), (2, 128, 2, True), (3, 256, 3, True),
                  (4, 512, 3, True), (5, 512, 3, False))
    x = img_input
    for block_no, n_filters, n_convs, with_pool in block_plan:
        for conv_no in range(1, n_convs + 1):
            x = Conv2D(n_filters, (3, 3),
                       activation='relu',
                       padding='same',
                       name='block%d_conv%d' % (block_no, conv_no),
                       kernel_regularizer=weights_regularizer)(x)
        if with_pool:
            x = MaxPooling2D((2, 2), strides=(2, 2),
                             name='block%d_pool' % block_no)(x)

    # Merge using compact bilinear method
    compact_bilinear_arg_list = [x, x]
    output_shape_x = x.get_shape().as_list()[1:]
    output_shape_cb = (
        output_shape_x[0],
        output_shape_x[1],
        bilinear_output_dim,
    )
    # A Lambda over the list of inputs replaces the legacy Keras-1
    # ``merge(..., mode=callable)`` call, which no longer exists in current
    # Keras; the callable still receives the list of tensors.
    x = Lambda(compact_bilinear,
               output_shape=output_shape_cb,
               name='compact_bilinear')(compact_bilinear_arg_list)

    # If sum_pool=True do a global sum pooling
    if sum_pool:
        # Axes 1 and 2 are summed; the remaining axis holds the channels.
        x = Lambda(lambda x: K.sum(x, axis=[1, 2]))(x)

    # Sign sqrt and L2 normalize result
    x = Lambda(lambda x: K.sign(x) * K.sqrt(K.abs(x)))(x)
    x = Lambda(lambda x: K.l2_normalize(x, axis=-1))(x)

    # final dense layer (the layer name is fixed so by-name loading matches)
    final_activation = 'sigmoid' if multi_label else 'softmax'
    x = Dense(no_classes,
              activation=final_activation,
              name='softmax_layer',
              kernel_regularizer=weights_regularizer)(x)

    # Put together input and output to form model
    model = Model(inputs=[img_input], outputs=[x])
    if weights_path:
        model.load_weights(weights_path, by_name=True)
    return model
def get_model(pre_weight, input_size):
    """Build and compile a VGG-16 style classifier with a 1000-way softmax.

    Args:
        pre_weight: Path to a weights file; it is loaded only if it exists.
        input_size: Shape handed to the Keras ``Input`` layer.

    Returns:
        The compiled Keras ``Model``.
    """
    # (filters, number of stacked 3x3 convolutions) per block; each block is
    # closed by a 2x2 max-pool with stride 2.
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    inputs = Input(input_size)
    tensor = inputs
    for filters, repeats in conv_blocks:
        for _ in range(repeats):
            tensor = Conv2D(filters, (3, 3), padding='same',
                            activation='relu')(tensor)
        tensor = MaxPool2D((2, 2), strides=(2, 2))(tensor)

    # Classifier head: flatten, two 4096-unit ReLU layers, softmax output.
    tensor = Flatten()(tensor)
    for _ in range(2):
        tensor = Dense(4096, activation='relu')(tensor)
    output = Dense(1000, activation='softmax')(tensor)

    model = Model(inputs=inputs, outputs=output)
    model.summary()

    # Restore previously saved weights when the file is present.
    if os.path.exists(pre_weight):
        print('exist')
        model.load_weights(pre_weight)

    model.compile(optimizer=Adam(lr=1e-4, decay=0.5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
# NOTE: a stack of BatchNormalization / Dense(120, 60, 30, 10; relu) layers
# that used to precede the output layer is disabled; the two-unit sigmoid
# layer is applied to `x` directly.
x = Dense(2, activation='sigmoid')(x)

final_model = Model(input=input, output=x)
final_model.compile(loss='mean_squared_logarithmic_error',
                    optimizer='adam',
                    metrics=['accuracy'])
final_model.summary()
final_model.load_weights('keras_models/weights.best.Inceptionv3.hdf5')

# Predict one tensor at a time (a batch axis is added for each call).
# `prd` keeps the full output row per sample; `predictions` keeps only the
# value at index 1 of each row, which is what gets written to the CSV.
prd = [
    final_model.predict(np.expand_dims(tensor, axis=0))[0]
    for tensor in test_tensors
]
predictions = [row_scores[1] for row_scores in prd]

print(log_loss(test_targets, np.array(prd)))

# Write one "id,label" row per test file; the id is the file name up to its
# first dot.
with open('kera_data/upload_data/result.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'label'])
    for i, test_file in enumerate(test_files):
        sample_id = os.path.basename(test_file).split('.')[0]
        writer.writerow([sample_id, predictions[i]])
# Assemble the three-input model and compile it with the configured loss and
# optimizer.
model = Model([input, input_pos_x, input_pos_y], x)
model.compile(loss=loss, optimizer=OPTIMIZER(lr=learning_rate, decay=0.1))
model.summary()

# Callbacks: keep the best checkpoint (by validation loss), log every epoch
# to CSV and decay the learning rate exponentially.
checkpoint = ModelCheckpoint(
    filepath=model_save_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
# NOTE: `lr_reducer` is built but is not part of `callbacks` below.
lr_reducer = ReduceLROnPlateau(
    factor=np.sqrt(0.1),
    cooldown=0,
    patience=5,
    min_lr=0.5e-6,
)
train_logger = CSVLogger(log_save_path)

num_epochs_per_decay = 2.4
step_decay = len(train_gen) * num_epochs_per_decay
lr_exp = LearningRateExponentialDecay(0.94, step_decay)

callbacks = [checkpoint, lr_exp, train_logger]

# Best-effort resume: if the checkpoint cannot be loaded, report why and
# carry on training from the current weights.
try:
    model.load_weights(model_save_path)
except Exception as e:
    print(e)
else:
    print('Loading pretrain_weights!')

print('Using real-time data augmentation.')
model.fit_generator(
    train_gen,
    validation_data=val_gen,
    validation_steps=len(val_gen),
    epochs=epochs,
    workers=1,
    steps_per_epoch=len(train_gen) / 8,
    callbacks=callbacks,
)
def VGG19(input_shape, include_top=True, weights='imagenet', pooling=None,
          classes=1000, final_activation='sigmoid', **kwargs):
    """Build a VGG-19 network, optionally loading ImageNet weights.

    Args:
        input_shape: Shape handed to the Keras ``Input`` layer.
        include_top: Whether to append the flatten + three dense layers.
        weights: ``'imagenet'`` downloads and loads the pretrained weights;
            any other value skips weight loading.
        pooling: With ``include_top=False``, ``'avg'`` or ``'max'`` appends
            the matching global pooling layer; other values append nothing.
        classes: Number of units in the ``predictions`` layer.
        final_activation: Activation of the ``predictions`` layer.
        **kwargs: Accepted but not used by this function.

    Returns:
        The Keras ``Model`` named ``'vgg19'``.
    """
    # (filters, number of 3x3 convolutions) per block; every block ends with
    # a 2x2 max-pool.
    block_spec = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    img_input = Input(input_shape)
    x = img_input
    for block_idx, (filters, depth) in enumerate(block_spec, start=1):
        for conv_idx in range(1, depth + 1):
            # Only the very first convolution sets an explicit bias
            # initializer; all others rely on the wrapper's defaults.
            if block_idx == 1 and conv_idx == 1:
                extra = {'bias_initializer': 'zero'}
            else:
                extra = {}
            x = Conv2D_Initialize(
                filters, (3, 3),
                activation='relu',
                padding='same',
                name='block{}_conv{}'.format(block_idx, conv_idx),
                **extra
            )(x)
        x = layers.MaxPooling2D(
            (2, 2), strides=(2, 2),
            name='block{}_pool'.format(block_idx),
        )(x)

    if include_top:
        # Classification block
        x = layers.Flatten(name='flatten')(x)
        for fc_name in ('fc1', 'fc2'):
            x = Dense_Initialize(4096, activation='relu', name=fc_name)(x)
        x = Dense_Initialize(classes, activation=final_activation,
                             name='predictions')(x)
    elif pooling == 'avg':
        x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = layers.GlobalMaxPooling2D()(x)

    # Resolve the pretrained weights file (downloaded and cached on demand).
    weights_path = None
    if weights == 'imagenet':
        if include_top:
            file_name = 'vgg19_weights_tf_dim_ordering_tf_kernels.h5'
            origin = WEIGHTS_PATH
            checksum = 'cbe5617147190e668d6c5d5026f83318'
        else:
            file_name = 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'
            origin = WEIGHTS_PATH_NO_TOP
            checksum = '253f8cb515780f3b799900260a226db6'
        weights_path = keras_utils.get_file(
            file_name, origin, cache_subdir='models', file_hash=checksum)

    model = Model(img_input, x, name='vgg19')

    # Layers are matched by name and mismatching shapes are skipped.
    if weights_path and weights:
        model.load_weights(weights_path, by_name=True, skip_mismatch=True)
    return model
# Project the decoder output through the dense layer to obtain the
# training-time predictions.
output = dec_dense(dec_outputs)

# Assemble and compile the training model.
model = Model([enc_inputs, dec_inputs], output)
model.compile(optimizer=RMSprop(), loss='categorical_crossentropy')
model.summary()

# Training is disabled here; weights from a previous run are restored instead.
# model.fit([encoder_input_data, decoder_input_data], decoder_output_data)
path_to_weight = "chatbot_seq2seq_v3.h5"
model.load_weights(path_to_weight)


def make_inference_models():
    """Build the step-wise inference decoder and print its summary.

    The decoder reuses the module-level ``dec_lstm``, ``dec_embedding``,
    ``dec_dense`` and ``dec_inputs`` objects and takes the previous hidden and
    cell state (200 units each) as two extra inputs.
    """
    state_inputs = [Input(shape=(200,)), Input(shape=(200,))]

    decoded, next_h, next_c = dec_lstm(dec_embedding,
                                       initial_state=state_inputs)
    decoded = dec_dense(decoded)

    inference_decoder = Model(
        inputs=[dec_inputs] + state_inputs,
        outputs=[decoded] + [next_h, next_c])

    print('Inference decoder:')
    inference_decoder.summary()
def _checkpoint_eer(path):
    """Return the EER encoded in an ``acc_<acc>-eer_<eer>.h5`` name, or None."""
    stem = os.path.splitext(os.path.basename(path))[0]
    try:
        return float(stem.split("-")[1].split("_")[1])
    except (IndexError, ValueError):
        return None


def main(args):
    """Train the speaker classifier or run the two-model identification test.

    When ``args.mode_type`` starts with ``'train'`` an SE-ResNet with a softmax
    head is (optionally warm-started and then) trained. Otherwise two
    pretrained models are loaded, their enroll/test distances are blended
    (0.3 deep-speaker + 0.7 SE-ResNet) and the identification score is printed.

    Args:
        args: Parsed command-line arguments; ``mode_type`` is read here and
            the object is forwarded to ``CreateDataset``.
    """
    typeName = args.mode_type

    if typeName.startswith('train'):
        if not os.path.exists(c.MODEL_DIR):
            os.mkdir(c.MODEL_DIR)

        train_dataset, val_dataset = CreateDataset(args, split_ratio=0.1)
        nclass = len(set(train_dataset[1]))
        print("nclass = ",nclass)

        labels_to_id = Map_label_to_dict(labels=train_dataset[1])

        # load the model
        model = models.SE_ResNet(c.INPUT_SHPE)
        # model = models.Deep_speaker_model(c.INPUT_SHPE)

        # add softmax layer
        x = model.output
        x = Dense(nclass, activation='softmax', name='softmax')(x)
        model = Model(model.input, x)

        # Load the pre-trained model.
        # NOTE(review): the directory listing is immediately replaced by the
        # glob result below, and the glob yields paths that already include
        # c.TRAIN_DEV_SET, which are then appended to MODEL_DIR/aishell when
        # loading. Both are kept as in the original; confirm which directory
        # is intended.
        filenames = os.listdir(f'{c.MODEL_DIR}/aishell')
        filenames = list(glob.iglob(c.TRAIN_DEV_SET + "/*.h5"))

        # Choose the checkpoint with the numerically lowest EER. Names that do
        # not follow the acc_<acc>-eer_<eer> pattern are ignored instead of
        # raising. Ties resolve to the first file, as list.index() did.
        scored = [
            (eer, index)
            for index, eer in enumerate(map(_checkpoint_eer, filenames))
            if eer is not None
        ]
        if scored:
            optimal_model_index = min(scored)[1]
            model.load_weights(
                f'{c.MODEL_DIR}/aishell/{filenames[optimal_model_index]}')

        # train model
        sgd = optimizers.SGD(lr=c.LEARN_RATE, momentum=0.9)
        model.compile(loss='categorical_crossentropy', optimizer=sgd,
                      metrics=['accuracy'])
        model.fit_generator(
            Batch_generator(train_dataset, labels_to_id, c.BATCH_SIZE, nclass),
            steps_per_epoch=len(train_dataset[0]) // c.BATCH_SIZE,
            epochs=30,
            validation_data=load_validation_data(
                val_dataset, labels_to_id, nclass),
            validation_steps=len(val_dataset[0]) // c.BATCH_SIZE,
            callbacks=[
                ModelCheckpoint(f'{c.MODEL_DIR}/aishell/best.h5',
                                monitor='val_loss', save_best_only=True,
                                mode='min'),
                ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                  patience=10, mode='min'),
                EarlyStopping(monitor='val_loss', patience=10),
            ])
    else:
        test_dataset, enroll_dataset = CreateDataset(
            args, split_ratio=0, target=c.TARGET)

        # load weights (matched by layer name)
        model_se = models.SE_ResNet(c.INPUT_SHPE)
        model_se.load_weights(
            f'{c.MODEL_DIR}/aishell/seresnet/acc_0.707-eer_0.292.h5',
            by_name=True)
        model_dp = models.Deep_speaker_model(c.INPUT_SHPE)
        model_dp.load_weights(
            f'{c.MODEL_DIR}/aishell/deepspeaker/acc_0.685-eer_0.313.h5',
            by_name=True)

        # load all data
        print("loading data...")
        (enroll_x, enroll_y) = load_all_data(enroll_dataset, 'enroll')
        (test_x, test_y) = load_all_data(test_dataset, 'test')

        def distance_of_model(model):
            """Distances between the model's enroll and test outputs."""
            enroll_pre = np.squeeze(model.predict(enroll_x))
            test_pre = np.squeeze(model.predict(test_x))
            return caculate_distance(enroll_dataset, enroll_pre, test_pre)

        distances_dp = distance_of_model(model_dp)
        distances_se = distance_of_model(model_se)
        # Weighted blend of the two models' normalised distances.
        distances = (0.3 * normalization_frames(distances_dp)
                     + 0.7 * normalization_frames(distances_se))

        # speaker identification
        test_y_pre = speaker_identification(enroll_dataset, distances,
                                            enroll_y)

        # compute result
        result = compute_result(test_y_pre, test_y)
        score = sum(result)/len(result)
        print(f"score={score}")
# Fit the generators' data-dependent statistics on their respective splits.
train_gen.fit(x_train)
valid_gen.fit(x_valid)

filename = "cancer_classification.h5"

# Keep only the best full model (by validation accuracy), checked every epoch.
checkpoint = ModelCheckpoint(
    filename,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)
# Early stopping on val_acc (patience 10) is currently disabled.

# Train on augmented batches generated in real time.
train_flow = train_gen.flow(x_train, y_train, batch_size=32)
valid_flow = valid_gen.flow(x_valid, y_valid)
model_final.fit_generator(
    train_flow,
    samples_per_epoch=nb_train_samples,
    epochs=epochs,
    validation_data=valid_flow,
    nb_val_samples=nb_validation_samples,
    callbacks=[checkpoint],
    steps_per_epoch=len(x_train) / 32,
)

# Reload the best checkpoint written during training before predicting.
model_final.load_weights(filename)

# NOTE(review): each element of x_test is passed to predict() without an
# added batch axis -- confirm that x_test already has the expected layout.
predictions = np.asarray(
    [model_final.predict(feature) for feature in x_test])
print(predictions.shape)
print(predictions[0])
def train_model(data, topic, PROCESSED_DIR, SEED_FOLDER, **kwargs):
    """Train the topic-gated BiLSTM classifier and predict on dev and test.

    Args:
        data: Dict with ``X_train``/``X_dev``/``X_test`` and
            ``y_train``/``y_dev``/``y_test``. The keys ``X_topic_train``,
            ``X_topic_dev`` and ``X_topic_test`` are added to it in place.
        topic: Underscore-separated topic string; its averaged word embedding
            is fed to the model as the gate vector of every sentence.
        PROCESSED_DIR: Path prefix of ``vocab_we.pkl`` and the embedding
            matrix file.
        SEED_FOLDER: Path prefix under which the best weights are stored.
        **kwargs: Must contain ``model_settings``, a dict with the
            hyper-parameters read below.

    Returns:
        ``(test_predictions, val_predictions)`` as lists of arg-max class
        indices (``return_probs`` and ``return_model`` are fixed to False).
    """
    settings = kwargs['model_settings']
    dropout = settings["dropout"]
    lstm_size = settings["lstm_size"]
    monitor = settings["monitor"]
    batch_size = settings["batch_size"]
    epochs = settings["epochs"]
    learning_rate = settings["learning_rate"]
    train_embeddings = settings["train_embeddings"]
    return_probs = False
    return_model = False
    model_file = SEED_FOLDER+topic+"_"+settings["model_file_suffix"]
    seed = settings['current_seed']

    # set configs for memory usage and reproducibility:
    # https://stackoverflow.com/questions/38469632/tensorflow-non-repeatable-results
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    rn.seed(seed)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.gpu_options.per_process_gpu_memory_fraction = 0.3

    # The TensorFlow graph-level seed is fixed to 1, independent of `seed`.
    graph_level_seed = 1
    tf.set_random_seed(graph_level_seed)
    sess = tf.Session(config=config)
    K.set_session(sess)

    # load vocab and word embeddings
    vocab_we = load_from_pickle(PROCESSED_DIR+"vocab_we.pkl")
    embeddings_lookup = np.load(
        PROCESSED_DIR + "index_to_vec_we"
        + settings['word_embeddings'][1] + ".npy")

    # load data
    X_train, X_dev, X_test = data["X_train"], data["X_dev"], data["X_test"]
    y_train, y_dev, y_test = data["y_train"], data["y_dev"], data["y_test"]

    # The topic embedding is the same for every sentence, so compute it once
    # and repeat it to the length of each split.
    topic_embedding = get_avg_embedding(
        topic.split('_'), embeddings_lookup, vocab_we)
    for split in ('train', 'dev', 'test'):
        data['X_topic_' + split] = (
            [topic_embedding] * len(data['X_' + split]))
    X_topic_train = data["X_topic_train"]
    X_topic_dev = data["X_topic_dev"]
    X_topic_test = data["X_topic_test"]

    # some constants
    sent_len = X_train.shape[1]
    num_labels = y_train.shape[1]

    sentence_input = Input(shape=(sent_len,), dtype='int32',
                           name="text_input")
    gate_vector_input = Input(shape=(300,), dtype='float32',
                              name="gate_vectors_each_sentence")
    embedded_layer = Embedding(
        embeddings_lookup.shape[0], embeddings_lookup.shape[1],
        mask_zero=True, trainable=train_embeddings, input_length=sent_len,
        weights=[embeddings_lookup])(sentence_input)
    bilstm_layer = Bidirectional(custom_LSTM_fo(lstm_size))(
        [embedded_layer, gate_vector_input])
    dropout_layer = Dropout(dropout)(bilstm_layer)
    output_layer = Dense(num_labels, activation='softmax')(dropout_layer)

    model = Model(inputs=[sentence_input, gate_vector_input],
                  outputs=output_layer)
    adam = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])

    # Keep only the weights of the best epoch according to `monitor`.
    best_weights = ModelCheckpoint(
        model_file, monitor=monitor, verbose=0, save_best_only=True,
        save_weights_only=True, mode='auto', period=1)
    model.fit([X_train, X_topic_train], y_train,
              batch_size=batch_size, epochs=epochs,
              validation_data=([X_dev, X_topic_dev], y_dev),
              callbacks=[best_weights], verbose=1)

    # Restore the best checkpoint before predicting.
    model.load_weights(model_file)

    if return_model:
        return model

    test_predictions = model.predict([X_test, X_topic_test], verbose=False)
    val_predictions = model.predict([X_dev, X_topic_dev], verbose=False)
    if not return_probs:
        test_predictions = [np.argmax(pred) for pred in test_predictions]
        val_predictions = [np.argmax(pred) for pred in val_predictions]
    return test_predictions, val_predictions
def _report_prediction_error(y_true, y_pred):
    """Print the r2 score and the mean/std of the absolute relative error."""
    print('r2 score: {}'.format(r2_score(y_true, y_pred)))
    abs_percent_diff = np.abs((y_pred.flatten() - y_true) / y_true)
    print('mean = {}, std = {}'.format(np.mean(abs_percent_diff),
                                       np.std(abs_percent_diff)))


def main():
    """Train (or reload) the join estimator and evaluate it on both dataset sizes.

    A combined MLP + CNN model is fitted on the small datasets (80/20 split)
    using the tabular features and the ``ds_bops`` histograms, saved under
    ``trained_models/``, and then scored on the held-out small-dataset split
    and on the complete large-dataset collection.
    """
    print('Training the join cardinality estimator')
    is_train = True
    num_rows, num_columns = 16, 16
    # Alternative target: 'join_selectivity'
    target = 'mbr_tests_selectivity'

    datasets_features_path = 'data/spatial_descriptors/spatial_descriptors_small_datasets.csv'
    datasets_histograms_path = 'data/histograms/small_datasets'
    join_results_path = 'data/join_results/join_results_small_datasets_no_bit.csv'
    features_df = datasets.load_datasets_feature(datasets_features_path)
    join_data, ds1_histograms, ds2_histograms, ds_all_histogram, ds_bops_histogram = datasets.load_join_data(
        features_df, join_results_path, datasets_histograms_path,
        num_rows, num_columns)

    (train_attributes, test_attributes,
     ds1_histograms_train, ds1_histograms_test,
     ds2_histograms_train, ds2_histograms_test,
     ds_all_histogram_train, ds_all_histogram_test,
     ds_bops_histogram_train, ds_bops_histogram_test) = train_test_split(
        join_data, ds1_histograms, ds2_histograms, ds_all_histogram,
        ds_bops_histogram, test_size=0.20, random_state=42)

    # The feature columns are the integer-labelled columns 0..num_features-1;
    # the last 10 columns of the frame are excluded from the count.
    num_features = len(train_attributes.columns) - 10
    feature_columns = list(range(num_features))

    X_train = train_attributes[feature_columns].to_numpy()
    X_test = test_attributes[feature_columns].to_numpy()
    y_train = train_attributes[target]
    y_test = test_attributes[target]

    # Two branches (tabular MLP + histogram CNN) joined by a small dense head.
    mlp = models.create_mlp(X_train.shape[1], regress=False)
    cnn1 = models.create_cnn(num_rows, num_columns, 1, regress=False)
    combined_input = concatenate([mlp.output, cnn1.output])
    x = Dense(4, activation="relu")(combined_input)
    x = Dense(1, activation="linear")(x)
    model = Model(inputs=[mlp.input, cnn1.input], outputs=x)

    EPOCHS = 40
    LR = 1e-2
    opt = Adam(lr=LR, decay=LR / EPOCHS)
    model.compile(loss="mean_absolute_percentage_error", optimizer=opt)

    if is_train:
        print("[INFO] training model...")
        model.fit([X_train, ds_bops_histogram_train], y_train,
                  validation_data=([X_test, ds_bops_histogram_test], y_test),
                  epochs=EPOCHS, batch_size=256)
        model.save('trained_models/model.h5')
        model.save_weights('trained_models/model_weights.h5')
    else:
        model = keras.models.load_model('trained_models/model.h5')
        model.load_weights('trained_models/model_weights.h5')

    print('Test on small datasets')
    y_pred = model.predict([X_test, ds_bops_histogram_test])
    _report_prediction_error(y_test, y_pred)

    print('Test on large datasets')
    datasets_features_path = 'data/spatial_descriptors/spatial_descriptors_large_datasets.csv'
    datasets_histograms_path = 'data/histograms/large_datasets'
    join_results_path = 'data/join_results/join_results_large_datasets_no_bit.csv'
    features_df = datasets.load_datasets_feature(datasets_features_path)
    join_data, ds1_histograms, ds2_histograms, ds_all_histogram, ds_bops_histogram = datasets.load_join_data(
        features_df, join_results_path, datasets_histograms_path,
        num_rows, num_columns)

    # The whole large-dataset collection is used as a test set.
    X_test = join_data[feature_columns].to_numpy()
    y_test = join_data[target]
    y_pred = model.predict([X_test, ds_bops_histogram])
    _report_prediction_error(y_test, y_pred)
class NoteTaggerLSTMTrain(NoteTaggerModelTrain):
    """Trainer for a bidirectional-LSTM note tagger."""

    def __init__(self,
                 lstm_config_path,
                 data,
                 text_column_name,
                 outcome_column_name,
                 window_size,
                 model_save_path,
                 model_name='lstm',
                 word_tags=constants.TAGS,
                 stride_length=None,
                 grid_search=False):
        """
        Implements the NoteTaggerModelTrain class for a bidirectional LSTM
        model. Most Arguments and Keyword Arguments inherited from the parent
        class

        Arguments:
            lstm_config_path (str): path to json file with LSTM configuration
                parameters. It must provide `embedding_path` (pickled
                embedding layer) and `word_to_index` (json vocabulary), plus
                the `model`, `compile`, `callbacks` and `training` sections
                read by the other methods.
        """
        super().__init__(model_name=model_name,
                         data=data,
                         text_column_name=text_column_name,
                         outcome_column_name=outcome_column_name,
                         window_size=window_size,
                         model_save_path=model_save_path,
                         word_tags=word_tags,
                         stride_length=stride_length,
                         grid_search=grid_search)

        # load configuration file
        with open(lstm_config_path, 'r') as f:
            self._config["model_params"] = json.load(f)

        # NOTE: pickle.load executes arbitrary code from the file; the
        # embedding path must only ever point at a trusted artifact.
        with open(self._config['model_params']['embedding_path'],
                  'rb') as embedding_file:
            self._embedding_layer = pickle.load(embedding_file)

        with open(self._config['model_params']['word_to_index'],
                  'r') as word_to_index_file:
            self._word_to_index = json.load(word_to_index_file)

        # build and compile the LSTM model
        self._create_model()

    def _create_model(self):
        """Build and compile the embedding -> BiLSTM stack -> sigmoid model."""
        model_cfg = self._config['model_params']['model']

        input_layer = layers.Input(
            shape=(self._config["notetagger_params"]['window_size'] * 2, ),
            name='input_layer')
        model_layer = self._embedding_layer(input_layer)
        model_layer = layers.Dropout(model_cfg['lstm_dropout'])(model_layer)

        # Every LSTM except the last returns full sequences so the next
        # recurrent layer receives one vector per time step.
        lstm_sizes = model_cfg['lstm_layers']
        last_index = len(lstm_sizes) - 1
        for i, lstm_units in enumerate(lstm_sizes):
            model_layer = layers.Bidirectional(
                layers.LSTM(lstm_units,
                            return_sequences=i < last_index,
                            name='lstm_layer_{}'.format(i)))(model_layer)

        dense_layer = layers.Dense(1, name='dense_layer')(model_layer)
        output_layer = layers.Activation(
            'sigmoid', name='activation_layer')(dense_layer)

        self._model = Model(input_layer, output_layer)
        self._model.compile(**self._config['model_params']['compile'])
        # summary() prints by itself and returns None, so it is not wrapped
        # in print().
        self._model.summary()

    def _token_to_index(self, tokenized_data):
        """Map tokens to vocabulary indices and right-pad with zeros.

        Unknown tokens map to the index of 'unk'. Rows are padded to
        2 * window_size entries; longer rows are left unchanged.
        """
        unk_token = self._word_to_index['unk']
        indexed_data = tokenized_data['tokenized_text'].map(
            lambda tokens:
            [self._word_to_index.get(token, unk_token) for token in tokens])

        max_size = self._config["notetagger_params"]['window_size'] * 2
        padded_data = indexed_data.map(
            lambda tokens: tokens + [0] * (max_size - len(tokens)))

        X = np.array(padded_data.tolist())
        return X

    def _process_text(self, raw_data):
        """
        Takes in a dataframe with raw note text and produces training
        features and outcomes by first tokenizing the text, then mapping the
        tokens to zero-padded vocabulary indices

        Arguments:
            raw_data (Pandas DataFrame): data with a raw text column and
                outcome column

        Returns:
            X_train (array): Array with training features
            y_train (array): Array with training outcomes
        """
        tokenized_data = self._tokenize_text(raw_data=raw_data)
        X_train = self._token_to_index(tokenized_data=tokenized_data)
        y_train = self._get_outcome_value(data=tokenized_data)

        return X_train, y_train

    @staticmethod
    def _checkpoint_sort_key(file_name):
        """Sort key placing `<epoch>-<val_loss>.hdf5` names in epoch order."""
        try:
            epoch = int(file_name.split('-', 1)[0])
        except ValueError:
            # Names without a leading epoch number sort before checkpoints.
            epoch = -1
        return (epoch, file_name)

    def _create_saved_model(self):
        """
        Creates and saves a `NoteTaggerTrainedLSTM` class object with the
        necessary components
        """
        print("Saving Model")
        # initialize trained LSTM class
        self._trained_model = NoteTaggerTrainedLSTM(
            window_size=self._config["notetagger_params"]['window_size'],
            word_tags=self._config["notetagger_params"]['word_tags'],
            stride_length=self._config["notetagger_params"]['stride_length'],
            model_config=self._config['model_params'])

        # set word_to_index
        self._trained_model._word_to_index = self._word_to_index

        # Load the most recent checkpoint. os.listdir() returns entries in
        # arbitrary order, so order them by the epoch number that
        # train_model() puts at the start of every checkpoint file name.
        # (Raises IndexError when no checkpoint was written, as before.)
        checkpoint_names = sorted(os.listdir(self._checkpoints_save_dir),
                                  key=self._checkpoint_sort_key)
        best_model_weights = os.path.join(self._checkpoints_save_dir,
                                          checkpoint_names[-1])
        self._model.load_weights(best_model_weights)
        self._trained_model._model = self._model

        # save model to pickle file
        with open(self._model_save_file, 'wb') as outfile:
            pickle.dump(self._trained_model, outfile)

    def train_model(self, validation_data=None, store_result=True):
        """Train with early stopping and per-epoch checkpoints.

        Checkpoints are written to a temporary directory that is removed
        when training returns; `_create_saved_model` reads from that
        directory, so it is presumably invoked by the parent `train_model`
        while the directory still exists (confirm against the parent class).
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            callback_cfg = self._config['model_params']['callbacks']
            model_callbacks = [
                EarlyStopping(**callback_cfg['early_stopping']),
                ModelCheckpoint(
                    filepath=os.path.join(
                        temp_dir, '{epoch:02d}-{val_loss:.4f}.hdf5'),
                    **callback_cfg['checkpoints'])
            ]
            self._checkpoints_save_dir = temp_dir
            super().train_model(validation_data=validation_data,
                                store_result=store_result,
                                callbacks=model_callbacks,
                                **self._config['model_params']['training'])
# Train and validation generators share the same augmentation: horizontal and
# vertical flips plus rotations of up to 90 degrees.
augmentation = dict(horizontal_flip=True, vertical_flip=True,
                    rotation_range=90)

trdata = ImageDataGenerator(**augmentation)
traindata = trdata.flow(x=X_train, y=y_train)
tsdata = ImageDataGenerator(**augmentation)
testdata = tsdata.flow(x=X_test, y=y_test)

from keras.callbacks import ModelCheckpoint, EarlyStopping

# Save the best full model (lowest validation loss) and stop once the
# validation loss has not improved for 100 epochs.
checkpoint = ModelCheckpoint(
    "ieeercnn_vgg16_1.h5",
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)
early = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=100,
    verbose=1,
    mode='auto',
)

hist = model_final.fit_generator(
    generator=traindata,
    steps_per_epoch=100,
    epochs=1000,
    validation_data=testdata,
    validation_steps=2,
    callbacks=[checkpoint, early],
)

# Persist the final weights and the complete model separately.
model_final.save_weights('model_final_weights.h5')
model_final.save('model_final_architecure.h5')

## read_file
# Disabled inference code. It was wrapped in a triple-quoted string whose
# closing quotes are not visible here, so it is kept as comments.
# model_final.load_weights('model_final_weights.h5')
# img = cv2.imread('images/IMG_6975.jpg')
# ss.setBaseImage(img)
# ss.switchToSelectiveSearchFast()
# ssresults = ss.process()
# imout = img.copy()
# for e,result in enumerate(ssresults):
#     if e < 2000:
#         x,y,w,h = result
#         timage = imout[y:y+h,x:x+w]
#         resized = cv2.resize(timage, (224,224), interpolation = cv2.INTER_AREA)
#         img = np.expand_dims(resized, axis=0)
#         out= model_final.predict(img)
def main():
    """Train (or reload) the note seq2seq model and print sample decodings.

    The encoder/decoder LSTM pair is trained for 5 epochs when no weights
    file exists yet; otherwise the stored weights are loaded. Afterwards the
    sampling models are built and the sequence at index 10 is decoded and
    compared with teacher-forced predictions.
    """
    num_encoder_tokens = 26
    num_decoder_tokens = 130
    latent_dim = 128
    # One file is used for the existence check, saving and loading.
    # Previously the check and save used 's2s_note.h5' while the load used
    # 's2s_fariz.h5'.
    weights_file = 's2s_note.h5'

    encoder_input_data, decoder_input_data, decoder_target_data = get_data()
    encoder_input_data = to_categorical(encoder_input_data,
                                        num_classes=num_encoder_tokens)
    decoder_input_data = to_categorical(decoder_input_data,
                                        num_classes=num_decoder_tokens)
    decoder_target_data = to_categorical(decoder_target_data,
                                         num_classes=num_decoder_tokens)

    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = LSTM(latent_dim, return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    # We discard `encoder_outputs` and only keep the states.
    encoder_states = [state_h, state_c]

    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    # The decoder returns full output sequences and its internal states. The
    # states are unused in the training model but needed for inference.
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                         initial_state=encoder_states)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    # Model turning `encoder_input_data` & `decoder_input_data` into
    # `decoder_target_data`.
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

    if not os.path.exists(weights_file):
        # Run training
        optimizer = Adam(clipnorm=1.0)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizer,
                      metrics=['categorical_accuracy'])
        history = model.fit([encoder_input_data, decoder_input_data],
                            decoder_target_data,
                            batch_size=32,
                            epochs=5,
                            validation_split=0.1)

        train_loss = history.history['loss']
        val_loss = history.history['val_loss']
        plt.plot(range(len(train_loss)), train_loss, label='train loss')
        plt.plot(range(len(val_loss)), val_loss, label='validation loss')
        plt.savefig('loss_train_test.png')

        # Save model
        model.save_weights(weights_file)
    else:
        model.load_weights(weights_file)

    # Define sampling models
    encoder_model = Model(encoder_inputs, encoder_states)

    decoder_state_input_h = Input(shape=(latent_dim, ))
    decoder_state_input_c = Input(shape=(latent_dim, ))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_inputs, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                          [decoder_outputs] + decoder_states)

    def decode(input_seq, beam_width=3):
        """Decode with beam search, or with decode_sequence for width <= 1."""
        if beam_width > 1:
            candidate_list, _ = beam_search(input_seq,
                                            encoder_model,
                                            decoder_model,
                                            num_decoder_tokens,
                                            beam_width=beam_width)
        else:
            candidate_list = decode_sequence(input_seq, encoder_model,
                                             decoder_model,
                                             num_decoder_tokens)
        return candidate_list

    ind = 10
    input_seq = np.expand_dims(encoder_input_data[ind], axis=0)
    beam_width = 1
    res = decode(input_seq, beam_width=beam_width)
    if beam_width == 1:
        print(res)
    else:
        for r in res:
            print("res: ", r)
            print(len(r))

    # Reference sequences for the same sample (first 40 steps).
    print(np.argmax(decoder_input_data, axis=-1)[ind][:40])
    print(np.argmax(decoder_target_data, axis=-1)[ind][:40])

    # Teacher-forced predictions for growing decoder prefixes.
    for i in range(1, 15, 2):
        decoder_prefix = decoder_input_data[ind][:i]
        res2 = model.predict([
            np.expand_dims(encoder_input_data[ind], axis=0),
            np.expand_dims(decoder_prefix, axis=0)
        ])
        print("Input: {}".format(np.argmax(decoder_prefix, axis=-1)))
        print("Output: {}".format(np.argmax(res2, axis=-1)))
        print()
class CharacterTagger:
    """A character-based neural morphological tagger.

    Every word is encoded from its characters: one-hot characters are
    projected to embeddings, passed through parallel convolutions, max-pooled
    over the character axis and fed to highway layers.  The word vectors
    (optionally concatenated with projections of additional word-level
    vectors) go through bidirectional LSTMs and a per-word softmax over tags.

    Args:
        symbols: character vocabulary.
        tags: tag vocabulary.
        reverse: stored on the instance; not used by the code in this class.
        word_rnn: stored on the instance; not read when building the network.
        char_embeddings_size: size of the character embeddings.
        char_conv_layers: number of stacked convolutions per window width.
        char_window_size: convolution width, or a list of widths for
            parallel convolutions.
        char_filters: number of filters per window width (an int is used for
            every width); ``None`` means
            ``min(char_filter_multiple * width, 200)``.
        char_filter_multiple: filters-per-width ratio used when
            ``char_filters`` is ``None``.
        char_highway_layers: number of highway layers.
        conv_dropout: dropout rate between stacked convolutions.
        highway_dropout: dropout rate between highway layers.
        intermediate_dropout: dropout rate between the pooled convolution
            output and the highway layers.
        lstm_dropout: ``dropout`` argument of the word-level LSTMs.
        word_vectorizers: list of ``(input_dim, dense_dim)`` pairs, one per
            additional word-level input.
        word_lstm_layers: number of word-level LSTM layers.
        word_lstm_units: hidden size(s) of the word-level LSTMs.
        word_dropout: dropout rate applied to the word vectors.
        regularizer: l2 coefficient for the activity regularizer of the
            output layer, or ``None``.
        verbose: if positive, the model summary is logged after building.
    """

    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 reverse=False,
                 word_rnn="cnn",
                 char_embeddings_size=16,
                 char_conv_layers=1,
                 char_window_size=5,
                 char_filters=None,
                 char_filter_multiple=25,
                 char_highway_layers=1,
                 conv_dropout=0.0,
                 highway_dropout=0.0,
                 intermediate_dropout=0.0,
                 lstm_dropout=0.0,
                 word_vectorizers=None,
                 word_lstm_layers=1,
                 word_lstm_units=128,
                 word_dropout=0.0,
                 regularizer=None,
                 verbose=1):
        self.symbols = symbols
        self.tags = tags
        self.reverse = reverse
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        # a list of additional vectorizer dimensions
        self.word_vectorizers = word_vectorizers
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        self.regularizer = regularizer
        self.verbose = verbose
        self.initialize()
        log.info("{} symbols, {} tags in CharacterTagger".format(
            self.symbols_number_, self.tags_number_))
        self.build()

    def initialize(self):
        """Normalize scalar hyperparameters to lists and validate lengths.

        Raises:
            ValueError: if the numbers of window sizes and filter sizes
                differ, or the number of LSTM unit sizes differs from
                ``word_lstm_layers``.
        """
        if isinstance(self.char_window_size, int):
            self.char_window_size = [self.char_window_size]
        if self.char_filters is None or isinstance(self.char_filters, int):
            self.char_filters = [self.char_filters] * len(
                self.char_window_size)
        if len(self.char_window_size) != len(self.char_filters):
            raise ValueError(
                "There should be the same number of window sizes and filter sizes"
            )
        if isinstance(self.word_lstm_units, int):
            self.word_lstm_units = [self.word_lstm_units
                                    ] * self.word_lstm_layers
        if len(self.word_lstm_units) != self.word_lstm_layers:
            raise ValueError(
                "There should be the same number of lstm layer units and lstm layers"
            )
        if self.word_vectorizers is None:
            self.word_vectorizers = []
        if self.regularizer is not None:
            # The numeric coefficient is replaced by the regularizer object.
            self.regularizer = kreg.l2(self.regularizer)

    @property
    def symbols_number_(self):
        """Size of the character vocabulary."""
        return len(self.symbols)

    @property
    def tags_number_(self):
        """Size of the tag vocabulary."""
        return len(self.tags)

    def build(self):
        """Build and compile the Keras model, storing it in ``self.model_``.

        Returns:
            ``self``.
        """
        word_inputs = kl.Input(shape=(None, MAX_WORD_LENGTH + 2),
                               dtype="int32")
        inputs = [word_inputs]
        word_outputs = self.build_word_cnn(word_inputs)
        if len(self.word_vectorizers) > 0:
            additional_word_inputs = [
                kl.Input(shape=(None, input_dim), dtype="float32")
                for input_dim, dense_dim in self.word_vectorizers
            ]
            inputs.extend(additional_word_inputs)
            additional_word_embeddings = [
                kl.Dense(dense_dim)(additional_word_inputs[i])
                for i, (_, dense_dim) in enumerate(self.word_vectorizers)
            ]
            word_outputs = kl.Concatenate()([word_outputs] +
                                            additional_word_embeddings)
        outputs, _ = self.build_basic_network(word_outputs)
        compile_args = {
            "optimizer": ko.nadam(lr=0.002, clipnorm=5.0),
            "loss": "categorical_crossentropy",
            "metrics": ["accuracy"]
        }
        self.model_ = Model(inputs, outputs)
        self.model_.compile(**compile_args)
        if self.verbose > 0:
            # summary() prints its lines and returns None, so logging
            # str(summary()) would only record "None"; send the lines to
            # the logger instead.
            self.model_.summary(print_fn=log.info)
        return self

    def build_word_cnn(self, inputs):
        """Build the character-level encoder that produces word vectors.

        Args:
            inputs: int tensor of character indexes created in ``build``.

        Returns:
            The output tensor of the last highway layer.
        """
        inputs = kl.Lambda(kb.one_hot,
                           arguments={"num_classes": self.symbols_number_},
                           output_shape=lambda x: tuple(x) +
                           (self.symbols_number_, ))(inputs)
        # A bias-free dense layer over one-hot vectors acts as an embedding.
        char_embeddings = kl.Dense(self.char_embeddings_size,
                                   use_bias=False)(inputs)
        conv_outputs = []
        self.char_output_dim_ = 0
        for window_size, filters_number in zip(self.char_window_size,
                                               self.char_filters):
            curr_output = char_embeddings
            curr_filters_number = (min(
                self.char_filter_multiple * window_size, 200)
                                   if filters_number is None else
                                   filters_number)
            # All but the last convolution may be followed by dropout.
            for _ in range(self.char_conv_layers - 1):
                curr_output = kl.Conv2D(
                    curr_filters_number, (1, window_size),
                    padding="same",
                    activation="relu",
                    data_format="channels_last")(curr_output)
                if self.conv_dropout > 0.0:
                    curr_output = kl.Dropout(self.conv_dropout)(curr_output)
            curr_output = kl.Conv2D(curr_filters_number, (1, window_size),
                                    padding="same",
                                    activation="relu",
                                    data_format="channels_last")(curr_output)
            conv_outputs.append(curr_output)
            self.char_output_dim_ += curr_filters_number
        if len(conv_outputs) > 1:
            conv_output = kl.Concatenate(axis=-1)(conv_outputs)
        else:
            conv_output = conv_outputs[0]
        # Max over the second-to-last axis (the characters of a word).
        highway_input = kl.Lambda(kb.max, arguments={"axis":
                                                     -2})(conv_output)
        if self.intermediate_dropout > 0.0:
            highway_input = kl.Dropout(
                self.intermediate_dropout)(highway_input)
        for _ in range(self.char_highway_layers - 1):
            highway_input = Highway(activation="relu")(highway_input)
            if self.highway_dropout > 0.0:
                highway_input = kl.Dropout(
                    self.highway_dropout)(highway_input)
        highway_output = Highway(activation="relu")(highway_input)
        return highway_output

    def build_basic_network(self, word_outputs):
        """Transform word vectors into per-word tag probabilities.

        Args:
            word_outputs: tensor of word vectors.

        Returns:
            A pair ``(pre_outputs, lstm_outputs)``: the softmax output over
            tags and the output of the last bidirectional LSTM.
        """
        if self.word_dropout > 0.0:
            lstm_outputs = kl.Dropout(self.word_dropout)(word_outputs)
        else:
            lstm_outputs = word_outputs
        for j in range(self.word_lstm_layers - 1):
            lstm_outputs = kl.Bidirectional(
                kl.LSTM(self.word_lstm_units[j],
                        return_sequences=True,
                        dropout=self.lstm_dropout))(lstm_outputs)
        lstm_outputs = kl.Bidirectional(
            kl.LSTM(self.word_lstm_units[-1],
                    return_sequences=True,
                    dropout=self.lstm_dropout))(lstm_outputs)
        pre_outputs = kl.TimeDistributed(kl.Dense(
            self.tags_number_,
            activation="softmax",
            activity_regularizer=self.regularizer),
                                         name="p")(lstm_outputs)
        return pre_outputs, lstm_outputs

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        """Convert a batch of sentences (and labels) to network arrays.

        Args:
            data: a batch of word sequences; when additional vectorizers are
                configured, ``data[0]`` holds the sentences and ``data[1:]``
                the additional inputs.
            labels: optional batch of tag sequences.
            transform_to_one_hot: whether to one-hot encode the tag indexes.

        Returns:
            ``X`` if ``labels`` is ``None``, otherwise ``(X, Y)``.  ``X`` is
            a list of arrays when additional vectorizers are configured.
        """
        if len(self.word_vectorizers) > 0:
            data, additional_data = data[0], data[1:]
        # All sentences are padded to the longest one in the batch.
        L = max(len(x) for x in data)
        X = np.array([self._make_sent_vector(x, L) for x in data])
        if len(self.word_vectorizers) > 0:
            X = [X] + [np.array(x) for x in additional_data]
        if labels is not None:
            Y = np.array([self._make_tags_vector(y, L) for y in labels])
            if transform_to_one_hot:
                Y = to_one_hot(Y, len(self.tags))
            return X, Y
        else:
            return X

    def train_on_batch(self, data, labels):
        """Train the model on a single batch.

        Args:
            data: a batch of word sequences.
            labels: a batch of correct tag sequences.

        Returns:
            Whatever ``self.model_.train_on_batch`` returns.
        """
        X, Y = self._transform_batch(data, labels)
        # TO_DO: add weights to deal with padded instances
        return self.model_.train_on_batch(X, Y)

    def predict_on_batch(self, data: List, return_indexes=False):
        """Predict tags for a single batch.

        Args:
            data: a batch of word sequences (with additional inputs when
                vectorizers are configured, see ``_transform_batch``).
            return_indexes: return tag indexes instead of tags.

        Returns:
            One label sequence per sentence, cut to the sentence length.
        """
        X = self._transform_batch(data)
        if len(self.word_vectorizers) > 0:
            objects_number, lengths = len(
                X[0]), [len(elem) for elem in data[0]]
        else:
            objects_number, lengths = len(X), [len(elem) for elem in data]
        Y = self.model_.predict_on_batch(X)
        labels = np.argmax(Y, axis=-1)
        answer: List[List[str]] = [None] * objects_number
        for i, (elem, length) in enumerate(zip(labels, lengths)):
            # Drop the predictions made for padding positions.
            elem = elem[:length]
            answer[i] = elem if return_indexes else self.tags.idxs2toks(elem)
        return answer

    def _make_sent_vector(self, sent, bucket_length=None):
        """Encode one sentence as a matrix of character indexes.

        Args:
            sent: a sequence of words.
            bucket_length: number of rows; defaults to ``len(sent)``.

        Returns:
            An int32 array of shape ``(bucket_length, MAX_WORD_LENGTH + 2)``;
            row ``i`` holds BEGIN, the character indexes of the last
            ``MAX_WORD_LENGTH`` characters of word ``i``, END, then PAD.
        """
        bucket_length = bucket_length or len(sent)
        answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH + 2),
                          dtype=np.int32)
        for i, word in enumerate(sent):
            # NOTE(review): the special tokens are looked up in the tag
            # vocabulary while characters use the symbol vocabulary;
            # presumably both share the special-token indexes -- confirm.
            answer[i, 0] = self.tags.tok2idx("BEGIN")
            m = min(len(word), MAX_WORD_LENGTH)
            for j, x in enumerate(word[-m:]):
                answer[i, j + 1] = self.symbols.tok2idx(x)
            answer[i, m + 1] = self.tags.tok2idx("END")
            answer[i, m + 2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None):
        """Encode one tag sequence as a vector of tag indexes.

        Args:
            tags: a sequence of tags.
            bucket_length: vector length; defaults to ``len(tags)``.

        Returns:
            An int32 array of length ``bucket_length``; positions past the
            end of ``tags`` stay zero.
        """
        bucket_length = bucket_length or len(tags)
        answer = np.zeros(shape=(bucket_length, ), dtype=np.int32)
        for i, tag in enumerate(tags):
            answer[i] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile):
        """Save the model weights to ``outfile``.

        Other model components should be given in config.
        """
        self.model_.save_weights(outfile)

    def load(self, infile):
        """Load the model weights from ``infile``."""
        self.model_.load_weights(infile)
class ModelOrientation:
    """Regress the displacement of a rectangle from dense optical flow.

    A rectangle is drawn on an otherwise empty image and moved by a small
    random offset at every step.  The model receives the optical flow between
    the previous frame and the current one and must output the displacement
    that was applied.
    """

    def __init__(self, img_hau, img_lar, rect_x, rect_y, rect_hau, rect_lar):
        """Create the blank image, the rectangle and the Keras model.

        Args:
            img_hau: image height.
            img_lar: image width.
            rect_x, rect_y, rect_hau, rect_lar: forwarded to ``Rectangle``.
        """
        self.hau = img_hau
        self.lar = img_lar
        self.img = np.zeros((img_hau, img_lar, 3), dtype="uint8")
        self.w_img2 = copy.deepcopy(self.img)
        self.rect = Rectangle(self.img, rect_x, rect_y, rect_hau, rect_lar)
        self.w_img2 = self.rect.draw(self.w_img2)
        self.create_model()

    def create_model(self):
        """Build and compile the CNN; load ``./weights.hdf5`` if present."""
        img_input = layers.Input(shape=(self.hau, self.lar, 2))
        x = layers.Conv2D(16, 3, activation='relu')(img_input)
        # Chain onto the previous layer.  The second convolution used to be
        # applied to img_input, which silently dropped the 16-filter layer
        # from the graph.  Weights saved from that two-convolution graph do
        # not match this one and must be regenerated.
        x = layers.Conv2D(32, 3, activation='relu')(x)
        x = layers.Conv2D(64, 3, activation='relu')(x)
        x = layers.MaxPooling2D(2)(x)
        x = layers.Dropout(0.5)(x)
        # Flatten the feature map so fully connected layers can follow.
        x = layers.Flatten()(x)
        x = layers.Dense(50, activation='relu')(x)
        x = layers.Dropout(0.5)(x)
        x = layers.Dense(50, activation='relu')(x)
        # Two linear outputs: the displacement along each axis.
        output = layers.Dense(2, activation='linear')(x)
        self.model = Model(img_input, output)
        self.model.compile(loss='mean_squared_error', optimizer='adam')
        if os.path.isfile("./weights.hdf5"):
            self.model.load_weights('./weights.hdf5')
        # summary() prints by itself and returns None; wrapping it in print()
        # only added a stray "None" line.
        self.model.summary()

    def fit(self, steps, size_of_training):
        """Generate samples for ``steps`` iterations, training periodically.

        Args:
            steps: number of samples to generate.
            size_of_training: capacity of the sample buffers; training is
                triggered whenever the step index is a positive multiple of
                a tenth of it.
        """
        self.prvs = cv.cvtColor(self.w_img2, cv.COLOR_BGR2GRAY)
        # Buffers holding the training data (flattened flow, displacement).
        self.features = np.zeros((size_of_training, self.hau * self.lar * 2))
        self.targets = np.zeros((size_of_training, 2))
        for i in tqdm(range(steps)):
            if i % (size_of_training / 10) == 0 and i > 0:
                # Train the model on the data generated so far.
                self.train(i, size_of_training)
            self.create_data(i, size_of_training)

    def create_data(self, i, size_of_training):
        """Generate one sample, overwriting the oldest slot of the buffers.

        Args:
            i: step index; the sample is stored at ``i % size_of_training``.
            size_of_training: capacity of the sample buffers.
        """
        # How many cells the rectangle moves along each axis.
        longx = random.randint(0, 3)
        longy = random.randint(0, 3)
        # Direction of the move along each axis.
        dirx = random.choice([-1, 1])
        diry = random.choice([-1, 1])
        # Flip the direction if the move would take the rectangle out of the
        # image.
        # NOTE(review): x + lar is compared with img.shape[0] (the height)
        # and y + hau with img.shape[1] (the width); this looks swapped for
        # non-square images unless Rectangle treats x as the row axis --
        # confirm against Rectangle.
        if (self.rect.x + longx * dirx < 0 or
                self.rect.x + longx * dirx + self.rect.lar >
                self.img.shape[0]):
            dirx *= -1
        longx *= dirx
        if (self.rect.y + longy * diry < 0 or
                self.rect.y + longy * diry + self.rect.hau >
                self.img.shape[1]):
            diry *= -1
        longy *= diry
        self.rect.move(longx, longy)
        w_img2 = copy.deepcopy(self.img)
        w_img2 = self.rect.draw(w_img2)
        next_gray = cv.cvtColor(w_img2, cv.COLOR_BGR2GRAY)
        flow = cv.calcOpticalFlowFarneback(self.prvs, next_gray, None, 0.5,
                                           3, 15, 3, 5, 1.2, 0)
        slot = i % size_of_training
        self.features[slot, :] = flow.flatten()
        self.targets[slot, :] = [longx, longy]
        self.prvs = next_gray

    def train(self, i, size_of_training):
        """Fit the model on the filled part of the buffers and print a check.

        Args:
            i: current step index; only the first ``i`` slots are filled
                while ``i < size_of_training``.
            size_of_training: capacity of the sample buffers.
        """
        print("==========================================================")
        print("Step: ", i)
        # Work on a reshaped local instead of temporarily rebinding
        # self.features, so the flat buffer stays valid even if training
        # raises.
        frames = np.reshape(self.features,
                            (size_of_training, self.hau, self.lar, 2))
        n_filled = min(i, size_of_training)
        print("----------------")
        print("Train")
        self.model_checkpoint = ModelCheckpoint('weights.hdf5',
                                                monitor='loss',
                                                verbose=1,
                                                save_best_only=True,
                                                save_weights_only=True)
        self.model.fit(frames[:n_filled],
                       self.targets[:n_filled],
                       shuffle=True,
                       callbacks=[self.model_checkpoint])
        print("----------------")
        print("Test")
        # Only inspect slots that exist and have been filled.
        for j in range(min(10, n_filled)):
            [[predx, predy]] = self.model.predict(frames[j:j + 1])
            predx = round(predx)
            predy = round(predy)
            print(
                "Loss: ",
                np.linalg.norm([
                    predx - self.targets[j, 0], predy - self.targets[j, 1]
                ]), " / Pred: ", predx, predy, " / Diff: ",
                self.targets[j, 0] - predx, self.targets[j, 1] - predy)
        print()
def SegNet(nClasses, input_height, input_width):
    """Build a SegNet model with a VGG16-style encoder.

    The encoder consists of five convolution blocks, each followed by
    ``MaxPoolingWithArgmax2D``; the pooling masks are consumed in reverse
    order by ``MaxUnpooling2D`` in the decoder.  The encoder convolutions are
    initialised from the ImageNet VGG16 weights.

    Args:
        nClasses: number of output classes.
        input_height: input image height, must be a multiple of 32.
        input_width: input image width, must be a multiple of 32.

    Returns:
        A Keras ``Model`` mapping ``(input_height, input_width, 3)`` images
        to a softmax over ``nClasses`` with output shape ``(-1, nClasses)``
        per image.
    """
    # Five 2x poolings: both sides must be divisible by 2 ** 5.
    assert input_height % 32 == 0, "input_height must be a multiple of 32"
    assert input_width % 32 == 0, "input_width must be a multiple of 32"

    img_input = Input(shape=(input_height, input_width, 3))

    # Encoder: (block index, filters, number of convolutions).  Layer names
    # follow the VGG16 naming so pretrained weights can be matched by name.
    encoder_spec = [(1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3),
                    (5, 512, 3)]
    x = img_input
    masks = []
    for block, filters, n_convs in encoder_spec:
        for conv in range(1, n_convs + 1):
            x = layers.Conv2D(filters, (3, 3),
                              activation='relu',
                              padding='same',
                              name='block{}_conv{}'.format(block, conv))(x)
        x, mask = MaxPoolingWithArgmax2D(
            name='block{}_pool'.format(block))(x)
        masks.append(mask)

    vgg_encoder = Model(inputs=img_input, outputs=x)

    # Load the pretrained VGG16 weights.  load_weights() needs a local file,
    # so the file is downloaded (and cached) first.  The encoder is
    # channels-last and has no classifier head, hence the TensorFlow-ordered
    # "notop" weights rather than the Theano-ordered full model.
    weights_path = get_file(
        'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
        'https://github.com/fchollet/deep-learning-models/releases/download/'
        'v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
        cache_subdir='models')
    vgg_encoder.load_weights(weights_path, by_name=True)

    # Decoder: one unpooling per encoder mask (deepest first), each followed
    # by Conv-BN-ReLU units with the listed filter counts.
    decoder_spec = [(512, 512, 512), (512, 512, 256), (256, 256, 128),
                    (128, 64), (64, )]
    y = x
    for mask, filter_seq in zip(reversed(masks), decoder_spec):
        y = MaxUnpooling2D()([y, mask])
        for filters in filter_seq:
            y = Conv2D(filters, (3, 3), padding="same")(y)
            y = BatchNormalization()(y)
            y = Activation("relu")(y)

    # Per-pixel classifier.
    # NOTE(review): the ReLU in front of the softmax clips negative scores;
    # kept as in the original design -- confirm it is intended.
    y = Conv2D(nClasses, (1, 1), padding="same")(y)
    y = BatchNormalization()(y)
    y = Activation("relu")(y)
    y = Reshape((-1, nClasses))(y)
    y = Activation("softmax")(y)

    model = Model(inputs=img_input, outputs=y)
    return model
class CharacterTagger:
    """Neural morphological tagger that reads words character by character.

    Parameters:
        symbols: vocabulary of characters
        tags: vocabulary of morphological tags
        word_rnn: kind of character-level network; stored on the instance
            (the network built here is convolutional)
        char_embeddings_size: dimension of the character embeddings
        char_conv_layers: how many convolutions are stacked for every window
        char_window_size: a convolution width, or a list of widths when
            several convolutions run in parallel, e.g. [2, 3, 4, 5]
        char_filters: filters per window width: a single number shared by
            all widths, a list with one entry per width, or **None**; with
            **None** a window of width **width** gets
            min(**char_filter_multiple** * **width**, 200) filters
        char_filter_multiple: filters-to-width ratio for the **None** case
        char_highway_layers: how many highway layers follow the convolutions
        conv_dropout: dropout rate between stacked convolutions
        highway_dropout: dropout rate between highway layers
        intermediate_dropout: dropout rate between the convolutional and the
            highway part
        lstm_dropout: dropout rate inside the word-level LSTMs
        word_vectorizers: one (vectorizer dimension, embedding dimension)
            pair per additional word-level input
        word_lstm_layers: how many word-level LSTM layers are stacked
        word_lstm_units: hidden size(s) of the word-level LSTMs
        word_dropout: dropout rate applied to the word embeddings
        regularizer: l2 regularization coefficient
        verbose: verbosity level
    """

    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 word_rnn: str = "cnn",
                 char_embeddings_size: int = 16,
                 char_conv_layers: int = 1,
                 char_window_size: Union[int, List[int]] = 5,
                 char_filters: Union[int, List[int]] = None,
                 char_filter_multiple: int = 25,
                 char_highway_layers: int = 1,
                 conv_dropout: float = 0.0,
                 highway_dropout: float = 0.0,
                 intermediate_dropout: float = 0.0,
                 lstm_dropout: float = 0.0,
                 word_vectorizers: List[Tuple[int, int]] = None,
                 word_lstm_layers: int = 1,
                 word_lstm_units: Union[int, List[int]] = 128,
                 word_dropout: float = 0.0,
                 regularizer: float = None,
                 verbose: int = 1):
        # Vocabularies.
        self.symbols = symbols
        self.tags = tags
        # Character-level encoder settings.
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        # Dropout rates.
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        # Word-level settings; word_vectorizers lists the additional
        # vectorizer dimensions.
        self.word_vectorizers = word_vectorizers
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        self.regularizer = regularizer
        self.verbose = verbose
        self._initialize()
        self.build()

    def _initialize(self):
        """Turns scalar settings into lists and checks that lengths agree."""
        windows = self.char_window_size
        if isinstance(windows, int):
            windows = [windows]
        self.char_window_size = windows

        filters = self.char_filters
        if filters is None or isinstance(filters, int):
            filters = [filters] * len(windows)
        self.char_filters = filters
        if len(windows) != len(filters):
            raise ValueError("There should be the same number of window sizes and filter sizes")

        units = self.word_lstm_units
        if isinstance(units, int):
            units = [units] * self.word_lstm_layers
        self.word_lstm_units = units
        if len(units) != self.word_lstm_layers:
            raise ValueError("There should be the same number of lstm layer units and lstm layers")

        if self.word_vectorizers is None:
            self.word_vectorizers = []
        if self.regularizer is not None:
            self.regularizer = kreg.l2(self.regularizer)
        if self.verbose > 0:
            log.info("{} symbols, {} tags in CharacterTagger".format(self.symbols_number_, self.tags_number_))

    @property
    def symbols_number_(self) -> int:
        """Number of entries in the character vocabulary"""
        return len(self.symbols)

    @property
    def tags_number_(self) -> int:
        """Number of entries in the tag vocabulary"""
        return len(self.tags)

    def build(self):
        """Assembles and compiles the Keras model, stored in `model_`.

        Returns:
            the tagger itself
        """
        char_level_input = kl.Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32")
        model_inputs = [char_level_input]
        word_repr = self._build_word_cnn(char_level_input)
        if len(self.word_vectorizers) > 0:
            # First every additional input, then every projection.
            extra_inputs = []
            for vector_dim, _ in self.word_vectorizers:
                extra_inputs.append(kl.Input(shape=(None, vector_dim), dtype="float32"))
            model_inputs.extend(extra_inputs)
            extra_embeddings = []
            for extra_input, (_, embedding_dim) in zip(extra_inputs, self.word_vectorizers):
                extra_embeddings.append(kl.Dense(embedding_dim)(extra_input))
            word_repr = kl.Concatenate()([word_repr] + extra_embeddings)
        tag_probs, _ = self._build_basic_network(word_repr)
        optimizer = ko.nadam(lr=0.002, clipnorm=5.0)
        self.model_ = Model(model_inputs, tag_probs)
        self.model_.compile(optimizer=optimizer,
                            loss="categorical_crossentropy",
                            metrics=["accuracy"])
        if self.verbose > 0:
            self.model_.summary(print_fn=log.info)
        return self

    def _build_word_cnn(self, inputs):
        """Builds the character-level part producing one vector per word.

        Args:
            inputs: tensor of character indexes

        Returns:
            the output of the last highway layer
        """
        one_hot_chars = kl.Lambda(
            kb.one_hot,
            arguments={"num_classes": self.symbols_number_},
            output_shape=lambda x: tuple(x) + (self.symbols_number_,))(inputs)
        embedded_chars = kl.Dense(self.char_embeddings_size, use_bias=False)(one_hot_chars)

        branches = []
        self.char_output_dim_ = 0
        for width, requested_filters in zip(self.char_window_size, self.char_filters):
            if requested_filters is None:
                n_filters = min(self.char_filter_multiple * width, 200)
            else:
                n_filters = requested_filters
            branch = embedded_chars
            # Inner convolutions, each optionally followed by dropout.
            for _ in range(self.char_conv_layers - 1):
                branch = kl.Conv2D(n_filters, (1, width), padding="same",
                                   activation="relu", data_format="channels_last")(branch)
                if self.conv_dropout > 0.0:
                    branch = kl.Dropout(self.conv_dropout)(branch)
            # Last convolution of the branch, never followed by dropout.
            branch = kl.Conv2D(n_filters, (1, width), padding="same",
                               activation="relu", data_format="channels_last")(branch)
            branches.append(branch)
            self.char_output_dim_ += n_filters

        if len(branches) > 1:
            merged = kl.Concatenate(axis=-1)(branches)
        else:
            merged = branches[0]
        # Maximum over the second-to-last axis.
        hidden = kl.Lambda(kb.max, arguments={"axis": -2})(merged)
        if self.intermediate_dropout > 0.0:
            hidden = kl.Dropout(self.intermediate_dropout)(hidden)
        for _ in range(self.char_highway_layers - 1):
            hidden = Highway(activation="relu")(hidden)
            if self.highway_dropout > 0.0:
                hidden = kl.Dropout(self.highway_dropout)(hidden)
        return Highway(activation="relu")(hidden)

    def _build_basic_network(self, word_outputs):
        """Builds the word-level part: dropout, BiLSTMs and the tag softmax.

        Args:
            word_outputs: tensor of word representations

        Returns:
            the tag probabilities and the output of the last BiLSTM
        """
        hidden = word_outputs
        if self.word_dropout > 0.0:
            hidden = kl.Dropout(self.word_dropout)(hidden)
        for units in self.word_lstm_units[:self.word_lstm_layers-1]:
            recurrent = kl.LSTM(units, return_sequences=True, dropout=self.lstm_dropout)
            hidden = kl.Bidirectional(recurrent)(hidden)
        last_recurrent = kl.LSTM(self.word_lstm_units[-1], return_sequences=True,
                                 dropout=self.lstm_dropout)
        hidden = kl.Bidirectional(last_recurrent)(hidden)
        tag_layer = kl.Dense(self.tags_number_, activation="softmax",
                             activity_regularizer=self.regularizer)
        tag_probs = kl.TimeDistributed(tag_layer, name="p")(hidden)
        return tag_probs, hidden

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        """Converts a batch to network inputs (and targets, if labels given).

        Args:
            data: `data[0]` is the batch of sentences, `data[1:]` are the
                additional inputs
            labels: a batch of tag sequences or None
            transform_to_one_hot: whether tag indexes are one-hot encoded

        Returns:
            the list of input arrays, paired with the targets when `labels`
            is given
        """
        sentences = data[0]
        extras = data[1:]
        width = max(len(sentence) for sentence in sentences)
        sentence_array = np.array([self._make_sent_vector(sentence, width)
                                   for sentence in sentences])
        X = [sentence_array] + [np.array(extra) for extra in extras]
        if labels is None:
            return X
        Y = np.array([self._make_tags_vector(tag_seq, width) for tag_seq in labels])
        if transform_to_one_hot:
            Y = to_one_hot(Y, len(self.tags))
        return X, Y

    def train_on_batch(self, data: List[Iterable], labels: Iterable[list]) -> None:
        """Performs one training step on a batch

        Args:
            data: a batch of word sequences
            labels: a batch of correct tag sequences
        """
        X, Y = self._transform_batch(data, labels)
        self.model_.train_on_batch(X, Y)

    def predict_on_batch(self, data: Union[list, tuple],
                         return_indexes: bool = False) -> List[List[str]]:
        """Predicts tags for a batch

        Args:
            data: a batch of word sequences together with additional inputs
            return_indexes: if set, tag indexes are returned instead of tags

        Returns:
            one label sequence per sentence, cut to the sentence length
        """
        X = self._transform_batch(data)
        batch_size = len(X[0])
        sentence_lengths = [len(sentence) for sentence in data[0]]
        probs = self.model_.predict_on_batch(X)
        best_indexes = np.argmax(probs, axis=-1)
        answer: List[List[str]] = [None] * batch_size
        for pos, (row, n_words) in enumerate(zip(best_indexes, sentence_lengths)):
            row = row[:n_words]
            if return_indexes:
                answer[pos] = row
            else:
                answer[pos] = self.tags.idxs2toks(row)
        return answer

    def _make_sent_vector(self, sent: List, bucket_length: int =None) -> np.ndarray:
        """Encodes one sentence as an array of character indexes.

        Args:
            sent: input sentence
            bucket_length: the number of rows, by default the sentence length

        Returns:
            A 2d array, answer[i][j] holds the index of the j-th position
            of the i-th word: BEGIN, the characters, END, then PAD.
        """
        n_rows = bucket_length or len(sent)
        answer = np.zeros(shape=(n_rows, MAX_WORD_LENGTH+2), dtype=np.int32)
        for row, word in enumerate(sent):
            answer[row, 0] = self.tags.tok2idx("BEGIN")
            kept = min(len(word), MAX_WORD_LENGTH)
            # Only the last `kept` characters of the word are encoded.
            for offset, char in enumerate(word[-kept:]):
                answer[row, offset+1] = self.symbols.tok2idx(char)
            answer[row, kept+1] = self.tags.tok2idx("END")
            answer[row, kept+2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray:
        """Encodes one tag sequence as an array of tag indexes.

        Args:
            tags: input sentence of tags
            bucket_length: the array length, by default the number of tags

        Returns:
            A 1d array, answer[i] holds the index of the i-th tag;
            the remaining positions are zero.
        """
        size = bucket_length or len(tags)
        answer = np.zeros(shape=(size,), dtype=np.int32)
        for pos, tag in enumerate(tags):
            answer[pos] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile) -> None:
        """Writes the model weights to a file

        Args:
            outfile: file with model weights (other model components should be given in config)
        """
        self.model_.save_weights(outfile)

    def load(self, infile) -> None:
        """Reads the model weights from a file

        Args:
            infile: file to load model weights from
        """
        self.model_.load_weights(infile)
class CharacterTagger:
    """Character-based neural morphological tagger.

    Every word is encoded from its characters by convolutions followed by
    highway layers; the word encodings then go through bidirectional LSTMs
    and a per-word softmax layer over the tag vocabulary.

    Parameters:
        symbols: character vocabulary
        tags: morphological tags vocabulary
        word_rnn: the type of character-level network; the value is only
            stored, the network built by this class is always convolutional
        char_embeddings_size: the size of character embeddings
        char_conv_layers: the number of convolutional layers on character level
        char_window_size: the width of the convolutional filter, or a list
            of widths (for example, [2, 3, 4, 5]) when several parallel
            filters are applied
        char_filters: the number of convolutional filters for each window
            width: a number (shared by all widths), a list with one entry
            per width, or **None**. For a **None** entry a window of width
            **width** gets min(**char_filter_multiple** * **width**, 200)
            filters.
        char_filter_multiple: the ratio between filters number and window width
        char_highway_layers: the number of highway layers on character level
        conv_dropout: the ratio of dropout between convolutional layers
        highway_dropout: the ratio of dropout between highway layers
        intermediate_dropout: the ratio of dropout between convolutional
            and highway layers on character level
        lstm_dropout: dropout ratio in word-level LSTM
        word_vectorizers: list of parameters for additional word-level
            vectorizers, for each vectorizer it stores a pair of vectorizer
            dimension and the dimension of the corresponding word embedding
        word_lstm_layers: the number of word-level LSTM layers
        word_lstm_units: hidden dimensions of word-level LSTMs, a number
            (shared by all layers) or a list with one entry per layer
        word_dropout: the ratio of dropout before word level
            (it is applied to word embeddings)
        regularizer: l2 regularization parameter, applied as activity
            regularizer of the output layer
        verbose: the level of verbosity; positive values log the vocabulary
            sizes and the model summary
    """

    def __init__(self,
                 symbols: DefaultVocabulary,
                 tags: DefaultVocabulary,
                 word_rnn: str = "cnn",
                 char_embeddings_size: int = 16,
                 char_conv_layers: int = 1,
                 char_window_size: Union[int, List[int]] = 5,
                 char_filters: Union[int, List[int]] = None,
                 char_filter_multiple: int = 25,
                 char_highway_layers: int = 1,
                 conv_dropout: float = 0.0,
                 highway_dropout: float = 0.0,
                 intermediate_dropout: float = 0.0,
                 lstm_dropout: float = 0.0,
                 word_vectorizers: List[Tuple[int, int]] = None,
                 word_lstm_layers: int = 1,
                 word_lstm_units: Union[int, List[int]] = 128,
                 word_dropout: float = 0.0,
                 regularizer: float = None,
                 verbose: int = 1):
        # vocabularies
        self.symbols, self.tags = symbols, tags
        # character-level network settings
        self.word_rnn = word_rnn
        self.char_embeddings_size = char_embeddings_size
        self.char_conv_layers = char_conv_layers
        self.char_window_size = char_window_size
        self.char_filters = char_filters
        self.char_filter_multiple = char_filter_multiple
        self.char_highway_layers = char_highway_layers
        # dropout ratios
        self.conv_dropout = conv_dropout
        self.highway_dropout = highway_dropout
        self.intermediate_dropout = intermediate_dropout
        self.lstm_dropout = lstm_dropout
        self.word_dropout = word_dropout
        # word-level network settings
        self.word_vectorizers = word_vectorizers  # a list of additional vectorizer dimensions
        self.word_lstm_layers = word_lstm_layers
        self.word_lstm_units = word_lstm_units
        # remaining settings
        self.regularizer = regularizer
        self.verbose = verbose
        # normalize the settings first, the network is built from them
        self._initialize()
        self.build()

    def _initialize(self):
        """Brings the constructor arguments to the form used while building.

        Scalars are expanded to lists, a missing vectorizers list becomes
        an empty one and the regularization parameter becomes an l2 regularizer.

        Raises:
            ValueError: if the number of filter sizes differs from the number
                of window sizes or the number of LSTM unit sizes differs
                from the number of LSTM layers.
        """
        window_sizes = self.char_window_size
        if isinstance(window_sizes, int):
            window_sizes = [window_sizes]
        self.char_window_size = window_sizes

        filters = self.char_filters
        if filters is None or isinstance(filters, int):
            # one (possibly None) entry per window width
            filters = [filters] * len(window_sizes)
        self.char_filters = filters
        if len(window_sizes) != len(filters):
            raise ValueError("There should be the same number of window sizes and filter sizes")

        lstm_units = self.word_lstm_units
        if isinstance(lstm_units, int):
            lstm_units = [lstm_units] * self.word_lstm_layers
        self.word_lstm_units = lstm_units
        if len(lstm_units) != self.word_lstm_layers:
            raise ValueError("There should be the same number of lstm layer units and lstm layers")

        if self.word_vectorizers is None:
            self.word_vectorizers = []
        if self.regularizer is not None:
            self.regularizer = kreg.l2(self.regularizer)
        if self.verbose > 0:
            message = "{} symbols, {} tags in CharacterTagger".format(
                self.symbols_number_, self.tags_number_)
            log.info(message)

    @property
    def symbols_number_(self) -> int:
        """Character vocabulary size
        """
        vocabulary = self.symbols
        return len(vocabulary)

    @property
    def tags_number_(self) -> int:
        """Tag vocabulary size
        """
        vocabulary = self.tags
        return len(vocabulary)

    def build(self):
        """Builds and compiles the network using Keras.

        The compiled model is stored in ``self.model_``.

        Returns:
            the tagger itself
        """
        char_input = kl.Input(shape=(None, MAX_WORD_LENGTH+2), dtype="int32")
        model_inputs = [char_input]
        word_repr = self._build_word_cnn(char_input)
        if len(self.word_vectorizers) != 0:
            # all additional inputs are created before their dense projections
            extra_inputs = [kl.Input(shape=(None, input_dim), dtype="float32")
                            for input_dim, _ in self.word_vectorizers]
            model_inputs += extra_inputs
            extra_embeddings = [kl.Dense(dense_dim)(tensor)
                                for tensor, (_, dense_dim)
                                in zip(extra_inputs, self.word_vectorizers)]
            word_repr = kl.Concatenate()([word_repr, *extra_embeddings])
        tag_probs, _ = self._build_basic_network(word_repr)
        optimizer = ko.nadam(lr=0.002, clipnorm=5.0)
        self.model_ = Model(model_inputs, tag_probs)
        self.model_.compile(optimizer=optimizer,
                            loss="categorical_crossentropy",
                            metrics=["accuracy"])
        if self.verbose > 0:
            self.model_.summary(print_fn=log.info)
        return self

    def _build_word_cnn(self, inputs):
        """Builds the character-level network producing word representations.

        Characters are one-hot encoded and embedded by a bias-free dense
        layer, each window width gets its own stack of convolutions, the
        stacks are concatenated, max-pooled over the character axis and
        passed through highway layers. The total number of filters is
        stored in ``self.char_output_dim_``.

        Args:
            inputs: the tensor of character indexes

        Returns:
            the output tensor of the last highway layer
        """
        def convolve(tensor, filters, width):
            # a single "same"-padded convolution along the character axis
            layer = kl.Conv2D(filters, (1, width), padding="same",
                              activation="relu", data_format="channels_last")
            return layer(tensor)

        one_hot = kl.Lambda(kb.one_hot,
                            arguments={"num_classes": self.symbols_number_},
                            output_shape=lambda x: tuple(x) + (self.symbols_number_,))(inputs)
        char_embeddings = kl.Dense(self.char_embeddings_size, use_bias=False)(one_hot)

        branches = []
        self.char_output_dim_ = 0
        for width, requested_filters in zip(self.char_window_size, self.char_filters):
            if requested_filters is None:
                filters = min(self.char_filter_multiple * width, 200)
            else:
                filters = requested_filters
            branch = char_embeddings
            # every convolution except the last one may be followed by dropout
            for _ in range(self.char_conv_layers - 1):
                branch = convolve(branch, filters, width)
                if self.conv_dropout > 0.0:
                    branch = kl.Dropout(self.conv_dropout)(branch)
            branch = convolve(branch, filters, width)
            branches.append(branch)
            self.char_output_dim_ += filters

        if len(branches) > 1:
            merged = kl.Concatenate(axis=-1)(branches)
        else:
            merged = branches[0]
        # max-pooling over the character axis
        pooled = kl.Lambda(kb.max, arguments={"axis": -2})(merged)
        if self.intermediate_dropout > 0.0:
            pooled = kl.Dropout(self.intermediate_dropout)(pooled)
        # every highway layer except the last one may be followed by dropout
        for _ in range(self.char_highway_layers - 1):
            pooled = Highway(activation="relu")(pooled)
            if self.highway_dropout > 0.0:
                pooled = kl.Dropout(self.highway_dropout)(pooled)
        return Highway(activation="relu")(pooled)

    def _build_basic_network(self, word_outputs):
        """Creates the word-level part of the network.

        Word representations optionally go through dropout, then through
        the bidirectional LSTM layers and the softmax layer over tags.

        Args:
            word_outputs: the tensor of word representations

        Returns:
            a pair of the tag probabilities tensor and the output tensor
            of the last LSTM layer
        """
        hidden = word_outputs
        if self.word_dropout > 0.0:
            hidden = kl.Dropout(self.word_dropout)(hidden)
        # the last layer always takes the last entry of the units list
        unit_positions = list(range(self.word_lstm_layers - 1)) + [-1]
        for position in unit_positions:
            recurrent = kl.LSTM(self.word_lstm_units[position],
                                return_sequences=True,
                                dropout=self.lstm_dropout)
            hidden = kl.Bidirectional(recurrent)(hidden)
        classifier = kl.Dense(self.tags_number_, activation="softmax",
                              activity_regularizer=self.regularizer)
        tag_probs = kl.TimeDistributed(classifier, name="p")(hidden)
        return tag_probs, hidden

    def _transform_batch(self, data, labels=None, transform_to_one_hot=True):
        """Converts a batch to the arrays consumed by the Keras model.

        Args:
            data: a sequence whose first element is the batch of word
                sequences, the remaining elements are additional inputs
            labels: a batch of tag sequences or None
            transform_to_one_hot: whether to one-hot encode the targets

        Returns:
            the list of input arrays if ``labels`` is None, otherwise
            a pair of the list of input arrays and the array of targets
        """
        sentences, extra_inputs = data[0], data[1:]
        # all sentences of the batch are brought to the length of the longest one
        longest = max(len(sentence) for sentence in sentences)
        char_array = np.array([self._make_sent_vector(sentence, longest)
                               for sentence in sentences])
        features = [char_array]
        features.extend(np.array(extra) for extra in extra_inputs)
        if labels is None:
            return features
        targets = np.array([self._make_tags_vector(tag_sequence, longest)
                            for tag_sequence in labels])
        if transform_to_one_hot:
            targets = to_one_hot(targets, len(self.tags))
        return features, targets

    def train_on_batch(self, data: List[Iterable], labels: Iterable[list]) -> None:
        """Trains the model on a single batch.

        Args:
            data: a batch of word sequences
            labels: a batch of correct tag sequences

        Returns:
            None; the result of the underlying Keras call is discarded.
        """
        features, targets = self._transform_batch(data, labels)
        self.model_.train_on_batch(features, targets)

    def predict_on_batch(self, data: Union[list, tuple],
                         return_indexes: bool = False) -> List[List[str]]:
        """Makes predictions on a single batch.

        Args:
            data: a batch of word sequences together with additional inputs
            return_indexes: whether to return tag indexes in vocabulary or tags themselves

        Returns:
            a batch of label sequences, each cut to the length of its sentence
        """
        features = self._transform_batch(data)
        sentence_lengths = [len(sentence) for sentence in data[0]]
        scores = self.model_.predict_on_batch(features)
        best_indexes = np.argmax(scores, axis=-1)
        answer: List[List[str]] = [None] * len(features[0])
        for position, (row, length) in enumerate(zip(best_indexes, sentence_lengths)):
            trimmed = row[:length]
            if return_indexes:
                answer[position] = trimmed
            else:
                answer[position] = self.tags.idxs2toks(trimmed)
        return answer

    def _make_sent_vector(self, sent: List, bucket_length: int = None) -> np.ndarray:
        """Transforms a sentence to Numpy array, which will be the network input.

        Args:
            sent: input sentence
            bucket_length: the width of the bucket, the sentence length
                is used when it is None (or 0)

        Returns:
            A 2d int32 array of shape (bucket_length, MAX_WORD_LENGTH+2).
            The i-th row holds the BEGIN index, the indexes of at most
            MAX_WORD_LENGTH last characters of the i-th word, the END index
            and PAD indexes up to the end of the row. Rows after the last
            word stay zero.
        """
        if not bucket_length:
            bucket_length = len(sent)
        answer = np.zeros(shape=(bucket_length, MAX_WORD_LENGTH+2), dtype=np.int32)
        for row, word in enumerate(sent):
            # NOTE(review): the special tokens are looked up in the tag
            # vocabulary, the characters in the symbol vocabulary -- confirm
            # that both vocabularies keep BEGIN/END/PAD at the same indexes.
            answer[row, 0] = self.tags.tok2idx("BEGIN")
            kept = min(len(word), MAX_WORD_LENGTH)
            for column, symbol in enumerate(word[-kept:], start=1):
                answer[row, column] = self.symbols.tok2idx(symbol)
            answer[row, kept + 1] = self.tags.tok2idx("END")
            answer[row, kept + 2:] = self.tags.tok2idx("PAD")
        return answer

    def _make_tags_vector(self, tags, bucket_length=None) -> np.ndarray:
        """Transforms a sentence of tags to Numpy array, which will be the network target.

        Args:
            tags: input sentence of tags
            bucket_length: the width of the bucket, the number of tags
                is used when it is None (or 0)

        Returns:
            A 1d int32 array of length bucket_length, its i-th element is
            the index of the i-th tag. Positions after the last tag stay zero.
        """
        if not bucket_length:
            bucket_length = len(tags)
        answer = np.zeros(shape=(bucket_length,), dtype=np.int32)
        for position, tag in enumerate(tags):
            answer[position] = self.tags.tok2idx(tag)
        return answer

    def save(self, outfile) -> None:
        """Saves model weights to a file.

        Args:
            outfile: file with model weights (other model components should be given in config)
        """
        network = self.model_
        network.save_weights(outfile)

    def load(self, infile) -> None:
        """Loads model weights from a file.

        Args:
            infile: file to load model weights from
        """
        network = self.model_
        network.load_weights(infile)
class PolicyValueNetwork:
    """AlphaZero-style network with a shared trunk, a value head and a policy head.

    The trunk consists of three ``ConvBlock`` groups; both heads are stacked
    on the same ``shared_net`` object. The Keras model stored in
    ``self.model`` has two outputs, in this order: the value (``tanh``,
    one unit) and the policy (``softmax`` over ``Game["board_size"]`` units).

    NOTE(review): whether the blocks contain residual connections depends
    on ``ConvBlock``, which is defined elsewhere -- confirm before calling
    this network "residual".
    """

    def __init__(self, model_file=None):
        """Build the network and optionally restore saved weights.

        Args:
            model_file: name of a weights file (without the ``.h5``
                extension) passed to ``restore_model``; ``None`` skips
                restoring.
        """
        # Build Network Architecture
        # The input shape is taken from the encoding of a fresh board,
        # (6, 15, 15) according to the original author's note.
        input_shape = Board().encoded_states().shape
        inputs = Input(input_shape)

        shared_net = Sequential([
            *ConvBlock(32, input_shape=input_shape),
            *ConvBlock(64),
            *ConvBlock(128)
        ], "shared_net")

        policy_head = Sequential([
            shared_net,
            *ConvBlock(4, (1, 1), "relu"),
            Flatten(),
            Dense(Game["board_size"], kernel_regularizer=l2()),
            Activation("softmax")
        ], "policy_head")

        value_head = Sequential([
            shared_net,
            *ConvBlock(2, (1, 1), "relu"),
            Flatten(),
            Dense(64, activation="relu", kernel_regularizer=l2()),
            Dense(1, kernel_regularizer=l2()),
            Activation("tanh")
        ], "value_head")

        # output order matters: eval_state and the loss list in compile
        # both expect the value first and the policy second
        self.model = Model(
            inputs,
            [value_head(inputs), policy_head(inputs)]
        )

        if model_file is not None:
            self.restore_model(model_file)

    def compile(self, opt):
        """Compile the model with its optimizer and losses.

        The value output is trained with ``"mse"`` and the policy output
        with ``"categorical_crossentropy"``.

        Args:
            opt: currently unused; a default ``sgd()`` optimizer is always
                created here and its hyperparameters are set later by
                ``train_step``.
        """
        self.model.compile(
            optimizer=sgd(),
            loss=["mse", "categorical_crossentropy"]
        )

    def eval_state(self, state):
        """Evaluate a single board state.

        Args:
            state: an object whose ``encoded_states()`` returns the network
                input for one position.

        Returns:
            A pair ``(value, policy)``: the single value-head output and the
            policy-head output vector (``Game["board_size"]`` entries) for
            this state.
        """
        # a leading batch axis of size one is added for the prediction
        vp = self.model.predict_on_batch(state.encoded_states()[np.newaxis, :])
        # strip the batch axis again: scalar value, 1d policy vector
        return vp[0][0][0], vp[1][0]

    def train_step(self, optimizer):
        """Update the optimizer hyperparameters for a training step.

        Only the learning rate and the momentum of the compiled optimizer
        are set here; no weights are updated and nothing is returned.

        Args:
            optimizer: mapping with the keys ``"lr"`` and ``"momentum"``.
        """
        opt = self.model.optimizer
        K.set_value(opt.lr, optimizer["lr"])
        K.set_value(opt.momentum, optimizer["momentum"])
        # TODO: the actual update is not implemented yet, the intended call is
        # loss = self.model.train_on_batch(inputs, [winner, probs])
        # return loss

    def save_model(self, filename):
        """Save the model weights to ``<model_path>/keras/<filename>.h5``.

        Args:
            filename: name of the weights file without the extension.
        """
        base_path = "{}/keras".format(TRAINING_CONFIG["model_path"])
        # makedirs also creates missing parent directories and, with
        # exist_ok, does not fail when the directory is already there
        os.makedirs(base_path, exist_ok=True)
        self.model.save_weights("{}/{}.h5".format(base_path, filename))

    def restore_model(self, filename):
        """Load the model weights from ``<model_path>/keras/<filename>.h5``.

        A missing file is ignored on purpose, the model then keeps its
        current weights.

        Args:
            filename: name of the weights file without the extension.
        """
        base_path = "{}/keras".format(TRAINING_CONFIG["model_path"])
        weights_file = "{}/{}.h5".format(base_path, filename)
        if os.path.exists(weights_file):
            self.model.load_weights(weights_file)