def main(): print("Transforming Data...") train_data = transform_data("../data/dataset/image_patch/Train/") test_data = transform_data("../data/dataset/image_patch/Test/") train_data_noise = transform_data( "../data/dataset/image_patch_noise/Train/") test_data_noise = transform_data("../data/dataset/image_patch_noise/Test/") print("Training/Running model...") input_img = Input(shape=(256, 256, 1)) output = denoise_model(input_img) auto_encoder = Model(input_img, output) auto_encoder.compile(optimizer='Adam', loss='mse') auto_encoder.fit(train_data_noise, train_data, epochs=10, batch_size=8, shuffle=True, validation_data=(test_data_noise, test_data)) print("Saving trained model weights...") auto_encoder.save("../model_new.h5")
def main(_): print("Getting hyperparameters ...") print("Using command {}".format(" ".join(sys.argv))) flag_values_dict = FLAGS.flag_values_dict() for flag_name in sorted(flag_values_dict.keys()): flag_value = flag_values_dict[flag_name] print(flag_name, flag_value) model_file_path = FLAGS.model_file_path suffix = FLAGS.suffix print("Loading the model from training ...") model = load_model(model_file_path, custom_objects={ "tf": tf, "swish": tf.nn.swish }, compile=False) inference_model_file_path = os.path.abspath( os.path.join(model_file_path, "../inference_{}.h5".format(suffix))) print("Saving the model for inference to {} ...".format( inference_model_file_path)) inference_model = Model(inputs=[model.input], outputs=model.output[1:]) if os.path.isfile(inference_model_file_path): os.remove(inference_model_file_path) inference_model.save(inference_model_file_path) print("All done!")
def save_model(self, model: Model, variat: Variat, score: float, description: str) -> str:
    test_descriptive_label = datetime.now().strftime("%Y%m%d%H%M%S") + '-' + description + '-' + str(score)
    model_directory = os.getcwd() + '\\' + 'network_models\\borova\\' + test_descriptive_label
    os.mkdir(model_directory)
    model.save(filepath=model_directory, save_format='tf')
    with open(model_directory + '\\variat.json', 'w') as f:
        yaml.dump(variat, f)
    return model_directory
def export_model(model_file):
    input = Input(shape=(None, int_char_corr.num_vocab))
    gru_out = GRU(512)(input)
    y = Dense(32, activation='relu')(gru_out)
    y = Dropout(0.5)(y)
    y = Dense(32, activation='relu')(y)
    y = Dropout(0.5)(y)
    # A single-unit softmax always outputs 1.0; sigmoid gives a proper binary probability
    # for the binary_crossentropy loss used below.
    output = Dense(1, activation='sigmoid')(y)
    model = Model(input, output)
    model.compile(loss='binary_crossentropy', optimizer='adagrad')
    model.save(model_file)
def train_raw():
    # show class indices
    print('****************')
    for cls, idx in train_batches.class_indices.items():
        print('Class #{} = {}'.format(idx, cls))
    print('****************')

    # build our classifier model based on pre-trained InceptionResNetV2:
    # 1. we don't include the top (fully connected) layers of InceptionResNetV2
    # 2. we add a DropOut layer followed by a Dense (fully connected)
    #    layer which generates softmax class scores for each class
    # 3. we compile the final model using an Adam optimizer, with a
    #    low learning rate (since we are 'fine-tuning')
    net = InceptionResNetV2(include_top=False, weights='imagenet', input_tensor=None,
                            input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))
    x = net.output
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x)
    net_final = Model(inputs=net.input, outputs=output_layer)
    for layer in net_final.layers[:FREEZE_LAYERS]:
        layer.trainable = False
    for layer in net_final.layers[FREEZE_LAYERS:]:
        layer.trainable = True
    net_final.compile(optimizer=Adam(lr=1e-5),
                      loss='categorical_crossentropy', metrics=['accuracy'])
    # print(net_final.summary())

    # train the model
    for i in range(1):
        net_final.fit_generator(
            train_batches,
            steps_per_epoch=train_batches.samples // BATCH_SIZE // 10,
            validation_data=valid_batches,
            validation_steps=valid_batches.samples // BATCH_SIZE // 10,
            epochs=1)
        gen_sub(net_final, testdf, sn=i)
        WEIGHTS_FINAL = f'./output/model-inception_resnet_v{i}-27.h5'
        # save trained weights
        net_final.save(WEIGHTS_FINAL)
        print(f'weights saved to {WEIGHTS_FINAL}')
    return WEIGHTS_FINAL
def _model(args, feature1, feature2, labels):
    '''
    Creates, trains and saves the model
    '''
    # creating model
    A1 = Input(shape=(2048,), name='A1')
    B1 = Input(shape=(1,), name='B1')
    B2 = Dense(32, activation=None, name='B2')(B1)
    concatenated = concatenate([A1, B2])
    dense1 = Dense(1000, activation='relu', name='dense1')(concatenated)
    dense2 = Dense(1000, activation='relu', name='dense2')(dense1)
    dense3 = Dense(1, activation=None, name='dense3')(dense2)
    model = Model([A1, B1], dense3)

    # hyperparameters for training
    lr = args.lr
    batch_size = args.batch_size
    epochs = args.num_epochs
    decay_ratio = args.decay_ratio
    decay_rate = lr / decay_ratio

    # Defining optimizer
    if args.optimizer == 'adam':
        optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None,
                         decay=decay_rate, amsgrad=False)
    elif args.optimizer == 'adagrad':
        optimizer = Adagrad(lr=lr, epsilon=1e-08, decay=decay_rate)

    # Compiling model
    model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=['accuracy'])

    # Training model
    start = time.time()
    model.fit({'A1': feature1, 'B1': feature2}, {'dense3': labels},
              batch_size=batch_size, epochs=epochs, verbose=1)
    end = time.time()
    print('Time elapsed: %f' % (end - start))

    model.save(args.save_path + '_ep' + str(epochs) + '_lr' + str(lr) + '_bs' + str(batch_size))
    print('model training completed')
def build_model(name, nb_policy, height, width, depth, nb_resnet=8, **config):
    if config == {}:
        config = default_config

    # common core
    in_x = x = Input((height, width, depth))
    x = _build_normal_block(x, config)
    for _ in range(nb_resnet):
        x = _build_residual_block(x, config)
    res_out = x

    # policy branch
    x = Conv2D(filters=2, kernel_size=1, strides=(1, 1), padding="same",
               kernel_regularizer=l2(1e-4))(res_out)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Flatten()(x)
    policy_out = Dense(nb_policy, kernel_regularizer=l2(1e-4),
                       activation="softmax", name="policy_out")(x)

    # value branch
    x = Conv2D(filters=1, kernel_size=1, strides=(1, 1), padding="same",
               kernel_regularizer=l2(1e-4))(res_out)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Flatten()(x)
    x = Dense(256, kernel_regularizer=l2(1e-4), activation="relu")(x)
    value_out = Dense(1, kernel_regularizer=l2(1e-4),
                      activation="sigmoid", name="value_out")(x)

    # compile and save
    mod = Model(in_x, [policy_out, value_out], name=name)
    mod.compile(optimizer="sgd", loss=[categorical_crossentropy, mean_squared_error])
    mod.save(name)
    return mod
def model1(x_train, y_train, x_test, y_test):
    # 200-dimensional triples (head, relation, tail)
    inputs = keras.Input(shape=(100, 3, 1,))
    cnn1 = Conv2D(filters=50,
                  kernel_initializer=keras.initializers.TruncatedNormal(mean=0.0, stddev=0.05, seed=None),
                  kernel_size=(1, 3), padding='valid', strides=1, activation='relu')(inputs)
    flat = Flatten()(cnn1)
    drop = Dropout(0.2)(flat)
    # out1 = Dense(units=1, use_bias=False, kernel_regularizer=keras.regularizers.l2(0.0005))(drop)
    out1 = Dense(units=1, use_bias=False)(drop)

    # net
    model1 = Model(inputs, out1)
    # model1.compile()
    # model1.summary()
    # ou1_output = model1.predict(x_train)  # shape (-1, 1)
    model1.compile(loss=myLoss, optimizer=Adam(6e-6))
    model1.summary()

    loss_value = []
    history = model1.fit(x_train, y_train, batch_size=30, epochs=200,
                         validation_data=(x_test, y_test))

    # plot history
    pyplot.plot(history.history['loss'], label='train')
    pyplot.plot(history.history['val_loss'], label='valid')
    pyplot.legend()
    pyplot.show()

    model1.save('../data/modelFile/originalConvKB_onlyType13_%s_%s.h5' %
                (round(history.history['loss'][-1], 4),
                 round(history.history['val_loss'][-1], 4)))
def AE_train(encoding_dim, x_train, epochs_num):
    # encoder layers
    input_data = Input(shape=[29])
    encoded = Dense(24, activation='relu')(input_data)
    encoded = Dense(16, activation='relu')(encoded)
    encoded = Dense(8, activation='relu')(encoded)
    encoder_output = Dense(encoding_dim)(encoded)

    # decoder layers
    decoded = Dense(8, activation='relu')(encoder_output)
    decoded = Dense(16, activation='relu')(decoded)
    decoded = Dense(24, activation='relu')(decoded)
    decoded = Dense(29, activation='tanh')(decoded)

    autoencoder = Model(inputs=input_data, outputs=decoded)
    encoder = Model(inputs=input_data, outputs=encoder_output)
    autoencoder.compile(optimizer='adam', loss='mse')

    def step_decay(epoch):
        initial_lrate = 0.01
        drop = 0.5
        epochs_drop = 10.0
        _lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
        return _lrate

    lrate = LearningRateScheduler(step_decay)
    history = autoencoder.fit(x_train, x_train, epochs=epochs_num,
                              batch_size=256, callbacks=[lrate])

    loss = history.history['loss']
    epochs = range(1, epochs_num + 1)
    plt.title('Loss')
    plt.plot(epochs, loss, 'blue', label='loss')
    plt.legend()
    plt.show()

    encoder.save("encoder_model.h5")
def save_keras_model(model: Model, path: str, fmt: str = None,
                     tf_serving_version: int = None) -> None:
    logging.getLogger("tensorflow").setLevel(logging.WARNING)
    if fmt:
        keras_model_path = get_keras_model_path(path=path, format=fmt)
        logger.info(f"saving keras model to path:{keras_model_path}")
        dir_path = os.path.dirname(keras_model_path)
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        model.save(keras_model_path, include_optimizer=False, save_format=fmt)
    if tf_serving_version:
        tf_serving_model_path = get_tf_serving_model_path(path=path,
                                                          tf_serving_version=tf_serving_version)
        dir_path = os.path.dirname(tf_serving_model_path)
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        logger.info(f"saving tf serving model to path:{tf_serving_model_path}")
        model.save(tf_serving_model_path, save_format="tf")
        logger.info("compress... tf serving model (for euler deployment)...")
        cmd = f"cd {os.path.dirname(tf_serving_model_path)}; tar czvf {tf_serving_version}.tar.gz {tf_serving_version}"
        execute_cmd(cmd)
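# --- Hedged usage sketch (assumption): how save_keras_model might be invoked.
# `get_keras_model_path`, `get_tf_serving_model_path`, `logger` and `execute_cmd` are
# helpers from the surrounding project; the toy model, path and version below are
# illustrative only.
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense

inp = Input(shape=(4,))
toy_model = Model(inp, Dense(1)(inp))
save_keras_model(toy_model, path="./artifacts/my_model", fmt="h5", tf_serving_version=1)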
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
X_train = X_train.astype("float") / 255.0
X_test = X_test.astype("float") / 255.0

model = VGG16(weights='imagenet', include_top=False,
              input_shape=(image_size, image_size, 3))

top_model = Sequential()
top_model.add(Flatten(input_shape=model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(num_classes, activation="softmax"))

model = Model(inputs=model.input, outputs=top_model(model.output))

for layer in model.layers[:15]:
    layer.trainable = False

opt = Adam(lr=0.0001)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=32, epochs=17)
score = model.evaluate(X_test, y_test, batch_size=32)
model.save('./vgg16_transfer.h5')
def iterative_prune_model():
    # build the inception v3 network
    base_model = inception_v3.InceptionV3(include_top=False, weights='imagenet',
                                          pooling='avg', input_shape=(299, 299, 3))
    print('Model loaded.')
    top_output = Dense(5, activation='softmax')(base_model.output)

    # add the model on top of the convolutional base
    model = Model(base_model.inputs, top_output)
    del base_model
    model.load_weights(tuned_weights_path)

    # compile the model with a SGD/momentum optimizer
    # and a very slow learning rate.
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=1e-4, momentum=0.9), metrics=['accuracy'])

    # Set up data generators
    train_datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input,
        shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
    train_generator = train_datagen.flow_from_directory(
        train_data_dir, target_size=(img_height, img_width),
        batch_size=batch_size, class_mode='categorical')
    train_steps = train_generator.n // train_generator.batch_size

    test_datagen = ImageDataGenerator(preprocessing_function=inception_v3.preprocess_input)
    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir, target_size=(img_height, img_width),
        batch_size=val_batch_size, class_mode='categorical')
    val_steps = validation_generator.n // validation_generator.batch_size

    # Evaluate the model performance before pruning
    loss = model.evaluate_generator(validation_generator,
                                    validation_generator.n // validation_generator.batch_size)
    print('original model validation loss: ', loss[0], ', acc: ', loss[1])

    total_channels = get_total_channels(model)
    n_channels_delete = int(math.floor(percent_pruning / 100 * total_channels))

    # Incrementally prune the network, retraining it each time
    percent_pruned = 0
    # If percent_pruned > 0, continue pruning from previous checkpoint
    if percent_pruned > 0:
        checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned) + 'percent')
        model = load_model(output_dir + checkpoint_name + '.h5')

    while percent_pruned <= total_percent_pruning:
        # Prune the model
        apoz_df = get_model_apoz(model, validation_generator)
        percent_pruned += percent_pruning
        print('pruning up to ', str(percent_pruned), '% of the original model weights')
        model = prune_model(model, apoz_df, n_channels_delete)

        # Clean up tensorflow session after pruning and re-load model
        checkpoint_name = ('inception_flowers_pruning_' + str(percent_pruned) + 'percent')
        model.save(output_dir + checkpoint_name + '.h5')
        del model
        tensorflow.python.keras.backend.clear_session()
        tf.reset_default_graph()
        model = load_model(output_dir + checkpoint_name + '.h5')

        # Re-train the model
        model.compile(loss='categorical_crossentropy',
                      optimizer=SGD(lr=1e-4, momentum=0.9), metrics=['accuracy'])
        csv_logger = CSVLogger(output_dir + checkpoint_name + '.csv')
        model.fit_generator(train_generator, steps_per_epoch=train_steps,
                            epochs=epochs, validation_data=validation_generator,
                            validation_steps=val_steps, workers=4,
                            callbacks=[csv_logger])

    # Evaluate the final model performance
    loss = model.evaluate_generator(validation_generator,
                                    validation_generator.n // validation_generator.batch_size)
    print('pruned model loss: ', loss[0], ', acc: ', loss[1])
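# --- Hedged sketch (assumption): one plausible implementation of the
# `get_total_channels` helper used above; it simply counts the output channels of
# every Conv2D layer in the model. The project's real helper may differ.
from tensorflow.keras.layers import Conv2D

def get_total_channels(model):
    # total number of prunable output channels across all Conv2D layers
    return sum(layer.filters for layer in model.layers if isinstance(layer, Conv2D))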
def fit(self, learning_rate=1e-4, epochs=5, activation='relu', dropout=0, hidden_size=1024, nb_layers=1, include_class_weight=False, batch_size=20, save_model=False, verbose=True, fine_tuning=False, NB_IV3_LAYERS_TO_FREEZE=279, use_TPU=False, transfer_model='Inception', min_accuracy=None, extract_SavedModel=False): if transfer_model in ['Inception', 'Xception', 'Inception_Resnet']: target_size = (299, 299) else: target_size = (224, 224) #We expect the classes to be the name of the folders in the training set self.categories = os.listdir(TRAIN_DIR) """ helper functions to to build tensors inspired by https://www.tensorflow.org/tutorials/load_data/images """ def prepare_image(img_path): #reshape the image image = Image.open(img_path) image = image.resize(target_size, PIL.Image.BILINEAR).convert("RGB") #convert the image into a numpy array, and expend to a size 4 tensor image = img_to_array(image) #rescale the pixels to a 0-1 range image = image.astype(np.float32) / 255 return image def generate_tuples(img_folder): #loop through all the images # Get all file names of images present in folder classes = os.listdir(img_folder) classes_paths = [ os.path.abspath(os.path.join(img_folder, i)) for i in classes ] x = [] y = [] for i, j in enumerate(classes): #for all the classes, get the list of pictures img_paths = os.listdir(classes_paths[i]) img_paths = [ os.path.abspath(os.path.join(classes_paths[i], x)) for x in img_paths ] for img_path in img_paths: x.append(prepare_image(img_path)) y = y + [i] return (np.array(x), np.array(y).astype(np.int32)) #get training data (x_train, y_train) = generate_tuples(parentdir + '/data/image_dataset/train') (x_val, y_val) = generate_tuples(parentdir + '/data/image_dataset/val') #train input_function: see https://colab.research.google.com/drive/1F8txK1JLXKtAkcvSRQz2o7NSTNoksuU2#scrollTo=abbwQQfH0td3 def get_training_dataset(batch_size=batch_size): # Convert the inputs to a Dataset. dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) # Shuffle, repeat, and batch the examples. dataset = dataset.shuffle(1000).repeat().batch(batch_size, drop_remainder=True) return dataset def get_validation_dataset(batch_size=batch_size): # Convert the inputs to a Dataset. dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)) # Shuffle, repeat, and batch the examples. 
dataset = dataset.shuffle(1000).repeat().batch(batch_size, drop_remainder=True) return dataset #if we want stop training when no sufficient improvement in accuracy has been achieved if min_accuracy is not None: callback = EarlyStopping(monitor='acc', baseline=min_accuracy) callback = [callback] else: callback = None #load the pretrained model, without the classification (top) layers if transfer_model == 'Xception': base_model = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3)) elif transfer_model == 'Inception_Resnet': base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(299, 299, 3)) elif transfer_model == 'Resnet': base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3)) else: base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3)) #Add the classification layers using Keras functional API x = base_model.output x = GlobalAveragePooling2D()(x) for _ in range(nb_layers): x = Dense(hidden_size, activation=activation)( x) #Hidden layer for classification if dropout > 0: x = Dropout(rate=dropout)(x) predictions = Dense(len(self.categories), activation='softmax')(x) #Output layer model = Model(inputs=base_model.input, outputs=predictions) #Set only the top layers as trainable (if we want to do fine-tuning, #we can train the base layers as a second step) for layer in base_model.layers: layer.trainable = False #Define the optimizer and the loss, and compile the model loss = 'sparse_categorical_crossentropy' if use_TPU: #if we want to try out the TPU, it looks like we currently need to use #tensorflow optimizers...see https://stackoverflow.com/questions/52940552/valueerror-operation-utpu-140462710602256-varisinitializedop-has-been-marked #...and https://www.youtube.com/watch?v=jgNwywYcH4w optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) model.compile(optimizer=optimizer, loss=sparse_softmax_cross_entropy, metrics=['acc']) TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR'] model = tf.contrib.tpu.keras_to_tpu_model( model, strategy=tf.contrib.tpu.TPUDistributionStrategy( tf.contrib.cluster_resolver.TPUClusterResolver( TPU_WORKER))) tf.logging.set_verbosity(tf.logging.INFO) else: optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) model.compile(optimizer=optimizer, loss=loss, metrics=['acc']) #if we want to weight the classes given the imbalanced number of images if include_class_weight: from sklearn.utils.class_weight import compute_class_weight cls_train = self.categories class_weight = compute_class_weight(class_weight='balanced', classes=np.unique(cls_train), y=cls_train) else: class_weight = None steps_per_epoch = int( sum([ len(files) for r, d, files in os.walk(parentdir + '/data/image_dataset/train') ]) / batch_size) validation_steps = int( sum([ len(files) for r, d, files in os.walk(parentdir + '/data/image_dataset/val') ]) / batch_size) #Fit the model if use_TPU: history = model.fit(get_training_dataset, steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset, validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) else: history = model.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset(), validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) #Fine-tune the model, if we wish so if fine_tuning and not model.stop_training: print('============') print('Begin 
fine-tuning') print('============') #declare the first layers as trainable for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]: layer.trainable = False for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]: layer.trainable = True model.compile(optimizer=tf.train.AdamOptimizer( learning_rate=learning_rate * 0.1), loss=loss, metrics=['acc']) #Fit the model if use_TPU: history = model.fit(get_training_dataset, steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset, validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) else: history = model.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset(), validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) #Evaluate the model, just to be sure self.fitness = history.history['val_categorical_accuracy'][-1] #Save the model if save_model: if not os.path.exists(parentdir + '/data/trained_models'): os.makedirs(parentdir + '/data/trained_models') model.save(parentdir + '/data/trained_models/trained_model.h5') print('Model saved!') #save model in production format if extract_SavedModel: export_path = "./image_classifier/1/" with K.get_session() as sess: tf.saved_model.simple_save( sess, export_path, inputs={'input_image': model.input}, outputs={t.name: t for t in model.outputs}) else: self.model = model del history del model
class JointBertCRFModel(JointBertModel):
    def __init__(self, slots_num, intents_num, bert_hub_path, sess,
                 num_bert_fine_tune_layers=10, is_bert=True, is_crf=True,
                 learning_rate=5e-5):
        super(JointBertCRFModel, self).__init__(slots_num, intents_num, bert_hub_path,
                                                sess, num_bert_fine_tune_layers,
                                                is_bert, is_crf, learning_rate)

    def compile_model(self):
        # Instead of using `categorical_crossentropy`, we use
        # `sparse_categorical_crossentropy`, which does expect integer targets.
        optimizer = tf.keras.optimizers.Adam(lr=self.learning_rate)
        losses = {
            'slots_tagger': self.crf.loss,
            'intent_classifier': 'sparse_categorical_crossentropy',
        }
        loss_weights = {'slots_tagger': 3.0, 'intent_classifier': 1.0}
        metrics = {'intent_classifier': 'acc'}
        self.model.compile(optimizer=optimizer, loss=losses,
                           loss_weights=loss_weights, metrics=metrics)
        self.model.summary()

    def build_model(self):
        in_id = Input(shape=(None,), name='input_ids')
        in_mask = Input(shape=(None,), name='input_masks')
        in_segment = Input(shape=(None,), name='segment_ids')
        in_valid_positions = Input(shape=(None, self.slots_num), name='valid_positions')
        sequence_lengths = Input(shape=(1,), dtype='int32', name='sequence_lengths')
        bert_inputs = [in_id, in_mask, in_segment, in_valid_positions]

        if self.is_bert:
            bert_pooled_output, bert_sequence_output = BertLayer(
                n_fine_tune_layers=self.num_bert_fine_tune_layers,
                bert_path=self.bert_hub_path,
                pooling='mean', name='BertLayer')(bert_inputs)
        else:
            bert_pooled_output, bert_sequence_output = AlbertLayer(
                fine_tune=True if self.num_bert_fine_tune_layers > 0 else False,
                albert_path=self.bert_hub_path,
                pooling='mean', name='AlbertLayer')(bert_inputs)

        intents_fc = Dense(self.intents_num, activation='softmax',
                           name='intent_classifier')(bert_pooled_output)

        self.crf = CRFLayer(name='slots_tagger')
        slots_output = self.crf(inputs=[bert_sequence_output, sequence_lengths])

        self.model = Model(inputs=bert_inputs + [sequence_lengths],
                           outputs=[slots_output, intents_fc])

    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        """
        X: batch of [input_ids, input_mask, segment_ids, valid_positions]
        """
        X = (X[0], X[1], X[2], self.prepare_valid_positions(X[3]), X[4])
        if validation_data is not None:
            X_val, Y_val = validation_data
            validation_data = ((X_val[0], X_val[1], X_val[2],
                                self.prepare_valid_positions(X_val[3]), X_val[4]), Y_val)
        self.model.fit(X, Y, validation_data=validation_data,
                       epochs=epochs, batch_size=batch_size)

    def predict_slots_intent(self, x, slots_vectorizer, intent_vectorizer,
                             remove_start_end=True):
        valid_positions = x[3]
        x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions), x[4])
        y_slots, y_intent = self.predict(x)
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:
            slots = [x[1:-1] for x in slots]
        intents = np.array([intent_vectorizer.inverse_transform([np.argmax(y_intent[i])])[0]
                            for i in range(y_intent.shape[0])])
        return slots, intents

    def save(self, model_path):
        with open(os.path.join(model_path, 'params.json'), 'w') as json_file:
            json.dump(self.model_params, json_file, indent=2)
        self.model.save(os.path.join(model_path, 'joint_bert_crf_model.h5'))

    def load(load_folder_path, sess):
        with open(os.path.join(load_folder_path, 'params.json'), 'r') as json_file:
            model_params = json.load(json_file)

        slots_num = model_params['slots_num']
        intents_num = model_params['intents_num']
        bert_hub_path = model_params['bert_hub_path']
        num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers']
        is_bert = model_params['is_bert']
        if 'is_crf' in model_params:
            is_crf = model_params['is_crf']
        else:
            is_crf = True
        if 'learning_rate' in model_params:
            learning_rate = model_params['learning_rate']
        else:
            learning_rate = 5e-5

        new_model = JointBertCRFModel(slots_num, intents_num, bert_hub_path, sess,
                                      num_bert_fine_tune_layers, is_bert, is_crf,
                                      learning_rate)
        new_model.model.load_weights(os.path.join(load_folder_path,
                                                  'joint_bert_crf_model.h5'))
        return new_model
class JointBertModel1(NLUModel):
    def __init__(self, intents_num, bert_hub_path, num_bert_fine_tune_layers=10,
                 is_bert=True):
        # self.slots_num = slots_num
        self.intents_num = intents_num
        self.bert_hub_path = bert_hub_path
        self.num_bert_fine_tune_layers = num_bert_fine_tune_layers
        self.is_bert = is_bert

        self.model_params = {
            'intents_num': intents_num,
            'bert_hub_path': bert_hub_path,
            'num_bert_fine_tune_layers': num_bert_fine_tune_layers,
            'is_bert': is_bert
        }

        self.build_model()
        self.compile_model()

    def compile_model(self):
        # Instead of using `categorical_crossentropy`, we use
        # `sparse_categorical_crossentropy`, which does expect integer targets.
        optimizer = tf.keras.optimizers.Adam(lr=5e-5)  # 0.001
        losses = {
            'intent_classifier': 'sparse_categorical_crossentropy',
        }
        loss_weights = {'intent_classifier': 1.0}
        metrics = {'intent_classifier': 'acc'}
        self.model.compile(optimizer=optimizer, loss=losses,
                           loss_weights=loss_weights, metrics=metrics)
        self.model.summary()

    def build_model(self):
        in_id = Input(shape=(None,), name='input_word_ids', dtype=tf.int32)
        in_mask = Input(shape=(None,), name='input_mask', dtype=tf.int32)
        in_segment = Input(shape=(None,), name='input_type_ids', dtype=tf.int32)
        # in_valid_positions = Input(shape=(None, self.slots_num), name='valid_positions')
        bert_inputs = [in_id, in_mask, in_segment]
        inputs = bert_inputs

        if self.is_bert:
            name = 'BertLayer'
        else:
            name = 'AlbertLayer'
        bert_pooled_output, bert_sequence_output = hub.KerasLayer(
            self.bert_hub_path, trainable=True, name=name)(bert_inputs)

        intents_fc = Dense(self.intents_num, activation='softmax',
                           name='intent_classifier')(bert_pooled_output)

        self.model = Model(inputs=inputs, outputs=intents_fc)

    def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32):
        """
        X: batch of [input_ids, input_mask, segment_ids, valid_positions]
        """
        X = (X[0], X[1], X[2])
        if validation_data is not None:
            print("INSIDE")
            X_val, Y_val = validation_data
            validation_data = ((X_val[0], X_val[1], X_val[2]), Y_val)
        history = self.model.fit(X, Y, validation_data=validation_data,
                                 epochs=epochs, batch_size=batch_size)
        # self.visualize_metric(history.history, 'slots_tagger_loss')
        # self.visualize_metric(history.history, 'intent_classifier_loss')
        # self.visualize_metric(history.history, 'loss')
        # self.visualize_metric(history.history, 'intent_classifier_acc')

    def prepare_valid_positions(self, in_valid_positions):
        in_valid_positions = np.expand_dims(in_valid_positions, axis=2)
        in_valid_positions = np.tile(in_valid_positions, (1, 1, self.slots_num))
        return in_valid_positions

    def predict_slots_intent(self, x, slots_vectorizer, intent_vectorizer,
                             remove_start_end=True, include_intent_prob=False):
        valid_positions = x[3]
        x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions))
        y_slots, y_intent = self.predict(x)
        slots = slots_vectorizer.inverse_transform(y_slots, valid_positions)
        if remove_start_end:
            slots = [x[1:-1] for x in slots]
        if not include_intent_prob:
            intents = np.array([intent_vectorizer.inverse_transform([np.argmax(i)])[0]
                                for i in y_intent])
        else:
            intents = np.array([(intent_vectorizer.inverse_transform([np.argmax(i)])[0],
                                 round(float(np.max(i)), 4)) for i in y_intent])
        return slots, intents

    def save(self, model_path):
        with open(os.path.join(model_path, 'params.json'), 'w') as json_file:
            json.dump(self.model_params, json_file)
        self.model.save(os.path.join(model_path, 'joint_bert_model.h5'))

    def load(load_folder_path):
        with open(os.path.join(load_folder_path, 'params.json'), 'r') as json_file:
            model_params = json.load(json_file)

        # slots_num = model_params['slots_num']
        intents_num = model_params['intents_num']
        bert_hub_path = model_params['bert_hub_path']
        num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers']
        is_bert = model_params['is_bert']

        new_model = JointBertModel(intents_num, bert_hub_path,
                                   num_bert_fine_tune_layers, is_bert)
        new_model.model.load_weights(os.path.join(load_folder_path, 'joint_bert_model.h5'))
        return new_model
class MTmodel:
    def __init__(self, fl, mode, hparams, labels_norm=True):
        """
        Initialises new DNN model based on input features_dim, labels_dim, hparams
        :param features_dim: Number of input feature nodes. Integer
        :param labels_dim: Number of output label nodes. Integer
        :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
            hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
        """
        self.features_dim = fl.features_c_dim
        # Assuming that each task has only 1 dimensional output
        self.labels_dim = [1 for _ in range(fl.labels_dim)]
        self.hparams = hparams
        self.labels_norm = labels_norm
        features_in = Input(shape=(self.features_dim,), name='main_features_c_input')

        # Selection of model
        if mode == 'hps':
            hps_model = hps(self.features_dim, self.labels_dim, self.hparams)
            x = hps_model(features_in)
        elif mode == 'cs':
            cs_model = cross_stitch(self.features_dim, self.labels_dim, self.hparams)
            x = cs_model(features_in)
        self.model = Model(inputs=features_in, outputs=x)
        self.model.compile(optimizer=hparams['optimizer'], loss='mean_squared_error')

    def train_model(self, fl, i_fl, save_name='mt.h5', save_dir='./save/models/',
                    save_mode=False, plot_name=None):
        # Training model
        training_features = fl.features_c_norm
        if self.labels_norm:
            training_labels = fl.labels_norm.T.tolist()
        else:
            training_labels = fl.labels.T.tolist()

        if plot_name:
            history = self.model.fit(training_features, training_labels,
                                     epochs=self.hparams['epochs'],
                                     batch_size=self.hparams['batch_size'],
                                     verbose=self.hparams['verbose'])
            # Debugging check to see features and prediction
            # pprint.pprint(training_features)
            # pprint.pprint(self.model.predict(training_features))
            # pprint.pprint(training_labels)

            # summarize history for accuracy
            plt.plot(history.history['loss'])
            plt.title('model loss')
            plt.ylabel('loss')
            plt.xlabel('epoch')
            plt.legend(['train'], loc='upper left')
            plt.savefig(plot_name, bbox_inches='tight')
            plt.close()
        else:
            self.model.fit(training_features, training_labels,
                           epochs=self.hparams['epochs'],
                           batch_size=self.hparams['batch_size'],
                           verbose=self.hparams['verbose'])

        # Saving Model
        if save_mode:
            self.model.save(save_dir + save_name)
        return self.model

    def eval(self, eval_fl):
        features = eval_fl.features_c_norm
        if self.labels_norm:
            labels = eval_fl.labels_norm.tolist()
            labels_actual = eval_fl.labels.tolist()
            predictions = self.model.predict(features)
            predictions = [prediction.T for prediction in predictions]
            predictions = np.vstack(predictions).T
            predictions = predictions.tolist()
            predictions_actual = eval_fl.labels_scaler.inverse_transform(predictions)
            # Calculating metrics
            mse = mean_squared_error(labels_actual, predictions_actual)
            mse_norm = mean_squared_error(labels, predictions)
        else:
            labels = eval_fl.labels.tolist()
            predictions = self.model.predict(features)
            predictions = [prediction.T for prediction in predictions]
            predictions = np.vstack(predictions).T
            predictions_actual = predictions.tolist()
            mse = mean_squared_error(labels, predictions_actual)
            mse_norm = mse
        return predictions_actual, mse, mse_norm
class Kmodel: def __init__(self, fl, mode, hparams): """ Initialises new DNN model based on input features_dim, labels_dim, hparams :param features_dim: Number of input feature nodes. Integer :param labels_dim: Number of output label nodes. Integer :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function. hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes. """ self.features_dim = fl.features_c_dim self.labels_dim = fl.labels_dim # Assuming that each task has only 1 dimensional output self.hparams = hparams self.mode = mode self.normalise_labels = fl.normalise_labels self.labels_scaler = fl.labels_scaler features_in = Input(shape=(self.features_dim, ), name='main_features_c_input') # Selection of model if mode == 'ann': model = ann(self.features_dim, self.labels_dim, self.hparams) x = model(features_in) self.model = Model(inputs=features_in, outputs=x) elif mode == 'ann2': model_1 = ann(self.features_dim, 50, self.hparams) x = model_1(features_in) model_end = ann(50, 50, self.hparams) end = model_end(x) end_node = Dense(units=1, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='output_layer')(end) model_2 = ann(50, self.labels_dim - 1, self.hparams) x = model_2(x) self.model = Model(inputs=features_in, outputs=[end_node, x]) elif mode == 'ann3': x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(0))(features_in) x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(2))(x) # x = BatchNormalization()(x) x = Dense(units=self.labels_dim, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Final')(x) self.model = Model(inputs=features_in, outputs=x) elif mode == 'conv1': if fl.label_type == 'gf20': final_dim = 20 else: final_dim = 19 x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='shared' + str(1))(features_in) x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) #x = BatchNormalization()(x) x = Dense(units=final_dim, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_set_19')(x) #x = BatchNormalization()(x) x = Reshape(target_shape=(final_dim, 1))(x) x = Conv1D(filters=hparams['filters'], kernel_size=3, strides=1, padding='same', activation='relu')(x) #x = BatchNormalization()(x) x = Conv1D(filters=hparams['filters'] * 2, kernel_size=3, strides=1, padding='same', activation='relu')(x) x = Conv1D(filters=hparams['filters'] * 4, kernel_size=3, strides=1, padding='same', activation='relu')(x) #x = Permute((2,1))(x) #x = GlobalAveragePooling1D()(x) x = TimeDistributed(Dense(1, activation='linear'))(x) x = Reshape(target_shape=(final_dim, ))(x) self.model = Model(inputs=features_in, outputs=x) elif mode == 'conv2': x = Dense(units=10, 
activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(1))(features_in) x = Dense(units=10, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(2))(x) end = Dense(units=10, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(1))(x) end = Dense(units=10, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(2))(end) end_node = Dense(units=1, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='output_layer')(end) x = Dense(units=80, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) x = Reshape(target_shape=(80, 1))(x) x = Conv1D(filters=8, kernel_size=3, strides=1, padding='same', activation='relu')(x) x = MaxPooling1D(pool_size=2)(x) x = Conv1D(filters=16, kernel_size=3, strides=1, padding='same', activation='relu')(x) x = MaxPooling1D(pool_size=2)(x) #x = Permute((2,1))(x) #x = GlobalAveragePooling1D()(x) x = TimeDistributed(Dense(1, activation='linear'))(x) x = Reshape(target_shape=(20, ))(x) self.model = Model(inputs=features_in, outputs=[end_node, x]) elif mode == 'lstm': x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(1))(features_in) x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(2))(x) end = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(1))(x) end = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(2))(end) end_node = Dense(units=1, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='output_layer')(end) x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(2))(x) x = RepeatVector(n=20)(x) x = LSTM(units=30, activation='relu', return_sequences=True)(x) x = LSTM(units=30, activation='relu', return_sequences=True)(x) x = TimeDistributed(Dense(1))(x) x = Reshape(target_shape=(20, ))(x) ''' x = Permute((2,1))(x) x = GlobalAveragePooling1D()(x) ''' self.model = Model(inputs=features_in, outputs=[end_node, x]) optimizer = Adam(learning_rate=hparams['learning_rate'], clipnorm=1) def weighted_mse(y_true, y_pred): loss_weights = np.sqrt(np.arange(1, 20)) #loss_weights = np.arange(1, 20) return K.mean(K.square(y_pred - y_true) * loss_weights, axis=-1) def haitao_error(y_true, y_pred): diff = K.abs( (y_true - y_pred) / K.reshape(K.clip(K.abs(y_true[:, -1]), K.epsilon(), None), (-1, 1))) return 100. 
* K.mean(diff, axis=-1) if hparams['loss'] == 'mape': self.model.compile(optimizer=optimizer, loss=MeanAbsolutePercentageError()) elif hparams['loss'] == 'haitao': self.model.compile(optimizer=optimizer, loss=haitao_error) elif hparams['loss'] == 'mse': self.model.compile(optimizer=optimizer, loss='mean_squared_error') #self.model.summary() def train_model(self, fl, i_fl, save_name='mt.h5', save_dir='./save/models/', save_mode=False, plot_name=None): # Training model training_features = fl.features_c_norm val_features = i_fl.features_c_norm if self.normalise_labels: training_labels = fl.labels_norm val_labels = i_fl.labels_norm else: training_labels = fl.labels val_labels = i_fl.labels # Plotting if plot_name: history = self.model.fit(training_features, training_labels, validation_data=(val_features, val_labels), epochs=self.hparams['epochs'], batch_size=self.hparams['batch_size'], verbose=self.hparams['verbose']) # Debugging check to see features and prediction # pprint.pprint(training_features) # pprint.pprint(self.model.predict(training_features)) # pprint.pprint(training_labels) # summarize history for accuracy plt.semilogy(history.history['loss'], label=['train']) plt.semilogy(history.history['val_loss'], label=['test']) plt.plot([], [], ' ', label='Final train: {:.3e}'.format( history.history['loss'][-1])) plt.plot([], [], ' ', label='Final val: {:.3e}'.format( history.history['val_loss'][-1])) plt.title('model loss') plt.ylabel('loss') plt.xlabel('epoch') plt.legend(loc='upper right') plt.savefig(plot_name, bbox_inches='tight') plt.close() else: history = self.model.fit(training_features, training_labels, epochs=self.hparams['epochs'], batch_size=self.hparams['batch_size'], verbose=self.hparams['verbose']) # Saving Model if save_mode: self.model.save(save_dir + save_name) return self.model, history def eval(self, eval_fl): features = eval_fl.features_c_norm predictions = self.model.predict(features) if self.normalise_labels: mse_norm = mean_squared_error(eval_fl.labels_norm, predictions) mse = mean_squared_error( eval_fl.labels, self.labels_scaler.inverse_transform(predictions)) else: mse = mean_squared_error(eval_fl.labels, predictions) mse_norm = mse return predictions, mse, mse_norm
class ConvMnist:
    def __init__(self, filename=None):
        '''
        Load a trained model file (optional)
        '''
        self.model = None
        if filename is not None:
            print('load model: ', filename)
            self.model = load_model(filename)
            self.model.summary()

    def train(self):
        '''
        Train the model
        '''
        # Load the MNIST training and test data
        (x_train_org, y_train), (x_test_org, y_test) = mnist.load_data()

        # Pre-process the training data
        # X: convert to a 60000x28x28x3 tensor and normalise the values to 0-1.0
        # Y: one-hot encode (60000x1 -> 60000x10)
        x_train = np.empty((x_train_org.shape[0], x_train_org.shape[1], x_train_org.shape[2], 3))
        x_train[:, :, :, 0] = x_train_org
        x_train[:, :, :, 1] = x_train_org
        x_train[:, :, :, 2] = x_train_org
        x_test = np.empty((x_test_org.shape[0], x_test_org.shape[1], x_test_org.shape[2], 3))
        x_test[:, :, :, 0] = x_test_org
        x_test[:, :, :, 1] = x_test_org
        x_test[:, :, :, 2] = x_test_org
        x_train = x_train / 255.
        x_test = x_test / 255.
        y_train = to_categorical(y_train, 10)
        y_test = to_categorical(y_test, 10)

        # TensorBoard settings for monitoring the training state
        # tsb = TensorBoard(log_dir='./logs')

        # Build the convolutional model
        input = Input(shape=(28, 28, 3))
        conv1 = Conv2D(filters=8, kernel_size=(3, 3), strides=(1, 1),
                       padding='same', activation='relu')(input)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
        conv2 = Conv2D(filters=4, kernel_size=(3, 3), strides=(1, 1),
                       padding='same', activation='relu')(pool1)
        dropout1 = Dropout(0.2)(conv2)
        flatten1 = Flatten()(dropout1)
        output = Dense(units=10, activation='softmax')(flatten1)
        self.model = Model(inputs=[input], outputs=[output])
        self.model.summary()
        self.model.compile(optimizer='adam', loss='categorical_crossentropy',
                           metrics=['accuracy'])

        # Train the convolutional model
        self.model.fit(
            x_train, y_train,
            batch_size=128, epochs=10, validation_split=0.2,
            # callbacks=[tsb],
        )

        # Evaluate the trained model on the test data
        score = self.model.evaluate(x_test, y_test, verbose=0)
        print("test data score: ", score)

    def save_trained_model(self, filename):
        '''
        Save the trained model to a file (h5)
        '''
        self.model.save(filename)

    def predict(self, input_image):
        '''
        Classify a single colour input image (28x28 ndarray) as a digit (0-9)
        ret: result, score
        '''
        if input_image.shape != (28, 28, 3):
            return -1, -1
        input_image = input_image.reshape(1, input_image.shape[0], input_image.shape[1], 3)
        input_image = input_image / 255.
        probs = self.model.predict(input_image)
        result = np.argmax(probs[0])
        return result, probs[0][result]
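# --- Hedged usage sketch (assumption): typical lifecycle of the ConvMnist class above.
# The file name is illustrative only.
if __name__ == '__main__':
    conv_mnist = ConvMnist()                       # start from scratch (no saved model)
    conv_mnist.train()                             # train on MNIST and print the test score
    conv_mnist.save_trained_model('conv_mnist.h5') # persist the trained model

    restored = ConvMnist('conv_mnist.h5')          # reload the trained model
    # restored.predict(image) expects a 28x28x3 ndarray with 0-255 pixel values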
def main(): from tensorflow.examples.tutorials.mnist import input_data data = input_data.read_data_sets("data/MNIST/", one_hot=True) # data_train = tfds.load(name="mnist", split="train") # data_test = tfds.load(name="mnist", split="test") print("Size of:") print("- Training-set:\t\t{}".format(len(data.train.labels))) print("- Test-set:\t\t{}".format(data.test.labels)) # Get the first images from the test-set. data.test.cls = np.array([label.argmax() for label in data.test.labels]) # images = data.x_test[0:9] images = data.test.images[0:9] #Get the true classes # cls_true = data.y_test_cls[0:9] cls_true = data.test.cls[0:9] # Plot the images and labels using our helper-function above. plot_images(images=images, cls_true=cls_true) if using_seq_model: model = Sequential() # Add an input layer which is similar to a feed_dict in TensorFlow. # Note that the input-shape must be a tuple containing the image-size. model.add(InputLayer(input_shape=(img_size_flat, ))) # The input is a flattened array with 784 elements, # but the convolutional layers expect images with shape (28, 28, 1) model.add(Reshape(img_shape_full)) # x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x') # x_image = tf.reshape(x, [-1, img_size, img_size, num_channels]) # y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true') # y_true_cls = tf.argmax(y_true, axis=1) # First convolutional layer with ReLU-activation and max-pooling. model.add( Conv2D(kernel_size=5, strides=1, filters=16, padding='same', activation='relu', name='layer_conv1')) model.add(MaxPooling2D(pool_size=2, strides=2)) # layer_conv1, weights_conv1 = new_conv_layer(input=x_image, # num_input_channels=num_channels, # filter_size=filter_size1, # num_filters=num_filters1, # use_pooling=True) # print (layer_conv1) # Second convolutional layer with ReLU-activation and max-pooling. model.add( Conv2D(kernel_size=5, strides=1, filters=36, padding='same', activation='relu', name='layer_conv2')) model.add(MaxPooling2D(pool_size=2, strides=2)) # layer_conv2, weights_conv2 = new_conv_layer(input=layer_conv1, # num_input_channels=num_filters1, # filter_size=filter_size2, # num_filters=num_filters2, # use_pooling=True) # print (layer_conv2) # Flatten the 4-rank output of the convolutional layers # to 2-rank that can be input to a fully-connected / dense layer. model.add(Flatten()) # layer_flat, num_features = flatten_layer(layer_conv2) # print (layer_flat) # print (num_features) # First fully-connected / dense layer with ReLU-activation. model.add(Dense(128, activation='relu')) # layer_fc1 = new_fc_layer(input=layer_flat, # num_inputs=num_features, # num_outputs=fc_size, # use_relu=True) # print (layer_fc1) # Last fully-connected / dense layer with softmax-activation # for use in classification. 
model.add(Dense(num_classes, activation='softmax')) # layer_fc2 = new_fc_layer(input=layer_fc1, # num_inputs=fc_size, # num_outputs=num_classes, # use_relu=False) # print(layer_fc2) # y_pred = tf.nn.softmax(layer_fc2) # y_pred_cls = tf.argmax(y_pred, axis=1) # cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2, # labels=y_true) # cost = tf.reduce_mean(cross_entropy) from tensorflow.keras.optimizers import Adam optimizer = Adam(lr=1e-3) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) # optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost) # correct_prediction = tf.equal(y_pred_cls, y_true_cls) # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # session = tf.Session() # session.run(tf.global_variables_initializer()) model.fit(x=data.train.images, y=data.train.labels, epochs=1, batch_size=128) result = model.evaluate(x=data.test.images, y=data.test.labels) print('') for name, value in zip(model.metrics_names, result): print(name, value) print("{0}: {1:.2%}".format(model.metrics_names[1], result[1])) # `save_model` requires h5py model.save(path_model) del model if using_fun_model: # Create an input layer which is similar to a feed_dict in TensorFlow. # Note that the input-shape must be a tuple containing the image-size. inputs = Input(shape=(img_size_flat, )) # Variable used for building the Neural Network. net = inputs # The input is an image as a flattened array with 784 elements. # But the convolutional layers expect images with shape (28, 28, 1) net = Reshape(img_shape_full)(net) # First convolutional layer with ReLU-activation and max-pooling. net = Conv2D(kernel_size=5, strides=1, filters=16, padding='same', activation='relu', name='layer_conv1')(net) net = MaxPooling2D(pool_size=2, strides=2)(net) # Second convolutional layer with ReLU-activation and max-pooling. net = Conv2D(kernel_size=5, strides=1, filters=36, padding='same', activation='relu', name='layer_conv2')(net) net = MaxPooling2D(pool_size=2, strides=2)(net) # Flatten the output of the conv-layer from 4-dim to 2-dim. net = Flatten()(net) # First fully-connected / dense layer with ReLU-activation. net = Dense(128, activation='relu')(net) # Last fully-connected / dense layer with softmax-activation # so it can be used for classification. net = Dense(num_classes, activation='softmax')(net) # Output of the Neural Network. 
outputs = net from tensorflow.python.keras.models import Model model2 = Model(inputs=inputs, outputs=outputs) model2.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) model2.fit(x=data.train.images, y=data.train.labels, epochs=1, batch_size=128) result = model2.evaluate(x=data.test.images, y=data.test.labels) print('') for name, value in zip(model2.metrics_names, result): print(name, value) print("{0}: {1:.2%}".format(model2.metrics_names[1], result[1])) # `save_model` requires h5py model2.save(path_model) if reload_model: from tensorflow.python.keras.models import load_model model3 = load_model(path_model) #images = data.x_test[0:9] images = data.test.images[0:9] #cls_true = data.y_test_cls[0:9] cls_true = data.test.labels[0:9] y_pred = model3.predict(x=images) cls_pred = np.argmax(y_pred, axis=1) plot_images(images=images, cls_true=cls_true, cls_pred=cls_pred) y_pred = model3.predict(x=data.test.images) cls_pred = np.argmax(y_pred, axis=1) cls_true = data.test.cls correct = (cls_true == cls_pred) plot_example_errors(data, cls_pred=cls_pred, correct=correct) model3.summary() # Attention: the functional and sequential models are different in # layers, for sequential ones: if reading_seq_model: layer_input = model3.layers[0] layer_conv1 = model3.layers[1] print(layer_conv1) layer_conv2 = model3.layers[3] elif reading_fun_model: layer_input = model3.layers[0] layer_conv1 = model3.layers[2] print(layer_conv1) layer_conv2 = model3.layers[4] weights_conv1 = layer_conv1.get_weights()[0] print(weights_conv1.shape) plot_conv_weights(weights=weights_conv1, input_channel=0) weights_conv2 = layer_conv2.get_weights()[0] plot_conv_weights(weights=weights_conv2, input_channel=0) image1 = data.test.images[0] plot_image(image1) # from tensorflow.keras import backend as K # output_conv1 = K.function(inputs=[layer_input.input], # outputs=[layer_conv1.output]) # print(output_conv1) # print(output_conv1([[image1]])) # layer_output1 = output_conv1([[image1]])[0] # print(layer_output1.shape) # plot_conv_output(values=layer_output1) from tensorflow.keras.models import Model output_conv2 = Model(inputs=layer_input.input, outputs=layer_conv2.output) layer_output2 = output_conv2.predict(np.array([image1])) layer_output2.shape plot_conv_output(values=layer_output2)
# Step 2-1: Replace the softmax layer and add one dense layer
# include_top=False removes the final 1000-neuron softmax (ImageNet) head from the model
base_model = VGG16(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
prediction = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=prediction)

# freeze the convolutional base so only the newly added head is trained first
for layer in base_model.layers:
    layer.trainable = False
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
model.fit_generator(train_generator, steps_per_epoch=10, epochs=epochs)

# Step 2-2: Unfreeze everything above the first 5 layers and fine-tune
for layer in model.layers[:5]:
    layer.trainable = False
for layer in model.layers[5:]:
    layer.trainable = True
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
model.fit_generator(train_generator, steps_per_epoch=10, epochs=epochs)

# Save fine tuned weights
model.save('./model/vgg16_cat_dog.h5')
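# --- Hedged sketch (assumption): `train_generator` and `epochs` are not defined in the
# snippet above; a typical setup for a two-class cat/dog folder layout might look like
# this. The directory path, image size and batch size are illustrative only.
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import preprocess_input

epochs = 5
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   horizontal_flip=True)
train_generator = train_datagen.flow_from_directory(
    './data/train', target_size=(224, 224), batch_size=32, class_mode='categorical')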
class Pmodel: def __init__(self, fl, mode, hparams): """ Initialises new DNN model based on input features_dim, labels_dim, hparams :param features_dim: Number of input feature nodes. Integer :param labels_dim: Number of output label nodes. Integer :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function. hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes. """ # self.features_dim = fl.features_c_dim # self.labels_dim = fl.labels_dim # Assuming that each task has only 1 dimensional output self.features_dim = fl.features_c_dim + 1 # 1 for the positional argument self.labels_dim = 1 self.numel = fl.labels.shape[1] + 1 self.hparams = hparams self.mode = mode self.normalise_labels = fl.normalise_labels self.labels_scaler = fl.labels_scaler features_in = Input(shape=(self.features_dim, ), name='main_features_c_input') # Selection of model if mode == 'ann': model = ann(self.features_dim, self.labels_dim, self.hparams) x = model(features_in) self.model = Model(inputs=features_in, outputs=x) elif mode == 'ann2': model_1 = ann(self.features_dim, 50, self.hparams) x = model_1(features_in) model_end = ann(50, 50, self.hparams) end = model_end(x) end_node = Dense(units=1, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='output_layer')(end) model_2 = ann(50, self.labels_dim - 1, self.hparams) x = model_2(x) self.model = Model(inputs=features_in, outputs=[end_node, x]) elif mode == 'ann3': x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(0))(features_in) x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(2))(x) # x = BatchNormalization()(x) x = Dense(units=1, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_set_19')(x) self.model = Model(inputs=features_in, outputs=x) elif mode == 'conv1': x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='shared' + str(1))(features_in) x = Dense(units=hparams['pre'], activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) #x = BatchNormalization()(x) x = Dense(units=19, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_set_19')(x) #x = BatchNormalization()(x) x = Reshape(target_shape=(19, 1))(x) x = Conv1D(filters=hparams['filters'], kernel_size=3, strides=1, padding='same', activation='relu')(x) #x = BatchNormalization()(x) x = Conv1D(filters=hparams['filters'] * 2, kernel_size=3, strides=1, padding='same', activation='relu')(x) x = Conv1D(filters=hparams['filters'] * 4, kernel_size=3, strides=1, padding='same', activation='relu')(x) #x = Permute((2,1))(x) #x = GlobalAveragePooling1D()(x) x = TimeDistributed(Dense(1, activation='linear'))(x) x = Reshape(target_shape=(19, ))(x) self.model = Model(inputs=features_in, outputs=x) elif mode == 'conv2': x = 
Dense(units=10, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(1))(features_in) x = Dense(units=10, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(2))(x) end = Dense(units=10, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(1))(x) end = Dense(units=10, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(2))(end) end_node = Dense(units=1, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='output_layer')(end) x = Dense(units=80, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) x = Reshape(target_shape=(80, 1))(x) x = Conv1D(filters=8, kernel_size=3, strides=1, padding='same', activation='relu')(x) x = MaxPooling1D(pool_size=2)(x) x = Conv1D(filters=16, kernel_size=3, strides=1, padding='same', activation='relu')(x) x = MaxPooling1D(pool_size=2)(x) #x = Permute((2,1))(x) #x = GlobalAveragePooling1D()(x) x = TimeDistributed(Dense(1, activation='linear'))(x) x = Reshape(target_shape=(20, ))(x) self.model = Model(inputs=features_in, outputs=[end_node, x]) elif mode == 'lstm': x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(1))(features_in) x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Shared_e_' + str(2))(x) end = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(1))(x) end = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Dense_e_' + str(2))(end) end_node = Dense(units=1, activation='linear', kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='output_layer')(end) x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(1))(x) x = Dense(units=20, activation=hparams['activation'], kernel_regularizer=regularizers.l1_l2( l1=hparams['reg_l1'], l2=hparams['reg_l2']), name='Pre_' + str(2))(x) x = RepeatVector(n=20)(x) x = LSTM(units=30, activation='relu', return_sequences=True)(x) x = LSTM(units=30, activation='relu', return_sequences=True)(x) x = TimeDistributed(Dense(1))(x) x = Reshape(target_shape=(20, ))(x) ''' x = Permute((2,1))(x) x = GlobalAveragePooling1D()(x) ''' self.model = Model(inputs=features_in, outputs=[end_node, x]) optimizer = Adam(clipnorm=1) self.model.compile(optimizer=optimizer, loss='mean_squared_error') #self.model.summary() def train_model(self, fl, i_fl, save_name='mt.h5', save_dir='./save/models/', save_mode=False, plot_name=None): # Training model training_features = fl.features_c_norm val_features = i_fl.features_c_norm if self.normalise_labels: training_labels = fl.labels_norm val_labels = i_fl.labels_norm else: training_labels = fl.labels val_labels = i_fl.labels p_features = [] for features in training_features.tolist(): 
for idx in list(range(1, self.numel)): p_features.append(features + [idx]) training_features = np.array(p_features) training_labels = training_labels.flatten()[:, None] # Plotting if plot_name: p_features = [] for features in val_features.tolist(): for idx in list(range(1, self.numel)): p_features.append(features + [idx]) val_features = np.array(p_features) val_labels = val_labels.flatten()[:, None] history = self.model.fit(training_features, training_labels, validation_data=(val_features, val_labels), epochs=self.hparams['epochs'], batch_size=self.hparams['batch_size'], verbose=self.hparams['verbose']) # Debugging check to see features and prediction # pprint.pprint(training_features) # pprint.pprint(self.model.predict(training_features)) # pprint.pprint(training_labels) # summarize history for accuracy plt.semilogy(history.history['loss'], label=['train']) plt.semilogy(history.history['val_loss'], label=['test']) plt.plot([], [], ' ', label='Final train: {:.3e}'.format( history.history['loss'][-1])) plt.plot([], [], ' ', label='Final val: {:.3e}'.format( history.history['val_loss'][-1])) plt.title('model loss') plt.ylabel('loss') plt.xlabel('epoch') plt.legend(loc='upper right') plt.savefig(plot_name, bbox_inches='tight') plt.close() else: history = self.model.fit(training_features, training_labels, epochs=self.hparams['epochs'], batch_size=self.hparams['batch_size'], verbose=self.hparams['verbose']) # Saving Model if save_mode: self.model.save(save_dir + save_name) return self.model, history def eval(self, eval_fl): eval_features = eval_fl.features_c_norm predictions = [] for features in eval_features.tolist(): single_expt = [] for idx in list(range(1, self.numel)): single_expt.append( self.model.predict(np.array(features + [idx])[None, ...])[0][0]) predictions.append(single_expt) predictions = np.array(predictions) if self.normalise_labels: mse_norm = mean_squared_error(eval_fl.labels_norm, predictions) mse = mean_squared_error( eval_fl.labels, self.labels_scaler.inverse_transform(predictions)) else: mse = mean_squared_error(eval_fl.labels, predictions) mse_norm = mse return predictions, mse, mse_norm
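# --- Added sketch (not part of the original code) ---
# Minimal, self-contained illustration of the position-index trick used by
# Pmodel.train_model()/eval() above: each sample's feature vector is repeated
# once per output position with the position index appended as an extra
# feature, and the multi-column labels are flattened to a single column.
# The array values below are made up purely for illustration.
import numpy as np

features = np.array([[0.1, 0.2], [0.3, 0.4]])            # 2 samples, 2 features each
labels = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])    # 3 output positions per sample
numel = labels.shape[1] + 1                               # mirrors self.numel

p_features = []
for row in features.tolist():
    for idx in range(1, numel):                           # idx = 1 .. 3
        p_features.append(row + [idx])

expanded_features = np.array(p_features)                  # shape (6, 3)
expanded_labels = labels.flatten()[:, None]               # shape (6, 1)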
class JointBertModel(NLUModel): def __init__(self, slots_num, intents_num, sess, num_bert_fine_tune_layers=12): self.slots_num = slots_num self.intents_num = intents_num self.num_bert_fine_tune_layers = num_bert_fine_tune_layers self.model_params = { 'slots_num': slots_num, 'intents_num': intents_num, 'num_bert_fine_tune_layers': num_bert_fine_tune_layers } self.build_model() self.compile_model() self.initialize_vars(sess) def build_model(self): in_id = Input(shape=(None, ), name='input_ids') in_mask = Input(shape=(None, ), name='input_masks') in_segment = Input(shape=(None, ), name='segment_ids') in_valid_positions = Input(shape=(None, self.slots_num), name='valid_positions') bert_inputs = [in_id, in_mask, in_segment, in_valid_positions] # the output of trained Bert bert_pooled_output, bert_sequence_output = BertLayer( n_fine_tune_layer=self.num_bert_fine_tune_layers, name='BertLayer')(bert_inputs) # add the additional layer for intent classification and slot filling intents_drop = Dropout(rate=0.1)(bert_pooled_output) intents_fc = Dense(self.intents_num, activation='softmax', name='intent_classifier')(intents_drop) slots_drop = Dropout(rate=0.1)(bert_sequence_output) slots_output = TimeDistributed( Dense(self.slots_num, activation='softmax'))(slots_drop) slots_output = Multiply(name='slots_tagger')( [slots_output, in_valid_positions]) self.model = Model(inputs=bert_inputs, outputs=[slots_output, intents_fc]) def compile_model(self): optimizer = tf.keras.optimizers.Adam(lr=5e-5) # if the targets are one-hot labels, using 'categorical_crossentropy'; while if targets are integers, using 'sparse_categorical_crossentropy' losses = { 'slots_tagger': 'sparse_categorical_crossentropy', 'intent_classifier': 'sparse_categorical_crossentropy' } ## loss_weights: to weight the loss contributions of different model outputs. loss_weights = {'slots_tagger': 3.0, 'intent_classifier': 1.0} metrics = {'intent_classifier': 'acc'} self.model.compile(optimizer=optimizer, loss=losses, loss_weights=loss_weights, metrics=metrics) self.model.summary() def fit(self, X, Y, validation_data=None, epochs=5, batch_size=32): X = (X[0], X[1], X[2], self.prepare_valid_positions(X[3])) if validation_data is not None: X_val, Y_val = validation_data validation_data = ((X_val[0], X_val[1], X_val[2], self.prepare_valid_positions(X_val[3])), Y_val) history = self.model.fit(X, Y, validation_data=validation_data, epochs=epochs, batch_size=batch_size) self.visualize_metric(history.history, 'slots_tagger_loss') self.visualize_metric(history.history, 'intent_classifier_loss') self.visualize_metric(history.history, 'loss') self.visualize_metric(history.history, 'intent_classifier_acc') def prepare_valid_positions(self, in_valid_positions): ## the input is 2-D in_valid_position in_valid_positions = np.expand_dims( in_valid_positions, axis=2) ## expand the shape of the array to axis=2 ## 3-D in_valid_position in_valid_positions = np.tile(in_valid_positions, (1, 1, self.slots_num)) ## return in_valid_positions def predict_slots_intent(self, x, slots_vectorizer, intent_vectorizer, remove_start_end=True): valid_positions = x[3] x = (x[0], x[1], x[2], self.prepare_valid_positions(valid_positions)) y_slots, y_intent = self.predict(x) ### get the real slot-tags using 'inverse_transform' of slots-vectorizer slots = slots_vectorizer.inverse_transform(y_slots, valid_positions) if remove_start_end: ## remove the first '[CLS]' and the last '[SEP]' tokens. 
slots = np.array([x[1:-1] for x in slots]) ### get the real intents using 'inverse-transform' of intents-vectorizer intents = np.array([ intent_vectorizer.inverse_transform([np.argmax(y_intent[i])])[0] for i in range(y_intent.shape[0]) ]) return slots, intents def initialize_vars(self, sess): sess.run(tf.compat.v1.local_variables_initializer()) sess.run(tf.compat.v1.global_variables_initializer()) K.set_session(sess) def save(self, model_path): with open(os.path.join(model_path, 'params.json'), 'w') as json_file: json.dump(self.model_params, json_file) self.model.save(os.path.join(model_path, 'joint_bert_model.h5')) def load(load_folder_path, sess): with open(os.path.join(load_folder_path, 'params.json'), 'r') as json_file: model_params = json.load(json_file) slots_num = model_params['slots_num'] intents_num = model_params['intents_num'] num_bert_fine_tune_layers = model_params['num_bert_fine_tune_layers'] new_model = JointBertModel(slots_num, intents_num, sess, num_bert_fine_tune_layers) new_model.model.load_weights( os.path.join(load_folder_path, 'joint_bert_model.h5')) return new_model
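# --- Added sketch (not part of the original code) ---
# Small numpy-only demo of what prepare_valid_positions() above does: the 2-D
# valid-positions mask of shape (batch, seq_len) is expanded to 3-D and tiled
# across the slot dimension so it can be multiplied element-wise with the slot
# softmax output. The toy values are illustrative only.
import numpy as np

slots_num = 4
valid_positions = np.array([[1, 1, 0],
                            [1, 0, 0]])                 # (batch=2, seq_len=3)
expanded = np.expand_dims(valid_positions, axis=2)      # (2, 3, 1)
tiled = np.tile(expanded, (1, 1, slots_num))            # (2, 3, 4)
print(tiled.shape)                                      # (2, 3, 4)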
#Fully connected layer 1
fc1 = tf.keras.layers.Dense(100, activation='relu', name="AddedDense1")(x)

# Use softmax
output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(fc1)
net_final = Model(inputs=net.input, outputs=output_layer)

for layer in net_final.layers[:FREEZE_LAYERS]:
    layer.trainable = False
for layer in net_final.layers[FREEZE_LAYERS:]:
    layer.trainable = True

# Use Adam optimizer
net_final.compile(optimizer=Adam(lr=0.00001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

# Whole network
print(net_final.summary())

# Training model
net_final.fit_generator(train_batches,
                        steps_per_epoch=train_batches.samples // BATCH_SIZE,
                        validation_data=valid_batches,
                        validation_steps=valid_batches.samples // BATCH_SIZE,
                        epochs=NUM_EPOCHS,
                        callbacks=callbacks)
net_final.save(WEIGHTS_FINAL)
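# --- Added sketch (not part of the original code) ---
# Quick sanity check of the freeze split above; assumes `net_final` and
# `FREEZE_LAYERS` are defined exactly as in the preceding snippet.
frozen = [layer.name for layer in net_final.layers if not layer.trainable]
trainable = [layer.name for layer in net_final.layers if layer.trainable]
print("frozen layers: {}, trainable layers: {}".format(len(frozen), len(trainable)))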
    target_size=(150, 150), class_mode='binary')
test_data_gen = test_image_generator.flow_from_directory(batch_size=16,
                                                         directory=test_dir,
                                                         target_size=(150, 150),
                                                         class_mode='binary')

# Train the model
history = new_model.fit(train_data_gen, epochs=5, validation_data=test_data_gen)
new_model.save("newVGG16")

# Report the final results
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

from matplotlib import pyplot as plt

plt.plot(epochs, acc, 'r', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Testing acc')
plt.title('Training and testing accuracy')
plt.legend()
plt.figure()
def _save(self, model: Model) -> None:
    save_path = Path(self._get_save_path())
    save_path.parent.mkdir(parents=True, exist_ok=True)
    model.save(str(save_path))
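# --- Added sketch (not part of the original code) ---
# Hypothetical usage of the _save() helper above. The TinySaver class and its
# _get_save_path() return value are invented for illustration; only the Path
# handling and the model.save() call mirror the original method.
from pathlib import Path
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense

class TinySaver:
    def _get_save_path(self) -> str:
        return "checkpoints/demo/model.h5"   # hypothetical location

    def _save(self, model: Model) -> None:
        save_path = Path(self._get_save_path())
        save_path.parent.mkdir(parents=True, exist_ok=True)
        model.save(str(save_path))

inp = Input(shape=(4,))
out = Dense(1)(inp)
TinySaver()._save(Model(inp, out))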
def plot_example_errors(cls_pred):
    # Collect the test images the model got wrong and show the first nine
    incorrect = (cls_pred != data.test.cls)
    images = data.test.images[incorrect]
    cls_pred = cls_pred[incorrect]
    cls_true = data.test.cls[incorrect]
    plot_images(images=images[0:9], cls_true=cls_true[0:9], cls_pred=cls_pred[0:9])

plot_example_errors(cls_pred)

# Save the model, discard the in-memory copy, and reload it from disk
path_model = 'model.keras'
model.save(path_model)
del model
model_2 = load_model(path_model)

# Prediction with the reloaded model
images = data.test.images[0:9]
cls_true = data.test.cls[0:9]
y_pred = model_2.predict(x=images)
cls_pred = np.argmax(y_pred, axis=1)
plot_images(images=images, cls_true=cls_true, cls_pred=cls_pred)

model_2.summary()
layer_input = model_2.layers[0]
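# --- Added sketch (not part of the original code) ---
# A common sanity check for the save/reload round trip shown above: keep the
# predictions from the in-memory model before deleting it and compare them with
# the reloaded model's output. Assumes `model`, `data`, `path_model`, and
# `load_model` are available as in the preceding snippet.
import numpy as np

images = data.test.images[0:9]
y_pred_before = model.predict(x=images)   # taken before saving / deleting the model
model.save(path_model)
reloaded = load_model(path_model)
y_pred_after = reloaded.predict(x=images)
print("max abs diff:", np.abs(y_pred_before - y_pred_after).max())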
dec_dense = Dense(spa_vocab_size, activation='softmax')
pred = dec_dense(dec_output)

# compile and fit
model = Model(inputs=[enc_inp, dec_inp], outputs=pred)
model.compile(optimizer=Adam(0.005), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit([enc_sequence_inps, dec_sequence_inps], dec_sequence_outputs,
          batch_size=128, epochs=100)

# save model
model.save('s2s.hd5')

################################################################################

# retrieve model
model = load_model('s2s.hd5')
encoder_inputs = model.input[0]                 # input_1
_, encoder_states = model.layers[4].output      # gru_1
encoder_model = Model(encoder_inputs, encoder_states)

decoder_inputs = model.input[1]                 # input_2
decoder_one_hot = model.layers[3](decoder_inputs)
decoder_states_inputs = [Input(shape=(256, ), name='input_3')]
decoder_gru = model.layers[5]
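# --- Added sketch (not part of the original code) ---
# The decoder-side inference model is typically finished along these lines.
# The layer index of the softmax Dense layer and the return_state behaviour of
# the decoder GRU are assumptions here, not taken from the original script.
decoder_dense = model.layers[6]   # assumed to be the softmax Dense layer
decoder_outputs, decoder_state = decoder_gru(decoder_one_hot,
                                             initial_state=decoder_states_inputs)
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs, decoder_state])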
def fit(self, learning_rate=1e-4, epochs=5, activation='relu', dropout=0, hidden_size=1024, nb_layers=1, include_class_weight=False, batch_size=20, save_model=False, verbose=True, fine_tuning=False, NB_IV3_LAYERS_TO_FREEZE=279, use_TPU=False, transfer_model='Inception', min_accuracy=None, extract_SavedModel=False): #read the tfrecords data TRAIN_DATA = tf.data.TFRecordDataset(['train.tfrecord']) VAL_DATA = tf.data.TFRecordDataset(['val.tfrecord']) print('Read the TFrecords') if transfer_model in ['Inception', 'Xception', 'Inception_Resnet']: target_size = (299, 299) else: target_size = (224, 224) #We expect the classes to be the name of the folders in the training set self.categories = os.listdir(TRAIN_DIR) """ helper functions to load tfrecords. Strongly inspired by https://colab.research.google.com/github/GoogleCloudPlatform/training-data-analyst/blob/master/courses/fast-and-lean-data-science/07_Keras_Flowers_TPU_playground.ipynb#scrollTo=LtAVr-4CP1rp """ def read_tfrecord(example): features = { "image": tf.FixedLenFeature( (), tf.string), # tf.string means byte string "label": tf.FixedLenFeature((), tf.int64) } example = tf.parse_single_example(example, features) image = tf.image.decode_jpeg(example['image']) image = tf.cast( image, tf.float32) / 255.0 # convert image to floats in [0, 1] range image = tf.image.resize_images( image, size=[*target_size], method=tf.image.ResizeMethod.BILINEAR) feature = tf.reshape(image, [*target_size, 3]) label = tf.cast(example['label'], tf.int32) # byte string target = tf.one_hot(label, len(self.categories)) return feature, target def get_training_dataset(): dataset = TRAIN_DATA.map(read_tfrecord) dataset = dataset.cache() dataset = dataset.repeat() dataset = dataset.shuffle(1000) dataset = dataset.batch( batch_size, drop_remainder=True) # drop_remainder needed on TPU dataset = dataset.prefetch( -1 ) # prefetch next batch while training (-1: autotune prefetch buffer size) return dataset def get_validation_dataset(): dataset = VAL_DATA.map(read_tfrecord) dataset = dataset.cache() dataset = dataset.repeat() dataset = dataset.shuffle(1000) dataset = dataset.batch( batch_size, drop_remainder=True) # drop_remainder needed on TPU dataset = dataset.prefetch( -1 ) # prefetch next batch while training (-1: autotune prefetch buffer size) return dataset #if we want stop training when no sufficient improvement in accuracy has been achieved if min_accuracy is not None: callback = EarlyStopping(monitor='categorical_accuracy', baseline=min_accuracy) callback = [callback] else: callback = None #load the pretrained model, without the classification (top) layers if transfer_model == 'Xception': base_model = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3)) elif transfer_model == 'Inception_Resnet': base_model = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(299, 299, 3)) elif transfer_model == 'Resnet': base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3)) else: base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3)) #Add the classification layers using Keras functional API x = base_model.output x = GlobalAveragePooling2D()(x) for _ in range(nb_layers): x = Dense(hidden_size, activation=activation)( x) #Hidden layer for classification if dropout > 0: x = Dropout(rate=dropout)(x) predictions = Dense(len(self.categories), activation='softmax')(x) #Output layer model = Model(inputs=base_model.input, outputs=predictions) #Set only the top layers as trainable 
(if we want to do fine-tuning, #we can train the base layers as a second step) for layer in base_model.layers: layer.trainable = False #Define the optimizer and the loss, and compile the model loss = 'categorical_crossentropy' if use_TPU: #if we want to try out the TPU, it looks like we currently need to use #tensorflow optimizers...see https://stackoverflow.com/questions/52940552/valueerror-operation-utpu-140462710602256-varisinitializedop-has-been-marked #...and https://www.youtube.com/watch?v=jgNwywYcH4w optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) tpu_optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer) model.compile(optimizer=tpu_optimizer, loss=loss, metrics=['categorical_accuracy']) TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR'] model = tf.contrib.tpu.keras_to_tpu_model( model, strategy=tf.contrib.tpu.TPUDistributionStrategy( tf.contrib.cluster_resolver.TPUClusterResolver( TPU_WORKER))) tf.logging.set_verbosity(tf.logging.INFO) else: optimizer = Adam(lr=learning_rate) model.compile(optimizer=optimizer, loss=loss, metrics=['categorical_accuracy']) #if we want to weight the classes given the imbalanced number of images if include_class_weight: from sklearn.utils.class_weight import compute_class_weight cls_train = self.categories class_weight = compute_class_weight(class_weight='balanced', classes=np.unique(cls_train), y=cls_train) else: class_weight = None steps_per_epoch = int( sum([ len(files) for r, d, files in os.walk(parentdir + '/data/image_dataset/train') ]) / batch_size) validation_steps = int( sum([ len(files) for r, d, files in os.walk(parentdir + '/data/image_dataset/val') ]) / batch_size) #Fit the model if use_TPU: history = model.fit(get_training_dataset, steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset, validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) else: history = model.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset(), validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) #Fine-tune the model, if we wish so if fine_tuning and not model.stop_training: print('============') print('Begin fine-tuning') print('============') #declare the first layers as trainable for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]: layer.trainable = False for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]: layer.trainable = True model.compile(optimizer=Adam(lr=learning_rate * 0.1), loss=loss, metrics=['categorical_accuracy']) #Fit the model if use_TPU: history = model.fit(get_training_dataset, steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset, validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) else: history = model.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=get_validation_dataset(), validation_steps=validation_steps, verbose=verbose, callbacks=callback, class_weight=class_weight) #Evaluate the model, just to be sure self.fitness = history.history['val_categorical_accuracy'][-1] #Save the model if save_model: if not os.path.exists(parentdir + '/data/trained_models'): os.makedirs(parentdir + '/data/trained_models') model.save(parentdir + '/data/trained_models/trained_model.h5') print('Model saved!') #save model in production format if extract_SavedModel: export_path = "./image_classifier/1/" with K.get_session() as sess: 
tf.saved_model.simple_save( sess, export_path, inputs={'input_image': model.input}, outputs={t.name: t for t in model.outputs}) else: self.model = model del history del model
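# --- Added sketch (not part of the original code) ---
# The fit() above reads 'train.tfrecord' / 'val.tfrecord' but does not show how
# they are produced. This is one way to write records with the same feature
# spec ("image" as a JPEG byte string, "label" as an int64), using the TF1-style
# API that matches the reader above. File names and the image list are assumptions.
import tensorflow as tf

def write_tfrecord(jpeg_paths, labels, out_path):
    with tf.python_io.TFRecordWriter(out_path) as writer:
        for path, label in zip(jpeg_paths, labels):
            with open(path, 'rb') as f:
                jpeg_bytes = f.read()
            example = tf.train.Example(features=tf.train.Features(feature={
                "image": tf.train.Feature(bytes_list=tf.train.BytesList(value=[jpeg_bytes])),
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
            }))
            writer.write(example.SerializeToString())

# write_tfrecord(['img0.jpg', 'img1.jpg'], [0, 1], 'train.tfrecord')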
class ConvMnist:
    def __init__(self, filename=None):
        '''
        Load a trained model file (optional)
        '''
        self.model = None
        if filename is not None:
            print('load model: ', filename)
            self.model = load_model(filename)
            self.model.summary()

    def preprocess_input(x, **kwargs):
        '''
        Image preprocessing (does nothing here)
        '''
        # x = 255 - x
        return x.astype(np.float32)

    def train(self):
        '''
        Train the model
        '''
        # Build the convolution model
        input = Input(shape=(MODEL_WIDTH, MODEL_HEIGHT, 1))
        conv1 = Conv2D(filters=8, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(input)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
        conv2 = Conv2D(filters=4, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu')(pool1)
        dropout1 = Dropout(0.2)(conv2)
        flatten1 = Flatten()(dropout1)
        output = Dense(units=10, activation='softmax')(flatten1)
        self.model = Model(inputs=[input], outputs=[output])
        self.model.summary()
        self.model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        ## When using fit_generator. Slow on Windows (image loading is hard to parallelize well)
        # Prepare to read the dataset from the images in the directory
        idg_train = ImageDataGenerator(
            validation_split=0.2,
            rescale=1 / 255.,
            # preprocessing_function=preprocess_input
        )

        # Generator for training data (80% of all training images)
        img_itr_train = idg_train.flow_from_directory(
            'mnist_images/train',
            subset="training",
            color_mode="grayscale",
            target_size=(MODEL_WIDTH, MODEL_HEIGHT),
            batch_size=BATCH_SIZE,
            class_mode='categorical',
        )

        # Generator for validation data (20% of all training images)
        img_itr_validation = idg_train.flow_from_directory(
            'mnist_images/train',
            subset="validation",
            color_mode="grayscale",
            target_size=(MODEL_WIDTH, MODEL_HEIGHT),
            batch_size=BATCH_SIZE,
            class_mode='categorical',
        )

        # Train the convolution model
        self.model.fit_generator(
            img_itr_train,
            steps_per_epoch=math.ceil(img_itr_train.samples / BATCH_SIZE),
            epochs=EPOCH_NUM,
            validation_data=img_itr_validation,
            validation_steps=math.ceil(img_itr_validation.samples / BATCH_SIZE),
        )

        # Evaluate on the test data
        idg_test = ImageDataGenerator(
            rescale=1 / 255.,
            # preprocessing_function=preprocess_input
        )
        img_itr_test = idg_test.flow_from_directory(
            'mnist_images/test',
            color_mode="grayscale",
            target_size=(MODEL_WIDTH, MODEL_HEIGHT),
            batch_size=BATCH_SIZE,
            class_mode=None,
            shuffle=False)

        # Run inference
        probs = self.model.predict_generator(
            img_itr_test, steps=math.ceil(img_itr_test.samples / BATCH_SIZE))

        # Compute the classification accuracy
        predictions = np.argmax(probs, axis=1)
        print("score: " + str(1.0 * np.sum(predictions == img_itr_test.classes) / img_itr_test.n))

    def save_trained_model(self, filename):
        '''
        Save the trained model to a file (h5)
        '''
        self.model.save(filename)

    def predict(self, input_image):
        '''
        Classify a single grayscale input image (28x28 ndarray) as a digit (0-9)
        ret: result, score
        '''
        if input_image.shape != (MODEL_WIDTH, MODEL_HEIGHT):
            return -1, -1
        input_image = input_image.reshape(1, input_image.shape[0], input_image.shape[1], 1)
        input_image = input_image / 255.
        probs = self.model.predict(input_image)
        result = np.argmax(probs[0])
        return result, probs[0][result]
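# --- Added sketch (not part of the original code) ---
# Typical usage of the ConvMnist class above, assuming MODEL_WIDTH = MODEL_HEIGHT = 28
# and that the mnist_images/ directories exist as expected by train(). The file
# name and the placeholder image are assumptions.
import numpy as np

conv_mnist = ConvMnist()
conv_mnist.train()
conv_mnist.save_trained_model('conv_mnist.h5')

# Later: reload the trained model and classify one 28x28 grayscale image
reloaded = ConvMnist(filename='conv_mnist.h5')
digit_image = np.zeros((28, 28), dtype=np.uint8)        # placeholder image
result, score = reloaded.predict(digit_image)
print('predicted digit: {}, score: {:.3f}'.format(result, score))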
LAYER = "fc2" headModel = basemodel.get_layer(LAYER).output headModel = Dense(50, activation="softmax")(headModel) model = Model(inputs=basemodel.input, outputs=headModel) opt = optimizers.SGD(lr=1e-3, momentum=0.9) model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"]) for layer in basemodel.layers: layer.trainable = False train_generator = DataGenerator(indexes=train, **generator_parameters) val_generator = DataGenerator(indexes=test, **generator_parameters) model.fit_generator(generator=train_generator, validation_data=val_generator, epochs=8) model.save("%s/VGG16_%s.h5" % (dump_folder, fold_id)) generator_parameters["shuffle"] = False val_generator = DataGenerator(indexes=test, **generator_parameters) y_pred_raw = model.predict_generator(val_generator) y_pred = np.argmax(y_pred_raw, axis=-1) y_true = np.argmax(y[test], axis=-1) model_predictions = ({ "path": np.array(metadata_df.path.values.tolist())[test], "y_true": y_true, "y_pred": y_pred, "y_pred_raw": y_pred_raw.tolist()