def load_model(args):
    """Build the full ImageNet classifier selected by ``args.model``.

    Every supported architecture is created with ``include_top=True`` and
    ``weights='imagenet'``.

    Args:
        args: Object exposing a ``model`` attribute that holds one of the
            keys of the table below ('inception', 'xception', ...).

    Returns:
        A ``(model, preprocess_mode)`` tuple, where ``preprocess_mode`` is
        the string ('tf' or 'caffe') paired with the chosen architecture.

    Raises:
        ValueError: If ``args.model`` is not a supported name.  Previously
            the function printed a message and then crashed with an
            UnboundLocalError on the final ``return``.
    """
    # Constructors are wrapped in lambdas so that a name is only resolved
    # when its entry is actually selected, as in the original if/elif chain.
    registry = {
        'inception': (lambda: InceptionV3, 'tf'),
        'xception': (lambda: Xception, 'tf'),
        'inceptionresnet': (lambda: InceptionResNetV2, 'tf'),
        'mobilenet': (lambda: MobileNet, 'tf'),
        'mobilenet2': (lambda: MobileNetV2, 'tf'),
        'nasnet': (lambda: NASNetLarge, 'tf'),
        'resnet': (lambda: ResNet50, 'caffe'),
        'vgg16': (lambda: VGG16, 'caffe'),
        'vgg19': (lambda: VGG19, 'caffe'),
    }

    if args.model not in registry:
        print("Model not found")
        raise ValueError(
            "Unknown model %r; expected one of: %s"
            % (args.model, ", ".join(sorted(registry))))

    get_constructor, preprocess_mode = registry[args.model]
    model = get_constructor()(include_top=True, weights='imagenet')
    return model, preprocess_mode
def extract_features(directory):
    """Encode every file in *directory* with InceptionV3 up to ``avg_pool``.

    The network is built without pretrained weights and then loaded from
    'data/image_net.h5'.  Each entry returned by ``os.listdir`` is loaded at
    299x299, converted to an array and passed through ``preprocess_input``;
    the whole stack is predicted in one call.

    Returns:
        dict with two entries: 'ids' (file names without their extension, in
        listing order) and 'features' (the ``predict`` output for the stack).
    """
    inception = InceptionV3(include_top=True, weights=None)
    inception.load_weights('data/image_net.h5')
    image_model = Model(inception.layers[0].input,
                        inception.get_layer('avg_pool').output)

    img_id = []
    pixel_stack = []
    for file_name in os.listdir(directory):
        picture = image.load_img(directory + '/' + file_name,
                                 target_size=(299, 299))
        pixel_stack.append(preprocess_input(image.img_to_array(picture)))
        img_id.append(os.path.splitext(file_name)[0])

    pixel_stack = np.array(pixel_stack)
    # The stacked batch must be 4-dimensional before it is fed to predict().
    assert pixel_stack.ndim == 4

    return {
        'ids': img_id,
        'features': image_model.predict(pixel_stack, verbose=1),
    }
def image_dense_lstm():
    """Build the image branch: InceptionV3 -> Dense -> BatchNorm -> one LSTM step.

    InceptionV3 weights come from 'data/image_net.h5' and the first 312
    layers are frozen.  The ``avg_pool`` activations are projected to the
    module-level ``units`` size, batch-normalised, given a length-1 time axis
    and fed to the LSTM together with caller-supplied initial states.

    Returns:
        A Model taking ``[image, a0, c0]`` and producing ``[a, c]``, the
        first and third values returned by the LSTM layer.
    """
    inception = InceptionV3(include_top=True, weights=None)
    inception.load_weights('data/image_net.h5')
    for frozen_layer in inception.layers[:312]:
        frozen_layer.trainable = False
    image_model = Model(inception.layers[0].input,
                        inception.get_layer('avg_pool').output)

    # Layers carry explicit names; creation order matches the original.
    encoder_dense = Dense(units, use_bias=False, name='dense_img')
    batch_norm = BatchNormalization(name='batch_normalization_img')
    lstm = LSTM(units, return_state=True, name='lstm')

    inputs = Input(shape=(299, 299, 3))
    encoded = batch_norm(encoder_dense(image_model(inputs)))
    # Insert a time axis at position 1 so the LSTM receives a sequence.
    encoded = Lambda(lambda x: K.expand_dims(x, axis=1))(encoded)

    a0 = Input(shape=(units,))
    c0 = Input(shape=(units,))
    a, _, c = lstm(encoded, initial_state=[a0, c0])

    return Model(inputs=[inputs, a0, c0], outputs=[a, c])
def extract_inception():
    """Compute InceptionV3 (no top) features for the training images.

    Reads picture paths from '../../411a3/train' and labels from
    '../../411a3/train.csv' (first row skipped, second column used, shifted
    to start at 0).  The first ``train_size`` pictures are predicted one at
    a time and the results are shuffled together with their labels.

    Relies on the module-level names ``train_size`` and ``target_size``.

    Returns:
        ``(X_train, y_train)``: the shuffled feature array and the matching
        column vector of int64 labels.
    """
    model = InceptionV3(weights='imagenet', include_top=False)
    print(model.summary())

    picture_paths = image.list_pictures('../../411a3/train')
    label_rows = np.loadtxt('../../411a3/train.csv', dtype='str',
                            delimiter=',')[1:]

    # NOTE(review): the buffer layout (2048, 8, 8) presumably matches a
    # channels-first backend output -- confirm against the Keras config.
    features = np.zeros((train_size, 2048, 8, 8))
    labels = label_rows[:, 1].astype('int64').reshape(-1, 1) - 1

    for idx in range(train_size):
        current_path = picture_paths[idx]
        pixels = image.img_to_array(
            image.load_img(current_path, target_size=target_size))
        batch = preprocess_input(np.expand_dims(pixels, axis=0))
        features[idx, :, :, :] = model.predict(batch)
        print('Read image: ' + current_path)

    # Shuffle inputs and targets with one shared permutation.
    order = np.arange(features.shape[0])
    np.random.shuffle(order)
    return features[order], labels[order]
def inception_finetune_UCF():
    """Train a softmax head on top of a frozen ImageNet InceptionV3.

    Builds InceptionV3 without its top (``input_shape=IMSIZE``), freezes all
    of its layers, appends global average pooling and an ``N_CLASSES``-way
    softmax, and trains with SGD on batches from ``video_image_generator``.
    Weights are loaded from 'inception_finetune.h5' when that file exists
    and are checkpointed to the same path during training.

    Relies on the module-level names ``IMSIZE``, ``N_CLASSES``,
    ``batch_size`` and ``SequenceLength``.  Returns None.
    """
    base_model = InceptionV3(weights='imagenet', include_top=False,
                             input_shape=IMSIZE)
    print('Inception_v3 loaded')

    # Freeze the whole convolutional base; only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    x = GlobalAveragePooling2D()(base_model.output)
    predictions = Dense(N_CLASSES, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.5)
    model.compile(optimizer=sgd, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # summary() prints the table itself and returns None, so it is no longer
    # wrapped in print() (which emitted a stray "None").
    model.summary()

    data_dir = '/home/changan/ActionRocognition_rnn/data'
    list_dir = os.path.join(data_dir, 'ucfTrainTestlist')
    video_dir = os.path.join(data_dir, 'UCF-Preprocessed')

    train_data, test_data, class_index = get_data_list(list_dir, video_dir)
    print('Train data size: ', len(train_data))
    print('Test data size: ', len(test_data))

    # The generators now use N_CLASSES (previously a hard-coded 101) so the
    # label width always matches the width of the softmax head built above.
    train_generator = video_image_generator(
        train_data, batch_size, seq_len=SequenceLength, img_size=IMSIZE,
        num_classes=N_CLASSES)
    test_generator = video_image_generator(
        test_data, batch_size, seq_len=SequenceLength, img_size=IMSIZE,
        num_classes=N_CLASSES)

    weights_dir = 'inception_finetune.h5'
    if os.path.exists(weights_dir):
        model.load_weights(weights_dir)
        print('weights loaded')

    checkpointer = ModelCheckpoint(weights_dir, save_weights_only=True)
    model.fit_generator(train_generator,
                        steps_per_epoch=30,
                        epochs=200,
                        validation_data=test_generator,
                        validation_steps=100,
                        verbose=2,
                        callbacks=[checkpointer])
def Run(self, img_path, model_name):
    """Classify one image with a CNN feature extractor and a pickled classifier.

    Args:
        img_path: Path of the image to classify.
        model_name: One of "vgg16", "vgg19", "resnet50", "inceptionv3" or
            "xception"; selects the feature extractor and the classifier
            file 'output/flowers_17/<model_name>/classifier.cpickle'.

    Returns:
        ``(pred, prob0)``: the classifier's ``predict`` result for the image
        and the first entry of its ``predict_proba`` row.

    Raises:
        ValueError: If ``model_name`` is not supported.  Previously this fell
            through and crashed with an UnboundLocalError on ``image_size``.
    """
    supported = ("vgg16", "vgg19", "resnet50", "inceptionv3", "xception")
    if model_name not in supported:
        raise ValueError(
            "Unsupported model_name %r; expected one of: %s"
            % (model_name, ", ".join(supported)))

    weights = 'imagenet'
    classfier_file = 'output/flowers_17/' + model_name + '/classifier.cpickle'

    # Each branch imports its own preprocess_input, then exposes an
    # intermediate layer of the pretrained network as the feature output.
    if model_name == "vgg16":
        from vgg16 import VGG16, preprocess_input
        base_model = VGG16(weights=weights)
        feature_layer = 'fc1'
        image_size = (224, 224)
    elif model_name == "vgg19":
        from vgg19 import VGG19, preprocess_input
        base_model = VGG19(weights=weights)
        feature_layer = 'fc1'
        image_size = (224, 224)
    elif model_name == "resnet50":
        from resnet50 import ResNet50, preprocess_input
        base_model = ResNet50(weights=weights)
        feature_layer = 'avg_pool'
        image_size = (224, 224)
    elif model_name == "inceptionv3":
        from inception_v3 import InceptionV3, preprocess_input
        base_model = InceptionV3(weights=weights)
        feature_layer = 'mixed9'
        image_size = (299, 299)
    else:  # "xception" -- guaranteed by the guard above
        from xception import Xception, preprocess_input
        base_model = Xception(weights=weights)
        feature_layer = 'avg_pool'
        image_size = (299, 299)

    model = Model(inputs=base_model.input,
                  outputs=base_model.get_layer(feature_layer).output)

    img = image.load_img(img_path, target_size=image_size)
    img_array = np.expand_dims(image.img_to_array(img), axis=0)
    img_array = preprocess_input(img_array)
    feature = model.predict(img_array).flatten()

    # NOTE: unpickling executes arbitrary code; only load classifier files
    # that come from a trusted source.
    with open(classfier_file, 'rb') as f:
        model2 = pickle.load(f)

    # Both calls now receive the same single-row 2-D sample; the original
    # passed the raw 1-D vector to predict() only.
    sample = np.atleast_2d(feature)
    pred = model2.predict(sample)
    prob = model2.predict_proba(sample)[0]
    return pred, prob[0]
def build_model(self):
    """Assemble the captioning graph for training or for step-wise inference.

    In training mode (``self.config.is_training()``) a single model is built
    whose output is the loss computed by ``softmax_sparse_loss``; it is
    compiled with Adam and stored in ``self.keras_model``.  Otherwise two
    models are stored: ``self.inception_part_model`` (image -> LSTM states)
    and ``self.lstm_part_model`` (tokens + states -> logits + next states).
    """
    self.build_input()

    # Start TensorFlow queue runners on the session Keras is using.
    tf.train.start_queue_runners(K.get_session())

    image_input = Input(tensor=self.images)
    seq_input = Input(tensor=self.input_seqs)
    if self.config.is_training():
        seq_targets = Input(tensor=self.target_seqs)
        input_masks = Input(tensor=self.input_mask)

    # Token ids -> embedding vectors of size config.embedding_size.
    token_embedding = Embedding(input_dim=self.config.vocab_size,
                                output_dim=self.config.embedding_size,
                                mask_zero=True)
    seq_embeddings = token_embedding(seq_input)

    # Frozen InceptionV3 (with its top) applied to the image tensor.
    vision_model = InceptionV3(include_top=True, input_tensor=image_input)
    for frozen_layer in vision_model.layers:
        frozen_layer.trainable = False

    # Project the vision output to the embedding size and shape it as a
    # one-step sequence for the LSTM.
    image_embedding = Dense(self.config.embedding_size,
                            activation=None,
                            kernel_initializer='random_uniform',
                            bias_initializer='zeros')(vision_model.outputs[0])
    init_lstm_input = Reshape(target_shape=(1, self.config.embedding_size),
                              name='reshape')(image_embedding)

    # TODO There is a bug when LSTM is initialized with
    # `dropout`/`recurrent_dropout`.  If an error is raised, see:
    # `https://github.com/keras-team/keras/issues/8407#issuecomment-361901801`
    lstm_cell = LSTM(self.config.num_lstm_units,
                     unroll=False,
                     return_sequences=True,
                     dropout=0.4,
                     recurrent_dropout=0.4,
                     return_state=True)

    # Feeding the image first yields the initial hidden and cell states.
    _, h_state, c_state = lstm_cell(init_lstm_input)

    if self.config.is_training():
        training_output = lstm_cell(seq_embeddings,
                                    initial_state=[h_state, c_state])[0]
    else:
        self.inception_part_model = Model(inputs=[image_input],
                                          outputs=[h_state, c_state])

        step_h_state = Input(batch_shape=(None, 512,), name='lstm_h_state')
        step_c_state = Input(batch_shape=(None, 512,), name='lstm_c_state')
        training_output, next_h_state, next_c_state = lstm_cell(
            seq_embeddings, initial_state=[step_h_state, step_c_state])

    # Vocabulary-sized projection without activation.
    masked_training_output = Dense(self.config.vocab_size,
                                   activation=None,
                                   kernel_initializer='random_uniform',
                                   bias_initializer='zeros')(training_output)

    if self.config.is_training():
        loss_out = Lambda(softmax_sparse_loss, name='softmax_loss')(
            [masked_training_output, seq_targets, input_masks])
        model = Model(inputs=[seq_input, seq_targets, input_masks, image_input],
                      outputs=[loss_out])
        model.summary()
        # The graph already outputs the loss, so the Keras "loss" simply
        # passes y_pred through.
        model.compile(loss={'softmax_loss': lambda y_true, y_pred: y_pred},
                      optimizer='adam')
        self.keras_model = model
    else:
        self.lstm_part_model = Model(
            inputs=[seq_input, step_h_state, step_c_state],
            outputs=[masked_training_output, next_h_state, next_c_state])
def main():
    """Train an InceptionV3 classifier from scratch, save it and score it.

    Data comes from ``load_data()`` and is fed through ``DataGenerator``.
    The best checkpoint is written to 'saved_models/checkpoint.h5' and the
    final model to 'saved_models/orientation-inception.h5' under the current
    working directory.  Loss and accuracy on the validation generator are
    printed at the end.
    """
    batch_size = 32
    num_classes = 4
    epochs = 100
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    model_name = 'orientation-inception.h5'

    data_train, data_test = load_data()

    # Randomly initialised InceptionV3 with its classification head.  The
    # original asked for include_top=False with pooling='softmax'; that is
    # not a valid pooling mode and `classes` is ignored without the top, so
    # the network had no class output for the categorical cross-entropy loss.
    model = InceptionV3(
        include_top=True,
        weights=None,
        input_shape=(192, 192, 3),
        classes=num_classes,
    )

    # RMSprop is the canonical class name; the lowercase alias is not
    # available in every Keras release.
    opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    checkpointer = ModelCheckpoint(
        filepath=os.path.join(save_dir, 'checkpoint.h5'),
        verbose=1,
        save_best_only=True,
    )
    early_stopping = EarlyStopping(monitor='val_loss', patience=2)

    train_generator = DataGenerator(data_train)
    val_generator = DataGenerator(data_test)

    model.fit_generator(
        train_generator.flow(batch_size=batch_size),
        epochs=epochs,
        validation_data=val_generator.flow(batch_size=batch_size),
        shuffle=True,
        callbacks=[checkpointer, early_stopping],
    )

    # Save model and weights.
    model_path = os.path.join(save_dir, model_name)
    model.save(model_path)
    print('Saved trained model at %s' % model_path)

    # Score trained model.
    scores = model.evaluate_generator(
        val_generator.flow(batch_size=batch_size))
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
def greedy_inference_model(vocab_size, max_len):
    """Build the unrolled greedy-decoding graph ('NIC_greedy_inference_v2').

    The image (plus a scalar sentiment input) primes the LSTM; the decoder
    is then unrolled ``max_len`` times, each step feeding the argmax of the
    previous softmax back through the text embedding.

    Relies on the module-level names ``units`` and ``img_embedding_size``.

    Args:
        vocab_size: Size of the embedding table and of each softmax output.
        max_len: Number of decoding steps to unroll.

    Returns:
        A Model taking ``[image, seq, senti, a0, c0]`` and returning a list
        of ``max_len`` softmax outputs.
    """
    inception = InceptionV3(include_top=True, weights=None)
    inception.load_weights('data/image_net.h5')
    for frozen_layer in inception.layers[:312]:
        frozen_layer.trainable = False
    image_model = Model(inception.layers[0].input,
                        inception.get_layer('avg_pool').output)

    # Shared layers, reused at every decoding step.
    encoder_dense = Dense(img_embedding_size, use_bias=False, name='dense_img')
    embedding = Embedding(vocab_size, units, mask_zero=True, name='emb_text')
    lstm = LSTM(units, return_state=True, name='lstm')
    softmax = Dense(vocab_size, activation='softmax',
                    name='time_distributed_softmax')
    batch_norm = BatchNormalization(name='batch_normalization_img')

    # Image embedding, concatenated with the sentiment scalar.
    image_input = Input(shape=(299, 299, 3))
    img_code = encoder_dense(image_model(image_input))
    senti_input = Input(shape=(1,))
    img_code = Concatenate(axis=-1)([senti_input, img_code])
    img_code = batch_norm(img_code)
    img_code = Lambda(lambda x: K.expand_dims(x, axis=1))(img_code)

    # Text embedding of the single start token.
    seq_input = Input(shape=(1,))
    token_code = embedding(seq_input)

    # Initial states supplied by the caller.
    a0 = Input(shape=(units,))
    c0 = Input(shape=(units,))

    # The image is the first LSTM input; afterwards decode greedily.
    a, _, c = lstm(img_code, initial_state=[a0, c0])

    outputs = []
    step_input = token_code
    for _step in range(max_len):
        a, _, c = lstm(step_input, initial_state=[a, c])
        word_probs = softmax(a)
        outputs.append(word_probs)
        # The most likely word becomes the next step's input token.
        next_token = Lambda(lambda x: K.expand_dims(K.argmax(x)))(word_probs)
        step_input = embedding(next_token)

    return Model(inputs=[image_input, seq_input, senti_input, a0, c0],
                 outputs=outputs,
                 name='NIC_greedy_inference_v2')
def build_lstm_part_model(self):
    """Build the token -> word-probability model and load its weights.

    The resulting model maps ``seq_input`` to a softmax over the vocabulary
    and is stored in ``self.lstm_part_model`` after loading weights by name
    from './keras_weight/weights_full.h5'.

    NOTE(review): several layers below are created but not wired into the
    returned model.  They are kept, in the original order, presumably so
    that auto-generated layer names line up with the by-name weight file --
    confirm before removing any of them.
    """
    self.build_input()

    image_input = Input(tensor=self.images)
    seq_input = Input(tensor=self.input_seqs)
    _step_h_state = Input(batch_shape=(None, 512,))
    _step_c_state = Input(batch_shape=(None, 512,))

    # Token ids -> embedding vectors of size config.embedding_size.
    seq_embeddings = Embedding(input_dim=self.config.vocab_size,
                               output_dim=self.config.embedding_size,
                               mask_zero=True)(seq_input)

    raw_vision_model = InceptionV3(include_top=True, input_tensor=image_input)
    _image_embedding = Dense(self.config.embedding_size,
                             activation=None,
                             kernel_initializer='random_uniform',
                             bias_initializer='zeros')(
                                 raw_vision_model.outputs[0])

    lstm_cell = LSTM(self.config.num_lstm_units,
                     unroll=False,
                     return_sequences=True,
                     dropout=0.4,
                     recurrent_dropout=0.4,
                     return_state=True)
    training_output, _next_h_state, _next_c_state = lstm_cell(seq_embeddings)

    # NOTICE: softmax activation is included here but not in model.py,
    # because the loss function there needs logits.
    word_probabilities = Dense(self.config.vocab_size,
                               activation='softmax',
                               kernel_initializer='random_uniform',
                               bias_initializer='zeros')(training_output)

    decoder = Model(inputs=[seq_input], outputs=[word_probabilities])
    decoder.load_weights('./keras_weight/weights_full.h5', by_name=True)
    self.lstm_part_model = decoder
def model(vocab_size, max_len, reg):
    """Build the training-time captioning model named 'NIC'.

    A partly frozen InceptionV3 encodes the image; the encoding (joined with
    a scalar sentiment input) is the first LSTM input, and the embedded
    caption tokens follow using the resulting states.

    Relies on the module-level names ``units`` and ``img_embedding_size``.

    Args:
        vocab_size: Size of the embedding table and of the softmax output.
        max_len: Length of the token sequence input.
        reg: L2 factor applied to the image Dense kernel and to the output
            Dense kernel and bias.

    Returns:
        A Model taking ``[image, seq, senti, a0, c0]`` and returning the
        per-time-step softmax over the vocabulary.
    """
    # ---- Image embedding ----
    inception = InceptionV3(include_top=True, weights=None)
    inception.load_weights('data/image_net.h5')
    for frozen_layer in inception.layers[:312]:
        frozen_layer.trainable = False
    image_model = Model(inception.layers[0].input,
                        inception.get_layer('avg_pool').output)

    image_input = Input(shape=(299, 299, 3))
    img_code = image_model(image_input)
    img_code = Dropout(0.5)(img_code)
    img_code = Dense(img_embedding_size,
                     use_bias=False,
                     kernel_regularizer=regularizers.l2(reg),
                     name='dense_img')(img_code)

    senti_input = Input(shape=(1,))
    img_code = Concatenate(axis=-1)([senti_input, img_code])
    img_code = BatchNormalization(name='batch_normalization_img')(img_code)
    # Insert a time axis so the image acts as a one-step sequence.
    img_code = Lambda(lambda x: K.expand_dims(x, axis=1))(img_code)

    # ---- Text embedding ----
    seq_input = Input(shape=(max_len,))
    text_code = Embedding(vocab_size, units, mask_zero=True,
                          name='emb_text')(seq_input)
    text_code = Dropout(0.5)(text_code)

    # ---- Initial states ----
    a0 = Input(shape=(units,))
    c0 = Input(shape=(units,))

    lstm = LSTM(units, return_sequences=True, return_state=True,
                dropout=0.5, name='lstm')

    # The image embedding is the first LSTM input; its states seed the text.
    _, a, c = lstm(img_code, initial_state=[a0, c0])
    hidden_sequence, _, _ = lstm(text_code, initial_state=[a, c])

    word_head = Dense(vocab_size,
                      activation='softmax',
                      kernel_regularizer=regularizers.l2(reg),
                      bias_regularizer=regularizers.l2(reg))
    output = TimeDistributed(word_head,
                             name='time_distributed_softmax')(hidden_sequence)

    return Model(inputs=[image_input, seq_input, senti_input, a0, c0],
                 outputs=output,
                 name='NIC')
def load_model():
    """Return a compiled classifier: frozen InceptionV3 base + softmax head.

    The base uses ImageNet weights, no top and ``input_shape=IMSIZE``; its
    output is flattened and fed to an ``N_CLASSES``-way softmax.  The model
    is compiled with SGD and categorical cross-entropy.
    """
    backbone = InceptionV3(include_top=False, weights='imagenet',
                           input_shape=IMSIZE)
    # Only the new Dense head stays trainable.
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    flattened = Flatten()(backbone.output)
    predictions = Dense(N_CLASSES, activation='softmax')(flattened)

    model = Model(inputs=backbone.input, outputs=predictions)
    print(model.summary())

    model.compile(optimizer=SGD(lr=0.001, decay=1e-6, momentum=0.5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def extract_feature_from_image(file_dir):
    """Return the InceptionV3 ``avg_pool`` activations for one image file.

    The image at *file_dir* is loaded at 299x299, turned into a batch of one
    and passed through ``preprocess_input``.  The network is rebuilt and its
    weights reloaded from 'data/image_net.h5' on every call.
    """
    picture = image.load_img(file_dir, target_size=(299, 299))
    batch = np.expand_dims(image.img_to_array(picture), axis=0)
    batch = preprocess_input(batch)

    inception = InceptionV3(include_top=True, weights=None)
    inception.load_weights('data/image_net.h5')
    image_model = Model(inception.layers[0].input,
                        inception.get_layer('avg_pool').output)

    return image_model.predict(batch)
def extract_inception_test():
    """Compute InceptionV3 (no top) features for the test images.

    Reads picture paths from '../../411a3/test' and predicts the first
    ``test_size`` of them one at a time.  Relies on the module-level names
    ``test_size`` and ``target_size``.

    Returns:
        Array of shape ``(test_size, 2048, 8, 8)`` holding one prediction
        per picture.
    """
    model = InceptionV3(weights='imagenet', include_top=False)
    print(model.summary())

    picture_paths = image.list_pictures('../../411a3/test')

    # NOTE(review): the buffer layout (2048, 8, 8) presumably matches a
    # channels-first backend output -- confirm against the Keras config.
    features = np.zeros((test_size, 2048, 8, 8))
    for idx in range(test_size):
        current_path = picture_paths[idx]
        pixels = image.img_to_array(
            image.load_img(current_path, target_size=target_size))
        batch = preprocess_input(np.expand_dims(pixels, axis=0))
        features[idx, :, :, :] = model.predict(batch)
        print('Read image: ' + current_path)

    return features
def get_feature_mat_from_video(video_filename, output_dir='output'):
    """Extract InceptionV3 ``avg_pool`` features for a video and save them.

    Args:
        video_filename: '/'-separated path to a .webm, .mp4 or .3gp file.
            The file name without its final extension is used as the id.
        output_dir: Directory (created if needed via ``mkdir_if_not_exist``)
            that receives '<id>.npy'.

    Returns:
        The feature matrix produced by ``get_feature_mat`` (also written to
        '<output_dir>/<id>.npy').

    Raises:
        AssertionError: If the extension is not one of the supported ones.
    """
    # Split on the LAST dot only.  The original `split('.')` with two-target
    # unpacking raised ValueError for names with no dot or more than one.
    basename = video_filename.split('/')[-1]
    yt_vid, dot, extension = basename.rpartition('.')
    if not dot:
        yt_vid, extension = basename, ''
    assert extension in ['webm', 'mp4', '3gp']

    mkdir_if_not_exist(output_dir, False)
    output_filename = output_dir + '/' + yt_vid + '.npy'

    vid_reader = imageio.get_reader(video_filename, 'ffmpeg')
    img_list = get_img_list_from_vid_reader(vid_reader, extension)

    # Full ImageNet InceptionV3, exposed at its 'avg_pool' layer.
    base_model = InceptionV3(include_top=True, weights='imagenet')
    model = Model(inputs=base_model.input,
                  outputs=base_model.get_layer('avg_pool').output)

    feature_mat = get_feature_mat(model, img_list)
    np.save(output_filename, feature_mat)
    return feature_mat
def build_inception_part_model(self):
    """Build the image -> first-LSTM-output model and load its weights.

    The model maps the image tensor through InceptionV3, a Dense projection
    and one LSTM step; it is stored in ``self.inception_part_model`` after
    loading weights by name from './keras_weight/weights_full.h5'.

    NOTE(review): the sequence input below is created but not wired into
    the returned model; it is kept, in the original order, presumably so
    auto-generated layer names still match the by-name weight file.
    """
    self.build_input()

    image_input = Input(tensor=self.images)
    _seq_input = Input(tensor=self.input_seqs)

    raw_vision_model = InceptionV3(include_top=True, input_tensor=image_input)
    image_embedding = Dense(self.config.embedding_size,
                            activation=None,
                            kernel_initializer='random_uniform',
                            bias_initializer='zeros')(
                                raw_vision_model.outputs[0])
    init_lstm_input = Reshape(target_shape=(1, self.config.embedding_size),
                              name='reshape')(image_embedding)

    # LSTM cell
    lstm_cell = LSTM(self.config.num_lstm_units,
                     unroll=False,
                     return_sequences=True,
                     dropout=0.4,
                     recurrent_dropout=0.4,
                     return_state=True)

    # Run the encoded image through the LSTM once.
    lstm_output, _h_state, _c_state = lstm_cell(init_lstm_input)

    # BUG: coremltools only accepts `lstm_output`, not h_state/c_state, as
    # the model's output, although in Xcode the model's output will still be
    # (output, h_state, c_state).
    encoder = Model(inputs=[image_input], outputs=[lstm_output])
    encoder.load_weights('./keras_weight/weights_full.h5', by_name=True)
    self.inception_part_model = encoder
# NOTE(review): the plotting statements below use names (axarr, out_name)
# that are not defined in this excerpt; they look like the tail of a
# definition that starts earlier -- confirm their original indentation.
# Hide the axis ticks of the first subplot, stop the second one from
# autoscaling, tighten the layout and write the figure to '<out_name>.png'.
axarr[0].get_xaxis().set_ticks([])
axarr[0].get_yaxis().set_ticks([])
axarr[1].autoscale(enable=False)
gs = gridspec.GridSpec(2,1, width_ratios=[1],height_ratios=[1,0.1])
plt.tight_layout()
plt.savefig(out_name + '.png')

#########################
# Models
#########################

# load model
# NOTE(review): the ResNet50 instance is replaced by InceptionV3 on the very
# next line, so it is built and then discarded -- confirm it is not needed.
model = ResNet50(weights='imagenet')
model = InceptionV3(weights='imagenet')
# model = VGG16(weights='imagenet', include_top=False)


# pre-processing for inception model only
def preprocess_input(x):
    """Rescale *x* as ``(x / 255 - 0.5) * 2`` and return it.

    The augmented assignments operate on *x* itself, so a mutable array
    passed in is modified in place.
    """
    x /= 255.
    x -= 0.5
    x *= 2.
    return x


# get all available images
# Paths are built by plain string concatenation, so path_images is assumed
# to already end with a path separator -- TODO confirm.
img_names_on_disk = os.listdir(path_images)
img_paths_on_disk = [path_images + x for x in img_names_on_disk]
def load_model(args):
    """Build a headless ImageNet feature extractor selected by ``args``.

    The chosen architecture is created with ``include_top=False``,
    ``weights='imagenet'`` and ``pooling=args.pooling``.  When
    ``args.output_layer`` is the string '0' that network is returned as is;
    otherwise a sub-model ending at the layer named ``args.output_layer``
    is returned.

    Args:
        args: Object exposing ``model`` (one of the keys of the table
            below), ``output_layer`` and ``pooling``.

    Returns:
        ``(model, preprocess_mode)`` with ``preprocess_mode`` being 'tf' or
        'caffe', or the integer ``0`` (after printing "Model not found")
        when ``args.model`` is not supported.  The ``0`` return is kept for
        backward compatibility with existing callers.
    """
    # Constructors are wrapped in lambdas so that a name is only resolved
    # when its entry is actually selected, as in the original if/elif chain.
    registry = {
        'inception': (lambda: InceptionV3, 'tf'),
        'xception': (lambda: Xception, 'tf'),
        'inceptionresnet': (lambda: InceptionResNetV2, 'tf'),
        'mobilenet': (lambda: MobileNet, 'tf'),
        'mobilenet2': (lambda: MobileNetV2, 'tf'),
        'nasnet': (lambda: NASNetLarge, 'tf'),
        'resnet': (lambda: ResNet50, 'caffe'),
        'vgg16': (lambda: VGG16, 'caffe'),
        'vgg19': (lambda: VGG19, 'caffe'),
    }

    if args.model not in registry:
        print("Model not found")
        return 0

    get_constructor, preprocess_mode = registry[args.model]
    base_model = get_constructor()(include_top=False,
                                   weights='imagenet',
                                   pooling=args.pooling)

    if args.output_layer == '0':
        return base_model, preprocess_mode

    # `inputs=` / `outputs=` are the Keras 2 keyword names; the legacy
    # `input=` / `output=` spellings are rejected by newer releases.
    model = Model(inputs=base_model.input,
                  outputs=base_model.get_layer(args.output_layer).output)
    return model, preprocess_mode
batch_size = 16

# path
root_path = "../planet/"
imgs_path = root_path + "train-jpg/"
labels_file = root_path + "train_validation_v2_bin.csv"

# iterations config
max_iteration = 500
summary_iters = 50
valid_iters = 250
usecols = range(1, 18)

# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
x = base_model.output
#x = MaxPooling2D((3, 3), strides=(2, 2), padding='valid')(x)
#x = AveragePooling2D((8,8), padding='valid')(x)
#x = Dropout(0.2)(x)
#x = Flatten()(x)
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)

# and a logistic layer -- 17 classes
# NOTE(review): softmax makes the 17 outputs sum to 1; confirm this is the
# intended activation if the labels are not mutually exclusive.
predictions = Dense(17, activation='softmax')(x)

# this is the model we will train
# The call form prints the same text on Python 2 and Python 3; the bare
# `print "..."` statement is a SyntaxError on Python 3.
print("init model")
patch_paths = [] #Get all 2048x2048 image paths for root, di, files in os.walk(patch_paths_dir): file_names = [ os.path.join(root, f) for f in os.listdir(root) if os.path.isfile(os.path.join(root, f)) ] patch_paths.extend(file_names) # Load inception model inception_model = InceptionV3( include_top=False, weights='imagenet', input_shape=input_shape, pooling='avg', trainable=False, classes=num_classes, second_stage=True, model_weights='imagenet_models/my_model_final.h5') # Get weights of the last fc layer final_layer_weights = inception_model.get_layer('my_predictions').get_weights() final_layer_weights[0] = np.expand_dims(final_layer_weights[0], axis=0) final_layer_weights[0] = np.expand_dims(final_layer_weights[0], axis=0) # print(final_layer_weights) ################################################################################################################################## ############################### Making fully convolutional model ############################################### if K.image_data_format() == 'channels_first': bn_axis = 1
class_weight = compute_class_weight('balanced', np.unique(y_train), y_train) print(class_weight) pre_computed_weights = dict(zip(classes, class_weight)) print("Pre computed weights for each class : \n", pre_computed_weights) steps_test = generator_test.n // batch_size print("The steps for batch size {} of test set is {}".format( batch_size, steps_test)) steps_per_epoch = generator_train.n // batch_size print("The steps for batch size {} of training set is {}".format( steps_per_epoch, steps_per_epoch)) print(type(generator_test.n)) # number of samples model = InceptionV3(include_top=True) model.summary() # pretrained_path = glob.glob(model_dir) # if pretrained_path is not None: # lastest_model = max(pretrained_path, key=os.path.getctime) # print(lastest_model) # model.load_weights(lastest_model) opt = keras.optimizers.Adam(lr=0.001) model.compile(optimizer=opt, loss=keras.losses.categorical_crossentropy, metrics=['accuracy']) check_point = keras.callbacks.ModelCheckpoint(model_weight_path, monitor='val_acc',
# print(X_train.shape) # print(Y_train.shape) # print(X_test.shape) # print(Y_test.shape) ########################################################################################################################################################################################## # FIRST STAGE: Training ONLY the last layer(softmax) of the updated inception model print( "FIRST STAGE: Training ONLY the last layer (softmax) of the updated inception model" ) # Load our model model = InceptionV3( include_top=False, weights='imagenet', input_shape=(img_rows, img_cols, channel), pooling='avg', trainable=False, classes=num_classes, second_stage=False, model_weights= 'imagenet_models/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5' ) # model.summary() # Some callbacks for logging tensorboard = TensorBoard(log_dir='./logs') early_stopping = EarlyStopping(monitor='val_loss', patience=2) reduceLR = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=0,
def main(args):
    """Use transfer learning and fine-tuning to train a network on a new dataset.

    Reads ``image_dir``, ``nb_epoch``, ``batch_size``, ``model_dir``,
    ``transfer_learning``, ``fine_tune``, ``validation_batch_size`` and
    ``no_plot`` from *args*.

    NOTE(review): a bare ``raise RuntimeError`` follows the model-comparison
    prints below, so as written nothing after it executes.  It looks like a
    leftover debugging stop -- confirm before relying on the training code.
    The nesting of the statements was reconstructed from a whitespace-mangled
    source and should be checked against the original file.
    """
    # One class per entry directly under image_dir.
    n_classes = len(glob.glob(args.image_dir + "/*"))
    # nb_val_samples = get_nb_files(args.val_dir)
    nb_epoch = int(args.nb_epoch)
    batch_size = int(args.batch_size)
    # model_file = os.path.join(
    #     args.model_dir, 'retrain_incep_v3_model_config.json')
    # if not os.path.exists(model_file):
    base_model = InceptionV3(weights='imagenet', include_top=False)
    base_model = add_pooling_layer(base_model)
    # Second network: the full classifier cut at layer index 311, used only
    # for the side-by-side prediction printout below.
    iv3_model = InceptionV3(weights='imagenet', include_top=True)
    iv3_base_model = Model(inputs=iv3_model.input, outputs=iv3_model.layers[311].output)
    img_paths = [
        'OBOG5055.JPG', 'wallhaven-220382.jpg', 'wallhaven-295153.jpg', 'wallhaven-605824.jpg'
    ]
    target_size = (IM_WIDTH, IM_HEIGHT)
    # Print the outputs of both models for each sample image.
    for img_path in img_paths:
        print("img_path: {}".format(img_path))
        img = image.load_img(img_path, target_size=target_size)
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        print("model: iv3_base_model")
        preds = iv3_base_model.predict(x)
        print(preds)
        print("model: base_model")
        preds = base_model.predict(x)
        print(preds)
    # NOTE(review): unconditional stop; everything below is unreachable.
    raise RuntimeError
    model = add_final_layer(base_model.input, base_model.output, n_classes)
    # model = add_new_last_layer(base_model, n_classes)
    # with open(model_file, 'w') as f:
    #     f.write(model.to_json())
    # else:
    #     with open(model_file) as f:
    #         model = model_from_json(f.read())
    #     print('reloading model...')
    image_lists, n_classes = create_or_load_training_data(args)
    nb_train_samples = get_nb_files(args.image_dir)
    print('total no. samples: {}'.format(nb_train_samples))
    if args.transfer_learning:
        # use bottleneck, here the model must be identical to the original top layer
        retrain_input_tensor = Input(shape=base_model.output.shape)
        retrain_model = add_final_layer(retrain_input_tensor, retrain_input_tensor, n_classes)
        check_point_file = os.path.join(args.model_dir, "retrain_weights_IV3.hdf5")
        # Resume from an existing checkpoint when one is present.
        if os.path.exists(check_point_file):
            print('loading checkpoint {}'.format(check_point_file))
            retrain_model.load_weights(check_point_file)
        retrain_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
        bottleneck_dir = os.path.join(args.model_dir, 'bottleneck_retrain_keras/')

        def bottle_pred_func(file):
            # Bottleneck values for one file come from the pooled base model.
            return predict_from_file(base_model, file)

        # Bottlenecks are cached only when the cache directory is missing.
        if not os.path.exists(bottleneck_dir):
            cache_bottlenecks(image_lists, args.image_dir, bottleneck_dir, bottle_pred_func)
        train_sequence = cached_bottlenecks_sequence(
            image_lists, args.batch_size, 'training', bottleneck_dir, args.image_dir, bottle_pred_func)
        validation_data = cached_bottlenecks_sequence(
            image_lists, args.validation_batch_size, 'validation', bottleneck_dir, args.image_dir, bottle_pred_func, sequence=False)
        # args.model_dir, "weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5")
        # Keep only the weights with the best validation accuracy.
        checkpoint = ModelCheckpoint(check_point_file, monitor='val_acc', verbose=1, save_best_only=True, mode='max', save_weights_only=True)
        tb_callback = TensorBoard(log_dir=args.model_dir, histogram_freq=2, write_graph=True)
        callbacks_list = [checkpoint, tb_callback]
        history_tl = retrain_model.fit_generator(
            train_sequence, epochs=nb_epoch, steps_per_epoch=nb_train_samples // batch_size, validation_data=validation_data, validation_steps=nb_train_samples // batch_size * 5, class_weight='auto', callbacks=callbacks_list)
        if not args.no_plot:
            plot_training(history_tl)
    if args.fine_tune:
        # Guard against the layer index and layer name constants drifting apart.
        assert model.layers[
            FINE_TUNE_FINAL_LAYER_INDEX].name == FINE_TUNE_FINAL_LAYER_NAME
        set_trainable_layers(
            trainable_layer_list=model.layers[:FINE_TUNE_FINAL_LAYER_INDEX + 1], frozen_layer_list=model.layers[FINE_TUNE_FINAL_LAYER_INDEX + 1:])
        model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])
        # NOTE(review): train_sequence / validation_data are assigned in the
        # transfer-learning branch above -- confirm they are always available.
        history_ft = model.fit_generator(
            train_sequence, steps_per_epoch=nb_train_samples // batch_size, epochs=nb_epoch, validation_data=validation_data, validation_steps=nb_train_samples // batch_size, class_weight='auto')
        if not args.no_plot:
            plot_training(history_ft)
        model.save(os.path.join(args.model_dir, 'inceptionv3-ft.model'))
def train(train_dir, val_dir, output_model_file, nb_epoch, batch_size, weights_dir, verbose=True):
    """Use transfer learning and fine-tuning to train a network on a new dataset.

    The model is an ImageNet-pretrained InceptionV3 (without its top layer)
    extended by ``add_new_last_layer``. It is trained in two consecutive
    phases over the same generators: a transfer-learning phase prepared by
    ``setup_to_transfer_learn`` and a fine-tuning phase prepared by
    ``setup_to_finetune``. The final model is written to ``output_model_file``.

    Args:
        train_dir: Directory of training images; the number of entries
            directly under it is used as the number of classes.
        val_dir: Directory of validation images.
        output_model_file: Path the trained model is saved to.
        nb_epoch: Epochs per phase; a falsy value falls back to ``NB_EPOCHS``.
        batch_size: Generator batch size; a falsy value falls back to
            ``BAT_SIZE``.
        weights_dir: Directory (joined onto ``module_path``) receiving the
            temporary best-so-far checkpoints of both phases.
        verbose: When true, print a progress line before each stage.
    """
    import math  # function-scope import: only needed for the step counts below

    nb_train_samples = get_nb_files(train_dir)
    nb_classes = len(glob.glob(train_dir + "/*"))
    nb_val_samples = get_nb_files(val_dir)
    nb_epoch = int(nb_epoch) if nb_epoch else NB_EPOCHS
    batch_size = int(batch_size) if batch_size else BAT_SIZE

    # Keras documents steps_per_epoch / validation_steps as integers. Plain
    # "/" yields a float on Python 3, so round up explicitly: one epoch then
    # covers every sample once, including the final partial batch.
    train_steps = int(math.ceil(nb_train_samples / float(batch_size)))
    val_steps = int(math.ceil(nb_val_samples / float(batch_size)))

    # data preparation
    if verbose:
        print("data preparation...")

    # Data augmentation (shifts, shears, zooms, flips) enlarges the effective
    # dataset. The validation generator deliberately uses the same settings.
    augmentation = dict(preprocessing_function=preprocess_input,
                        width_shift_range=0.4,
                        shear_range=0.4,
                        zoom_range=0.4,
                        horizontal_flip=True,
                        vertical_flip=True)
    train_datagen = ImageDataGenerator(**augmentation)
    valid_datagen = ImageDataGenerator(**augmentation)

    train_generator = train_datagen.flow_from_directory(
        train_dir, target_size=(IM_WIDTH, IM_HEIGHT), batch_size=batch_size)
    validation_generator = valid_datagen.flow_from_directory(
        val_dir, target_size=(IM_WIDTH, IM_HEIGHT), batch_size=batch_size)

    # setup model
    if verbose:
        print("setup model...")
    # include_top=False => excludes final FC layer
    base_model = InceptionV3(weights='imagenet', include_top=False)
    model = add_new_last_layer(base_model, nb_classes)

    def run_phase(checkpoint_suffix):
        """Fit ``model`` for ``nb_epoch`` epochs, checkpointing the best epoch."""
        # save_best_only=True: the file is only overwritten when the monitored
        # quantity improves.
        checkpointer = ModelCheckpoint(
            filepath=os.path.join(module_path, weights_dir + checkpoint_suffix),
            verbose=0,
            save_best_only=True)
        # NOTE(review): class_weight='auto' is kept from the original call;
        # current Keras documents class_weight as a dict -- confirm it is
        # honoured by the Keras version in use.
        model.fit_generator(train_generator,
                            steps_per_epoch=train_steps,
                            epochs=nb_epoch,
                            callbacks=[checkpointer],
                            validation_data=validation_generator,
                            validation_steps=val_steps,
                            class_weight='auto')

    # transfer learning
    if verbose:
        print("transfer learning...")
    setup_to_transfer_learn(model, base_model)
    run_phase('/weights_tl_tmp.h5')

    # fine-tuning
    if verbose:
        print("fine-tuning...")
    setup_to_finetune(model)
    run_phase('/weights_ft_tmp.h5')

    # Saving the model
    if verbose:
        print("Saving model...")
    model.save(output_model_file)
def _percent(numerator, denominator):
    """Return ``100 * numerator / denominator``, or NaN if the denominator is zero."""
    if not denominator:
        return float('nan')
    return 100 * numerator / denominator


def testing(weights_path="weights_gender_and_age/weights.h5", dataset_base_dir="sorted_gender_and_age"):
    """Evaluate the trained gender/age classifier on the validation images.

    Every ``.jpg`` found under ``<dataset_base_dir>/valid/<class>/`` is run
    through the model. The predicted class name is compared with the name of
    the directory the image came from, separately for gender and for age
    category, and a textual report (accuracies, off-by-N age statistics and a
    small histogram of age offsets) is printed.

    Args:
        weights_path: Weights file loaded into the rebuilt model.
        dataset_base_dir: Dataset root; its ``valid`` sub-directory holds one
            directory per combined gender/age class.

    Returns:
        None. All results are printed. A percentage whose denominator is zero
        (e.g. no image was ever predicted as female) is reported as ``nan``
        instead of aborting the report with ``ZeroDivisionError``.
    """
    dir_list = next(os.walk(dataset_base_dir + '/valid'))[1]
    # Sorted class names; argmax indices of the prediction are looked up here.
    classes = np.sort(dir_list)
    nb_classes = len(classes)

    # InceptionV3 pretrained on ImageNet, without the fully connected part
    # (include_top=False), plus the new classification layer. The trained
    # weights are then loaded on top.
    base_model = InceptionV3(weights='imagenet', include_top=False)
    model = add_new_last_layer(base_model, nb_classes)
    model.load_weights(weights_path)

    file_processed = 0
    f_count = 0            # images predicted as female
    success_f_count = 0    # ... whose directory is female as well
    m_count = 0            # images predicted as male
    success_m_count = 0    # ... whose directory is male as well
    success_age_count = 0  # age category exactly right
    one_off = 0            # age category missed by one position
    two_off = 0
    three_off = 0
    four_off = 0
    more_off = 0
    fully_correct = 0      # both age and gender are correct
    file_count = sum([len(files) for r, d, files in os.walk(dataset_base_dir + "/valid/")])
    offsets = dict()       # signed age offset -> number of images

    for combined_class in dir_list:
        for root, dirs, files in os.walk(dataset_base_dir + "/valid/" + combined_class):
            print("Number of items in " + combined_class + ": " + str(len(files)))
            for file in files:
                file_processed += 1
                if file.lower().endswith('.jpg'):
                    img_path = dataset_base_dir + "/valid/" + combined_class + "/" + file
                    if os.path.isfile(img_path):
                        img = image.load_img(img_path, target_size=(299, 299))
                        x = image.img_to_array(img)
                        x = np.expand_dims(x, axis=0)
                        x = preprocess_input(x)
                        preds = model.predict(x)

                        best = np.argmax(preds)
                        label = classes[best]
                        p = preds[0][best] * 100

                        gender_ok = False
                        age_ok = False
                        # NOTE(review): gender is detected by the letters
                        # 'f' / 'm' occurring in the class names -- confirm
                        # against the dataset's directory naming.
                        if 'f' in label:  # classified as female
                            f_count += 1
                            if 'f' in combined_class:  # actually a female
                                success_f_count += 1
                                gender_ok = True
                        elif 'm' in label:
                            m_count += 1
                            if 'm' in combined_class:
                                success_m_count += 1
                                gender_ok = True

                        expected = 100    # dummy values that should never be used
                        predicted = -100
                        # As the age ranges are ordered, the indices tell how
                        # close to the expected value the prediction was.
                        for index, cat in enumerate(age_categories):
                            if cat in combined_class:
                                expected = index
                            if cat in label:
                                predicted = index

                        offset = expected - predicted
                        offsets[offset] = offsets.get(offset, 0) + 1
                        distance = abs(offset)
                        if distance == 0:
                            age_ok = True
                            success_age_count += 1
                        elif distance == 1:
                            one_off += 1
                        elif distance == 2:
                            two_off += 1
                        elif distance == 3:
                            three_off += 1
                        elif distance == 4:
                            four_off += 1
                        else:
                            print("worse than 4-off:" + str(offset))
                            more_off += 1

                        if gender_ok and age_ok:
                            fully_correct += 1
                        else:
                            print("[class-err] Exp: " + combined_class + ", Got: " + label + " (p=" + (
                                "%.2f" % p) + "%, img=" + file + ")")
                    else:
                        print("Error")

                # Prints current progress in case we're dealing with a large dataset
                if file_processed % 50 == 0:
                    print("..." + "%.2f" % _percent(file_processed, file_count) + " %")

    # Every classified image landed in exactly one of these buckets.
    total_age_classifications = (success_age_count + one_off + two_off
                                 + three_off + four_off + more_off)

    female_accuracy = _percent(success_f_count, f_count)
    male_accuracy = _percent(success_m_count, m_count)

    print()
    print("=> Female Accuracy: " + str(female_accuracy) + " %")
    print("=> Male Accuracy: " + str(male_accuracy) + " %")
    print("=> Gender global accuracy: " + "%.2f" % (
        _percent(success_m_count + success_f_count, m_count + f_count)) + " %")
    print("=> Gender average accuracy (in case test sets aren't equally distributed): " + "%.2f" % (
        (female_accuracy + male_accuracy) / 2) + " %")
    print()
    print("====================================")
    print()
    print("=> Age Accuracy: " + str(_percent(success_age_count, total_age_classifications)) + " %")
    print("=> 1-off: " + str(_percent(one_off, total_age_classifications)) + " %")
    print("=> 2-off: " + str(_percent(two_off, total_age_classifications)) + " %")
    print("=> 3-off: " + str(_percent(three_off, total_age_classifications)) + " %")
    print("=> 4-off: " + str(_percent(four_off, total_age_classifications)) + " %")
    print("=> worse: " + str(_percent(more_off, total_age_classifications)) + " %")
    print()

    # Rough histogram of the signed age offsets: one green cell per two images.
    cell = Back.GREEN + '_' + Back.RESET
    for key in sorted(offsets):
        print(str(key) + ":\t" + cell * (offsets[key] // 2) + ' (' + str(offsets[key]) + ')')
    print()
    print("====================================")
    print()
    print("=> Full classification accuracy: " + str(_percent(fully_correct, total_age_classifications)) + " %")
img = image.load_img(path, target_size=(299, 299)) x = image.img_to_array(img) x = np.expand_dims(x, axis=0) x = preprocess_input(x) y = model.predict(x) preds = decode_predictions(y) activations = get_activations(model, x, layer_name=layer) return preds, np.array(activations).flatten() if __name__ == '__main__': args = parser.parse_args() correct, total = 0, 0 model = InceptionV3(include_top=True, weights='imagenet') layer_name = args.layer valid_concepts = [fname for fname in os.listdir(args.valid_images)] cache_filename = 'activation_cache' + args.layer + '.pkl' cache_path = join(args.cache_dir, cache_filename) try: cnn_valid = joblib.load(cache_path) print('Using cached activations') except FileNotFoundError: cnn_valid = []
# # # 指定优化器 # optimizer1 = SGD(lr=LEARNING_RATE, momentum=0.9, decay=0, nesterov=False) # optimizer2 = RMSprop(lr=LEARNING_RATE) # optimizer3 = RMSprop() # # # 编译模型 # model.compile(optimizer=optimizer3, # loss={'left_output': 'binary_crossentropy', 'right_output': 'binary_crossentropy'}, # metrics=['accuracy']) else: # 创建预训练模型 base_model = InceptionV3( weights= './pretrain_weights/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', include_top=False, pooling='avg', model_name='left_inception_v3') # 左右眼输入 input_shape = (299, 299, 3) left_input = Input(input_shape, name='left_input') right_input = Input(input_shape, name='right_input') # inception_resnet的bottle_neck输出 # with tf.variable_scope("Inception", reuse=None): # left_x = base_model(left_input) # with tf.variable_scope("Inception", reuse=True): # right_x = base_model(right_input) with tf.variable_scope("Inception") as scope:
# Create validation generator.
# The validation batches must come from val_datagen (rescaling only). Building
# them from train_datagen left val_datagen unused and applied the training
# generator's configuration to the validation images.
val_datagen = ImageDataGenerator(rescale=1. / 255)
val_generator = val_datagen.flow(x_val, y_val_ohe, shuffle=False,
                                 batch_size=BATCH_SIZE, seed=1)

# ## Prepare Deep Learning Classifier
#
# * Load InceptionV3 pretrained on ImageNet without its top/classification layer
# * Add additional custom layers on top of InceptionV3 to prepare custom classifier

# Get the InceptionV3 model so we can do transfer learning
base_inception = InceptionV3(weights='imagenet', include_top=False,
                             input_shape=(299, 299, 3))
# base_inception = InceptionV3(weights='imagenet', include_top=True, input_shape=(299, 299, 3))

# Add a global spatial average pooling layer, two 512-unit ReLU layers and a
# softmax output with one unit per one-hot encoded class.
out = base_inception.output
out = GlobalAveragePooling2D()(out)
out = Dense(512, activation='relu')(out)
out = Dense(512, activation='relu')(out)
total_classes = y_train_ohe.shape[1]
predictions = Dense(total_classes, activation='softmax')(out)

# * Stack the two models (InceptionV3 and custom layers) on top of each other
# * Compile the model and view its summary
model = Model(inputs=base_inception.input, outputs=predictions)