def create_keras_classification_model(source, architecture, input_shape, n_classes, pretrained=False):
    """Build a Keras image-classification model and print its summary.

    Args:
        source: Model family selector. Any value starting with ``'cm'`` uses
            the ``classification_models`` registry (``'cm_cxr'`` gets a
            sigmoid head for multi-label output, other ``'cm*'`` values get
            softmax). ``'simple_cnn'`` delegates to ``simple_cnn_model``.
        architecture: Registry name passed to ``Classifiers.get``; only used
            for ``'cm*'`` sources.
        input_shape: Input shape; its last entry must be 1 or 3.
        n_classes: Number of output units.
        pretrained: When true, ImageNet weights are loaded without the
            original top and a new pooling + dense head is attached.

    Returns:
        The constructed Keras model.

    Raises:
        AssertionError: If the last entry of ``input_shape`` is not 1 or 3.
        NotImplementedError: If ``source`` is not a supported family.
    """
    assert input_shape[-1] in [1, 3], 'The input shape is incompatible with the model.'

    if source.startswith('cm'):
        # Create the model using the classification_models repository
        Architecture, _ = Classifiers.get(architecture)
        weights = 'imagenet' if pretrained else None
        model = Architecture(input_shape=input_shape, classes=n_classes,
                             weights=weights, include_top=not pretrained)

        if pretrained:
            # Perform model surgery: the headless backbone gets a new
            # pooling + dense head sized for n_classes.
            new_output = keras.layers.GlobalAveragePooling2D()(model.layers[-1].output)
            new_output = keras.layers.Dense(n_classes)(new_output)
            if source == 'cm_cxr':
                # Models that do multi-label classification use sigmoid outputs
                new_output = keras.activations.sigmoid(new_output)
            else:
                # Standard softmax output is best for most cases
                new_output = keras.activations.softmax(new_output)
            model = keras.Model(inputs=model.inputs, outputs=new_output)
    elif source == 'simple_cnn':
        model = simple_cnn_model(input_shape, n_classes)
    else:
        raise NotImplementedError(
            'Unsupported model source: {!r}'.format(source))

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    model.summary()
    return model
def build(self):
    """
    Instantiate the classifier described by this object's attributes and
    store it in ``self.model``.

    With ``include_top`` set, the registry model is used unchanged.
    Otherwise the backbone is optionally frozen (``self.freeze``) and a
    global-average-pooling + dense head with ``self.output_activation`` is
    attached.

    :return: None
    """
    backbone_factory, _ = Classifiers.get(self.architecture)
    backbone = backbone_factory(
        input_shape=self.input_shape,
        weights=self.weights,
        classes=self.num_classes,
        include_top=self.include_top,
    )

    if self.include_top:
        # The registry model already carries its own classification head.
        self.model = backbone
        return

    if self.freeze:
        for backbone_layer in backbone.layers:
            backbone_layer.trainable = False

    pooled = tf.keras.layers.GlobalAveragePooling2D()(backbone.output)
    head = tf.keras.layers.Dense(
        self.num_classes, activation=self.output_activation)
    predictions = head(pooled)
    self.model = tf.keras.models.Model(
        inputs=[backbone.input], outputs=[predictions])
def _build(self):
    """Create the feature extractor and the ResNet18-based pose regressor.

    Sets ``self.feature_extractor`` and ``self.decoder_pose_estimator``.
    """
    # Feature extraction network; the shape it also returns is not used here.
    self.feature_extractor, _ = self._build_feature_network()

    # Output size: the camera pose, plus one extra value when
    # "pred_scale_extra" is enabled in the config.
    pose_dim = self.data.cam_pose_dim + (
        1 if self.config.get_bool("pred_scale_extra") else 0)

    # Channel count of the tensor produced by feature matching: each entry n
    # of dest_att_iters contributes n * n channels.
    channels = sum(1 * n * n for n in self.dest_att_iters)

    # Regression backbone (resnet18), trained from scratch.
    resnet_factory, _ = Classifiers.get('resnet18')
    regressor = resnet_factory(
        (128, 128, channels), include_top=False, weights=None)
    hidden = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(
        regressor.output)

    # Fully connected layers
    hidden = tf.keras.layers.Dense(512, activation="relu")(hidden)
    # training=True keeps dropout active at inference time (MC dropout).
    if self.config.get_float("dropout") > 0:
        hidden = Dropout(self.config.get_float("dropout"))(
            hidden, training=True)
    hidden = Dense(512, activation="relu")(hidden)
    pose = tf.keras.layers.Dense(pose_dim)(hidden)

    # Expose the whole regression part as a single model.
    self.decoder_pose_estimator = tf.keras.models.Model(
        regressor.input, pose)
def __init__(self, input_shape=(224, 224, 3)):
    """Create a headless ResNet50 with the 'imagenet11k-places365ch' weights.

    The matching preprocessing function from the registry is stored in
    ``self.preprocess_input_f`` and the network in ``self.model``.
    """
    super().__init__(input_shape=input_shape)

    # Imported lazily, as in the original implementation.
    from classification_models.tfkeras import Classifiers

    backbone_factory, preprocess_fn = Classifiers.get('resnet50')
    self.preprocess_input_f = preprocess_fn

    backbone_options = dict(
        input_shape=input_shape,
        weights='imagenet11k-places365ch',
        include_top=False,
        classes=11586,
    )
    self.model = backbone_factory(**backbone_options)
# For every supported backbone: the names of the three layers whose outputs
# feed the RetinaNet pyramid, ordered 28x28, 14x14, 7x7.
_SENET_FEATURE_LAYERS = {
    'seresnet18': ('stage3_unit1_relu1', 'stage4_unit1_relu1', 'relu1'),
    'seresnet34': ('stage3_unit1_relu1', 'stage4_unit1_relu1', 'relu1'),
    'seresnet50': ('activation_36', 'activation_66', 'activation_81'),
    'seresnet101': ('activation_36', 'activation_151', 'activation_166'),
    'seresnet152': ('activation_56', 'activation_236', 'activation_251'),
    'seresnext50': ('activation_37', 'activation_67', 'activation_81'),
    'seresnext101': ('activation_37', 'activation_152', 'activation_166'),
    'senet154': ('activation_59', 'activation_239', 'activation_253'),
}


def senet_retinanet(num_classes, backbone='seresnext50', inputs=None, modifier=None, **kwargs):
    """ Constructs a retinanet model using a SE-ResNet / SE-ResNeXt / SENet backbone.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of 'seresnet18', 'seresnet34',
            'seresnet50', 'seresnet101', 'seresnet152', 'seresnext50',
            'seresnext101', 'senet154').
        inputs: The inputs to the network (defaults to a Tensor of shape
            (None, None, 3), or (3, None, None) for channels_first).
        modifier: A function handler which can modify the backbone before using
            it in retinanet (this can be used to freeze backbone layers for example).
        **kwargs: Forwarded unchanged to ``retinanet.retinanet``.

    Returns
        RetinaNet model built on the selected backbone.

    Raises
        ValueError: If ``backbone`` is not one of the supported names.
    """
    # Validate first so an unsupported name fails before any network is built.
    feature_layer_names = _SENET_FEATURE_LAYERS.get(backbone)
    if feature_layer_names is None:
        raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))

    # choose default input
    if inputs is None:
        if keras.backend.image_data_format() == 'channels_first':
            inputs = keras.layers.Input(shape=(3, None, None))
        else:
            inputs = keras.layers.Input(shape=(None, None, 3))

    classifier, _ = Classifiers.get(backbone)
    model = classifier(input_tensor=inputs, include_top=False, weights=None)

    # get last conv layer from the end of each block [28x28, 14x14, 7x7]
    layer_outputs = [
        model.get_layer(name=layer_name).output
        for layer_name in feature_layer_names
    ]

    # create the backbone model exposing the three feature maps
    model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)

    # invoke modifier if given
    if modifier:
        model = modifier(model)

    # create the full model
    return retinanet.retinanet(inputs=inputs, num_classes=num_classes,
                               backbone_layers=model.outputs, **kwargs)
def extract_features(input_shape, name='', archi='resnet18', imagenet=False, freeze_until=None):
    """Build a headless feature-extraction model.

    Args:
        input_shape: Shape of the input tensor.
        name: Name given to the returned model.
        archi: Backbone to use, ``'resnet18'`` or ``'mobilenetv2'``.
        imagenet: Load ImageNet weights when true, otherwise no weights.
        freeze_until: When truthy, only the last ``freeze_until`` layers stay
            trainable and all earlier layers are frozen. When falsy, every
            layer is trainable.

    Returns:
        A model mapping the input to the backbone's final feature map.

    Raises:
        ValueError: If ``archi`` is not a supported backbone (previously this
            surfaced as an unbound-variable error at the return statement).
    """
    if archi == 'resnet18':
        # The resnet18 constructor is additionally called with version='v2'.
        extra_kwargs = {'version': 'v2'}
    elif archi == 'mobilenetv2':
        extra_kwargs = {}
    else:
        raise ValueError('Unsupported architecture: {!r}'.format(archi))

    inputs = Input(shape=input_shape)
    backbone_cls, _ = Classifiers.get(archi)
    base_model = backbone_cls(input_tensor=inputs,
                              input_shape=input_shape,
                              weights='imagenet' if imagenet else None,
                              include_top=False,
                              **extra_kwargs)

    if freeze_until:
        if archi == 'resnet18':
            print("Freezing network until layer: " + str(freeze_until))
        for layer in base_model.layers[:-freeze_until]:
            layer.trainable = False
        for layer in base_model.layers[-freeze_until:]:
            layer.trainable = True
    else:
        for layer in base_model.layers:
            layer.trainable = True

    return Model(inputs, base_model.output, name=name)
def load_resnet(shape, efftype='R50', weights='imagenet'):
    """Load a frozen, headless ResNet backbone.

    Args:
        shape: Input shape, passed positionally to the model constructor.
        efftype: Variant code; everything after its first character is
            appended to ``'resnet'`` to form the registry name
            (``'R50'`` -> ``'resnet50'``).
        weights: Weights argument forwarded to the constructor.

    Returns:
        The backbone with every layer marked non-trainable.
    """
    depth = efftype[1:]
    loader, _ = Classifiers.get(f'resnet{depth}')
    backbone = loader(shape, weights=weights, include_top=False)
    for layer in backbone.layers:
        layer.trainable = False
    return backbone
def _test_application(name, input_shape=(224, 224, 3), last_dim=1000, label='bull_mastiff'):
    """Check a registry model with ImageNet weights.

    Asserts that the model's output shape is ``(None, last_dim)`` and that
    ``label`` is among the three best decoded predictions.
    """
    model_factory, preprocess_fn = Classifiers.get(name)
    network = model_factory(input_shape=input_shape, weights='imagenet')

    observed_shape, predictions = _get_output_shape(network, preprocess_fn)
    assert observed_shape == (None, last_dim)

    # Each decoded entry carries the class name at index 1.
    decoded = decode_predictions(predictions, **KWARGS)[0]
    predicted_names = [entry[1] for entry in decoded]
    assert label in predicted_names[:3]
def model_MobileNet(args):
    """Build a MobileNetV2 classifier with ImageNet weights.

    Reads ``args.IMG_HEIGHT``, ``args.IMG_WIDTH`` and ``args.num_classes``.
    The headless backbone is followed by global average pooling and a
    softmax layer.
    """
    backbone_factory, _ = Classifiers.get('mobilenetv2')

    image_shape = (args.IMG_HEIGHT, args.IMG_WIDTH, 3)
    backbone = backbone_factory(
        input_shape=image_shape, weights='imagenet', include_top=False)

    pooled = GlobalAveragePooling2D()(backbone.output)
    class_scores = Dense(args.num_classes, activation='softmax')(pooled)
    return Model(inputs=[backbone.input], outputs=[class_scores])
def get_model(model_name, input_shape=(80, 80, 1)):
    """Build a single-output sigmoid classifier on a registry backbone.

    ``model_name`` selects the backbone from ``Classifiers``. The headless
    backbone is followed by global average pooling and one sigmoid unit.
    The registry's preprocessing function is not used here.
    """
    backbone_factory, _ = Classifiers.get(model_name)
    backbone = backbone_factory(input_shape=input_shape, include_top=False)

    pooled_features = GlobalAveragePooling2D()(backbone.output)
    probability = Dense(1, activation='sigmoid')(pooled_features)

    return Model(inputs=[backbone.input], outputs=[probability])
def create(include_top=False, input_shape=None, input_tensor=None, weights="imagenet"):
    """Create the network: a ResNet34 from the classification_models registry.

    All arguments are forwarded unchanged to the ResNet34 constructor.
    """
    from classification_models.tfkeras import Classifiers

    resnet34_factory, _ = Classifiers.get("resnet34")
    options = {
        "include_top": include_top,
        "input_shape": input_shape,
        "input_tensor": input_tensor,
        "weights": weights,
    }
    return resnet34_factory(**options)
def create_base_network(self, transfer_learning=False):
    """Return a ResNet18 feature extractor cut four layers before the end.

    The full ResNet18 is built for a square RGB input of side
    ``self.net_input_size`` with ``self.weights_dir`` as its weights
    argument. With ``transfer_learning`` set, every layer of the extractor
    is frozen.
    """
    resnet_factory, _ = Classifiers.get('resnet18')
    side = self.net_input_size
    full_model = resnet_factory(
        input_shape=(side, side, 3), weights=self.weights_dir)

    # Keep everything from the first layer's input up to the output of the
    # fourth layer from the end.
    first_layer = full_model.layers[0]
    cut_layer = full_model.layers[-4]
    feature_extractor = Model(first_layer.input, cut_layer.output)

    if transfer_learning:
        for extractor_layer in feature_extractor.layers:
            extractor_layer.trainable = False

    return feature_extractor
def _test_save_load(name, input_shape=(224, 224, 3)):
    """Check that a model survives a save / load round trip.

    A randomly initialised model is saved to ``model.h5`` in the current
    directory, loaded back, and both copies must give (numerically close)
    identical predictions for the same image. The file is removed even when
    saving or loading fails, so a failing run does not leave it behind.
    """
    model_path = 'model.h5'

    # create first model
    classifier, _ = Classifiers.get(name)
    model1 = classifier(input_shape=input_shape, weights=None)

    try:
        model1.save(model_path)
        # load same model from file
        model2 = keras.models.load_model(model_path, compile=False)
    finally:
        if os.path.exists(model_path):
            os.remove(model_path)

    x = _get_img()
    y1 = model1.predict(x)
    y2 = model2.predict(x)

    assert np.allclose(y1, y2)
def ResNet18(classes: int, img_size: Tuple[int, int] = (224, 224), weights: str = 'imagenet', freeze_till: str = 'all', activation='softmax') -> Model:
    """
    ResNet 18 model.

    Parameters:
        - classes : number of classes to predict.
        - img_size : image dimensions.
        - weights : weight initialisation: none (None) or 'imagenet'.
        - freeze_till : either None (all weights are re-trained), or 'all'
            (all weights are frozen), or the name of a specific layer. That
            layer and every layer before it are frozen.
        - activation : activation of the final classification layer.
    """
    # Fetch the resnet18 constructor from the model registry and build the
    # network with the requested initial weights.
    backbone_factory, _ = Classifiers.get('resnet18')
    backbone = backbone_factory((*img_size, 3), weights=weights)

    # Choose which weights are frozen and which are trained.
    if freeze_till == 'all':
        backbone.trainable = False  # everything is frozen
    elif freeze_till:
        last_frozen = backbone.get_layer(freeze_till)
        last_frozen_idx = backbone.layers.index(last_frozen)
        for frozen_layer in backbone.layers[:last_frozen_idx + 1]:
            frozen_layer.trainable = False

    # Keep the model up to the 'pool1' layer, i.e. drop the classification
    # layers ("no top").
    embedding_layer = backbone.get_layer('pool1')
    headless = Model(
        inputs=backbone.input,
        outputs=embedding_layer.output,
        name='ResNet18',
    )

    # Stack a fresh classification layer on top of the headless network.
    classifier = Sequential()
    classifier.add(headless)
    classifier.add(
        Dense(classes,
              activation=activation,
              kernel_initializer='he_normal',
              name='fc1'))
    return classifier
def __init__(
    self,
    num_classes: int = 2,
    pretrained: bool = True,
    dropout: float = 0.5,
):
    """Set up a saved ResNet18 trunk with a dropout + dense head.

    The trunk is loaded from a fixed path on disk and truncated at its
    'avgpool' layer. ``pretrained`` is accepted but not read in this
    constructor.
    """
    super().__init__()

    # The registry lookup result is not used below; the call is retained.
    _, _ = Classifiers.get('resnet18')

    saved_model = K.models.load_model(
        '/data/project/rw/ASDG/tf_ssdg/tf_resnet18')
    pooled_output = saved_model.get_layer('avgpool').output
    self.resnet18 = K.models.Model(
        inputs=saved_model.input, outputs=pooled_output)

    self.drop = layers.Dropout(dropout)
    # No activation is set on the dense layer, so it outputs raw scores.
    self.fc = layers.Dense(units=num_classes)
    self._freeze_clf()
def __model_wrap(input_shape, weights='imagenet', include_top=False, pooling='avg'):
    """Build the ``model_type`` registry model followed by global pooling.

    ``pooling`` must be 'avg' or 'max', otherwise ``ValueError`` is raised.
    The base model is always created with ``pooling=None``; the pooling
    layer is attached here and named 'avgpool' or 'maxpool'.
    """
    if pooling not in ('avg', 'max'):
        raise ValueError('pooling should be `avg` or `max`')

    # Initialize base model
    backbone_factory = Classifiers.get(model_type)[0]
    backbone = backbone_factory(input_shape=input_shape,
                                weights=weights,
                                include_top=include_top,
                                pooling=None)

    # Add pooling layer
    if pooling == 'avg':
        pool_layer = GlobalAveragePooling2D(name='avgpool')
    else:
        pool_layer = GlobalMaxPooling2D(name='maxpool')
    pooled = pool_layer(backbone.output)

    # Create new model after pooling
    return Model(inputs=backbone.inputs, outputs=pooled)
def create_resnet_18(height, width, pretrained: bool, mode: XModelMode = XModelMode.SIMPLE):
    """Build a regularised ResNet18 backbone and pass it through ``upsample``.

    ImageNet weights are used when ``pretrained`` is true. L2 kernel
    regularisation (``L2_REG``) is applied via ``set_regularization``.

    Returns the tuple ``(base_model, inputs, features)``, where
    ``base_model`` and ``features`` come from ``upsample`` and ``inputs`` is
    the new input tensor named "input".
    """
    input_shape = (height, width, 3)

    backbone_factory, _ = Classifiers.get("resnet18")
    backbone = backbone_factory(
        input_shape=input_shape,
        weights="imagenet" if pretrained else None,
        include_top=False,
    )
    backbone = set_regularization(
        backbone, kernel_regularizer=tf.keras.regularizers.l2(L2_REG))

    inputs = tf.keras.Input(shape=input_shape, name="input")

    # Layer names handed to upsample() together with the backbone.
    tap_layer_names = [
        "stage2_unit1_relu1",
        "stage3_unit1_relu1",
        "stage4_unit1_relu1",
        "relu1",
    ]
    base_model, features = upsample(backbone, inputs, tap_layer_names, mode)
    return base_model, inputs, features
def modelBuilding(self):
    """Build, compile and summarise the 7-way SE-ResNeXt50 classifier.

    Stores the backbone in ``self.pretrainedNet`` and the compiled model in
    ``self.clf``. Training uses Adam and the focal loss defined below.
    """

    def focalLoss(alpha=1., gamma=2.):
        """Return a multi-category focal loss with the given alpha / gamma."""
        alpha = float(alpha)
        gamma = float(gamma)

        def multiCategoryFocalLossFixed(yTrue, yPred):
            yTrue = tf.cast(yTrue, tf.float32)
            yPred = tf.cast(yPred, tf.float32)
            # Keep predictions away from 0 and 1 so the log stays finite.
            yPred = K.clip(yPred, K.epsilon(), 1. - K.epsilon())

            crossEntropy = tf.multiply(yTrue, -K.log(yPred))
            modulation = tf.pow(tf.subtract(1., yPred), gamma)
            weight = tf.multiply(yTrue, modulation)
            focal = tf.multiply(alpha, tf.multiply(weight, crossEntropy))

            # Largest term per sample (axis 1), summed over the batch.
            perSample = tf.reduce_max(focal, axis=1)
            return tf.reduce_sum(perSample)

        return multiCategoryFocalLossFixed

    backboneFactory = Classifiers.get("seresnext50")[0]
    self.pretrainedNet = backboneFactory(input_shape=self.inputShape,
                                         weights="imagenet",
                                         include_top=False)
    # Every backbone layer is fine-tuned.
    for backboneLayer in self.pretrainedNet.layers:
        backboneLayer.trainable = True

    inputTensor = Input(shape=self.inputShape)
    features = self.pretrainedNet(inputTensor)
    pooled = GeM()(features)
    classifierHead = Dense(7,
                           activation=softmax,
                           use_bias=True,
                           kernel_initializer=glorot_uniform(seed=2020),
                           bias_initializer=Zeros())
    outputTensor = classifierHead(pooled)

    self.clf = Model(inputTensor, outputTensor)
    self.clf.compile(optimizer=Adam(lr=1e-3),
                     loss=focalLoss(alpha=1., gamma=2.),
                     metrics=["accuracy"])
    self.clf.summary()
def resnet18(input_shape, nclasses, dropout, weights='imagenet', **kwargs):
    """Build a ResNet18 classifier that upsamples 128x128 inputs to 256x256.

    ``input_shape`` is accepted but not read: the input is fixed at
    (128, 128, 3) and the backbone at (256, 256, 3). ``kwargs`` are passed
    to ``keras.Sequential``.

    Returns the tuple ``(model, preprocess)``, where ``preprocess`` is the
    preprocessing function supplied by the registry.
    """
    backbone_factory, preprocess = Classifiers.get('resnet18')
    backbone = backbone_factory(
        include_top=False,
        input_shape=(256, 256, 3),
        weights=weights,
    )

    stack = [
        keras.layers.Input((128, 128, 3)),
        # 2x upsampling turns the 128x128 input into the backbone's 256x256.
        keras.layers.UpSampling2D(size=(2, 2)),
        backbone,
        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dropout(dropout),
        keras.layers.BatchNormalization(),
        keras.layers.Dense(nclasses, activation='softmax'),
    ]
    model = keras.Sequential(stack, **kwargs)
    return (model, preprocess)
from classification_models.tfkeras import Classifiers
from tensorflow import keras

# Headless ResNet18 with ImageNet weights.
ResNet18, preprocess_input = Classifiers.get('resnet18')
model = ResNet18(input_shape=(224, 224, 3), weights='imagenet', include_top=False)

# A Keras model cannot be sliced like a list. To keep only its first ten
# layers, build a new functional model that ends at the tenth layer's output.
# NOTE(review): this assumes the first ten layers form a simple chain; confirm
# against the model's layer list.
model = keras.Model(inputs=model.input, outputs=model.layers[9].output)

# summary() prints the table itself and returns None.
model.summary()
def test(ref_dir, query_dir, network, weights, visualize=False, sliding=True, imagenet=False, freeze_until=None, finetuning=False, save_maps=False):
    """Testing function.

    Compares every reference patch in ``ref_dir`` against every query image
    in ``query_dir`` and renders a heatmap per (patch, image) pair. Each
    reference patch is shown in a window first; pressing 's' skips it.

    Args:
        ref_dir (str): Directory holding the reference patches (``*.png``).
        query_dir (str): Directory holding query images (``*.png``/``*.jpg``)
            whose file names end in ``_<frame number>``.
        network (str): String identifying the network architecture to use.
        weights (str): Path string to a .h5 weights file.
        visualize (bool): Show intermediate windows and wait for key presses
            ('n' moves to the next image, Esc aborts).
        sliding (bool): Compare sliding patches of the query image rather
            than the whole image in one pass.
        imagenet (bool): Forwarded to ``setup_network``; also forces
            ``finetuning``.
        freeze_until: Forwarded to ``setup_network``.
        finetuning (bool): Use the ResNet18 preprocessing function instead
            of scaling pixel values by 1/255.
        save_maps (bool): Write the cropped heatmap overlay next to the
            query image, in a sub-directory named after ``ref_dir``.

    Returns:
        None.
    """
    if imagenet:
        finetuning = True
    siamese_net = setup_network(weights, network, sliding=sliding,
                                imagenet=imagenet, freeze_until=freeze_until)
    patch_w = 128
    patch_h = 128
    if siamese_net is None:
        logging.error('Trouble loading the network.')
        return

    # The preprocessing function does not change between images, so it is
    # looked up once instead of once per patch.
    preprocess_input = None
    if finetuning:
        _, preprocess_input = Classifiers.get('resnet18')

    ref_dir_basename = os.path.basename(os.path.normpath(ref_dir))
    print(ref_dir_basename)
    ref_patches = sorted(glob.glob(os.path.join(ref_dir, '*.png')))
    query_images = sorted(glob.glob(os.path.join(query_dir, '*.png')))
    query_images.extend(sorted(glob.glob(os.path.join(query_dir, '*.jpg'))))

    # Order the query images by the frame number at the end of the file name.
    # Stripping the extension with splitext handles .jpg as well as .png.
    query_images_dict = dict()
    for query in query_images:
        frame_num = os.path.splitext(query)[0].split('_')[-1]
        print(query)
        query_images_dict[int(frame_num)] = query
    query_images = [query_images_dict[key] for key in sorted(query_images_dict)]

    patch_dict = dict()
    desc_shape = ()
    # loop over reference patches:
    for patch_idx, ref_patch_path in enumerate(ref_patches):
        ref_patch = cv2.imread(ref_patch_path)
        ref_patch = cv2.resize(ref_patch, (patch_w, patch_h))
        cv2.imshow("ref_patch", ref_patch)
        k2 = cv2.waitKey(0)
        if k2 == ord('s'):
            print(patch_idx)
            continue

        query_dict = dict()
        for img_path in query_images:  # large image
            logging.info("Processing query: " + str(img_path))
            orig_image = cv2.imread(img_path)
            saliency_dict = dict()
            if sliding:
                image = cv2.resize(orig_image, (512, 512))
                print("Resized image shape: " + str(image.shape))
                for (x, y, patch) in sliding_patches(image):
                    # Skip partial patches at the image border.
                    if patch.shape[0] != patch_h or patch.shape[1] != patch_w:
                        continue
                    # draw current patch:
                    if visualize:
                        clone = image.copy()
                        cv2.rectangle(clone, (x, y),
                                      (x + patch_w, y + patch_h),
                                      (0, 255, 0), 2)
                        cv2.imshow("query_image", clone)
                        cv2.imshow("query_patch", patch)
                        k = cv2.waitKey(0)
                        if k == ord('n'):
                            break
                        elif k == 27:
                            return
                    patch_np = np.zeros((1, patch_w, patch_h, 3))
                    ref_patch_np = np.zeros((1, patch_w, patch_h, 3))
                    if not finetuning:
                        patch_np[0] = patch / 255.
                        ref_patch_np[0] = ref_patch / 255.
                    else:
                        patch_np[0] = preprocess_input(patch)
                        ref_patch_np[0] = preprocess_input(ref_patch)
                    inputs = [patch_np, ref_patch_np]
                    pred_dot = forward_pass(siamese_net, inputs,
                                            visualize=visualize,
                                            finetuning=finetuning)
                    print("Dot product 3: " + str(pred_dot))
                    saliency_dict[(x, y)] = (0, pred_dot[3])
            else:
                img_h = 512
                img_w = 512
                # Resize to a square whose side is the smaller dimension.
                if img_w >= img_h:
                    image = cv2.resize(orig_image, (img_h, img_h))
                    image_np = np.zeros((1, img_h, img_h, 3))
                else:
                    image = cv2.resize(orig_image, (img_w, img_w))
                    image_np = np.zeros((1, img_w, img_w, 3))
                ref_patch_np = np.zeros((1, patch_w, patch_h, 3))
                if not finetuning:
                    image_np[0] = image / 255.
                    ref_patch_np[0] = ref_patch / 255.
                else:
                    image_np[0] = preprocess_input(image)
                    ref_patch_np[0] = preprocess_input(ref_patch)
                inputs = [image_np, ref_patch_np]
                saliency_dict, desc_shape = forward_pass(
                    siamese_net, inputs, visualize=visualize,
                    finetuning=finetuning)

            query_dict[(img_path)] = saliency_dict
            print("Generating heatmap")
            # Wait for a key press (0) only when visualising; -1 otherwise.
            _, _, img_w_hmap = generate_heatmaps(
                image, saliency_dict, desc_shape=desc_shape, sliding=sliding,
                get_normed=False, wait_key_dur=0 if visualize else -1)

            if save_maps:
                img_path_basename = os.path.basename(img_path)
                # splitext on the base name keeps this correct when a
                # directory in the path contains a dot.
                stem, ext = os.path.splitext(img_path_basename)
                dir_name = os.path.dirname(img_path)
                img_w_hmap = img_w_hmap[64:448, 64:448]
                hmap_dir_path = os.path.join(dir_name, ref_dir_basename)
                if not os.path.exists(hmap_dir_path):
                    os.makedirs(hmap_dir_path)
                hmap_name = stem + '_hmap_PATCH' + str(patch_idx) + ext
                cv2.imwrite(os.path.join(hmap_dir_path, hmap_name),
                            img_w_hmap)
        patch_dict[patch_idx] = query_dict
    return
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow
import numpy as np
from classification_models.tfkeras import Classifiers

# NOTE(review): despite its name, `resnet` holds the 'xception' constructor
# from the registry. The `preprocess_input` returned here is not the function
# given to the data generator below.
resnet, preprocess_input = Classifiers.get('xception')
import itertools

# Evaluation data location and batching parameters.
test_path = 'base_dir/test_dir'
num_test_samples = 1002
test_batch_size = 8
image_size = 224

# Unshuffled test batches, preprocessed with the Keras Xception function, so
# predictions keep the directory order.
test_batches = ImageDataGenerator(
    preprocessing_function= \
        tensorflow.keras.applications.xception.preprocess_input).flow_from_directory(
            test_path,
            target_size=(image_size, image_size),
            batch_size=test_batch_size,
            shuffle=False)

# Headless backbone with ImageNet weights, followed by global average pooling
# and a 7-way softmax layer.
base_model = resnet(input_shape=(224, 224, 3), weights='imagenet', include_top=False)
x = tensorflow.keras.layers.GlobalAveragePooling2D()(base_model.output)
output = tensorflow.keras.layers.Dense(7, activation='softmax')(x)
model = tensorflow.keras.models.Model(inputs=[base_model.input], outputs=[output])

# See a summary of the new layers in the model
def main(args):
    """Fine-tune a ResNeXt101 classifier and write test predictions to CSV.

    Reads ``args.add_dense`` (insert a 4096-unit dense layer before the
    softmax), ``args.epochs`` and ``args.file`` (output CSV path). Class
    names are the sorted sub-directory names of the training directory.

    The first 2491 layers are frozen *before* the model is compiled, because
    Keras only honours the ``trainable`` flags that are in place when
    ``compile()`` is called.
    """
    original_dir = "/home/nvme/data/train/train"
    class_names = sorted(os.listdir(original_dir))
    N_classes = len(class_names)

    ResNext101, _ = Classifiers.get('resnext101')
    base_model = ResNext101(input_shape=(224, 224, 3), weights='imagenet',
                            include_top=False)

    # add a global spatial average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    # optionally add a fully-connected layer
    if args.add_dense:
        x = Dense(4096, activation='relu')(x)
    # and a softmax layer with one unit per class
    predictions = Dense(N_classes, activation='softmax')(x)

    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze everything below this layer index; train the rest.
    first_trainable_layer = 2491
    for layer in model.layers[:first_trainable_layer]:
        layer.trainable = False
    for layer in model.layers[first_trainable_layer:]:
        layer.trainable = True

    # compile the model (must be done *after* setting layers to non-trainable)
    adam = Adam(lr=0.0001)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=['accuracy'])

    validation_split = 0.2
    batch_size = 16
    # train_dir and val_dir are aliases into original_dir (both are temporary
    # directories). Keep base_dir referenced: it owns those directories.
    base_dir, train_dir, val_dir = split_utils.train_valid_split(
        original_dir, validation_split, seed=1)

    # generator for train data
    train_datagen = ImageDataGenerator(rescale=1. / 255, shear_range=0.2,
                                       zoom_range=0.2, horizontal_flip=True)
    train_gen = train_datagen.flow_from_directory(
        train_dir, class_mode="categorical", target_size=(224, 224),
        batch_size=batch_size, shuffle=True, seed=42)

    # generator for validation data
    val_datagen = ImageDataGenerator(rescale=1. / 255)
    val_gen = val_datagen.flow_from_directory(
        val_dir, class_mode="categorical", target_size=(224, 224),
        batch_size=batch_size, shuffle=True, seed=42)

    epochs = args.epochs

    # Per-class loss weights, keyed by class index (17 classes).
    class_weights = {
        0: 65, 1: 42, 2: 5, 3: 1, 4: 4, 5: 1, 6: 169, 7: 27, 8: 13,
        9: 115, 10: 2, 11: 56, 12: 70, 13: 42, 14: 11, 15: 4, 16: 7
    }

    model.fit_generator(train_gen,
                        steps_per_epoch=train_gen.samples // batch_size,
                        validation_data=val_gen,
                        validation_steps=val_gen.samples // batch_size,
                        epochs=epochs,
                        class_weight=class_weights)

    # Predict the test set one image at a time, unshuffled, so row order
    # matches file order.
    datagen_test = ImageDataGenerator(rescale=1. / 255.)
    test_gen = datagen_test.flow_from_directory('/home/nvme/data/test',
                                                target_size=(224, 224),
                                                batch_size=1,
                                                shuffle=False)
    pred = model.predict_generator(test_gen, verbose=1)
    p = np.argmax(pred, axis=1)
    predictions = [class_names[k] for k in p]

    a = np.arange(len(predictions))
    d = {'Id': a, 'Category': predictions}
    df = pd.DataFrame(d)
    file_name = args.file
    df.to_csv(file_name, index=None, header=True)
'efficientnet-b5': efn.EfficientNetB5, 'efficientnet-b6': efn.EfficientNetB6, 'efficientnet-b7': efn.EfficientNetB7, } Efficientnet_model = efficientnet_models[backbone_name] base_model = Efficientnet_model(input_shape=(128, 128, 3), weights='noisy-student', include_top=False) # if freeze_backbone: # for layer in base_model.layers[:-2]: # layer.trainable = False #checkpoints_load_name = 'work_dirs/efficientnet-b5_aug_alb_balance_swish_imagenet_small/weights/best_efficientnet-b5.hdf5' #base_model.load_weights(checkpoints_load_name, by_name=True) else: BaseModel, preprocess_input = Classifiers.get(backbone_name) base_model = BaseModel(input_shape=(128, 128, 3), weights='imagenet', include_top=False) if freeze_backbone: for layer in base_model.layers[:-2]: layer.trainable = False else: for layer in base_model.layers: layer.trainable = True if group_norm: for i, layer in enumerate(base_model.layers): if "batch_normalization" in layer.name: base_model.layers[i] = GroupNormalization(groups=32, axis=-1, epsilon=0.00001)
def cluster(ref_path, test_path, pred_path, weights, network='triplet', ref_patch_num=5, visualize=False, sliding=True, imagenet=False, freeze_until=None, finetuning=False, subset=False):
    """Classify test patches by descriptor similarity to reference patches.

    Each class directory under ``ref_path`` is named ``<id>_<name>...`` and
    contributes up to ``ref_patch_num`` randomly chosen reference patches.
    Test patches are read from ``<test_path>/<id>_<name>.../patches``. A test
    patch is assigned to the class whose reference descriptors have the
    highest mean dot product with its own L2-normalised descriptor.

    Results go to a new time-stamped directory below ``pred_path``: a copy
    of each test patch in the folder of its predicted class, copies of the
    reference patches, and a pickle with the per-class similarities. A
    classification report and confusion matrix are printed.

    Args:
        ref_path (str): Directory with one sub-directory per reference class.
        test_path (str): Directory with one sub-directory per test class.
        pred_path (str): Parent directory for the prediction output.
        weights (str): Path string to a .h5 weights file.
        network (str): String identifying the network architecture to use.
            Only ``'triplet'`` is supported.
        ref_patch_num (int): Maximum number of reference patches per class.
        visualize (bool): Accepted for interface compatibility; not read.
        sliding (bool): Forwarded to ``setup_network``.
        imagenet (bool): Forwarded to ``setup_network``; also forces
            ``finetuning``.
        freeze_until: Forwarded to ``setup_network``.
        finetuning (bool): Use the ResNet18 preprocessing function instead
            of scaling pixel values by 1/255.
        subset (bool): Keep at most 10 random test images per class.

    Returns:
        None.

    Raises:
        NotImplementedError: If ``network`` is not ``'triplet'``. No
            descriptors are computed for other networks, so the predictions
            would be meaningless.
        ValueError: If no reference patches were found.
    """
    if network != 'triplet':
        raise NotImplementedError(
            "cluster() only supports network='triplet', got {!r}".format(network))

    if imagenet:
        finetuning = True
    prediction_net = setup_network(weights, network, sliding=sliding,
                                   imagenet=imagenet, freeze_until=freeze_until)
    siamese_single_model = prediction_net.single_model

    # The preprocessing function does not change between images, so it is
    # looked up once instead of once per patch.
    preprocess_input = None
    if finetuning:
        _, preprocess_input = Classifiers.get('resnet18')

    def to_batch(image):
        """Return the preprocessed image as a batch of one."""
        batch = np.zeros((1, 128, 128, 3))
        batch[0] = preprocess_input(image) if finetuning else image / 255.
        return batch

    def unit(vec):
        """Return vec scaled to unit length (unchanged if its norm is 0)."""
        norm = np.linalg.norm(vec)
        return vec / norm if norm != 0 else vec

    annotations = dict()
    classes = dict()          # class id -> class name
    class_dir_names = dict()  # class id -> directory name, for the report

    # Parse reference patches and their corresponding classes:
    patch_dirs = os.listdir(ref_path)
    print("Reference patch directories: " + str(patch_dirs))
    ref_dirs = dict()
    for path in patch_dirs:
        if os.path.isdir(os.path.join(ref_path, path)):
            class_id = path.split('_')[0]
            class_name = path.split('_')[1]
            class_dir_names[int(class_id)] = path
            classes[int(class_id)] = class_name
            ref_dirs[int(class_id)] = (class_name, os.path.join(ref_path, path))

    test_patch_dirs = os.listdir(test_path)
    print("Test patch directories: " + str(test_patch_dirs))
    test_data = []
    for path in test_patch_dirs:
        extended_path = os.path.join(path, 'patches')
        if os.path.isdir(os.path.join(test_path, extended_path)):
            class_id = path.split('_')[0]
            class_name = path.split('_')[1]
            # Test classes without a reference class cannot be scored.
            if int(class_id) not in classes:
                continue
            test_images = sorted(
                glob.glob(os.path.join(test_path, extended_path, '*.png')))
            print("Number of images in class " + class_name + ": ",
                  len(test_images))
            if subset:
                print("Only keeping 10 images per class")
                # Clamp so classes with fewer than 10 images do not raise.
                test_images = random.sample(test_images,
                                            min(10, len(test_images)))
            for test_img in test_images:
                test_data.append((test_img, class_id))

    class_count = len(patch_dirs)
    print("Class count: " + str(class_count))

    # Make prediction folders, tagged with the current date and time:
    dt_string = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    pred_path = os.path.join(pred_path,
                             'RPN' + str(ref_patch_num) + '_' + dt_string)
    for class_id, class_name in classes.items():
        os.makedirs(os.path.join(pred_path, str(class_id) + '_' + class_name))

    ref_patches_desc = dict()
    print("Loading reference patches + computing their descriptors")
    for class_id, (class_name, path) in ref_dirs.items():
        patches = sorted(glob.glob(os.path.join(path, '*.png')))
        # Clamp so classes with fewer patches than requested do not raise.
        patches = random.sample(patches, min(ref_patch_num, len(patches)))
        if len(patches) > 0:
            ref_patches_desc[class_id] = []
            for patch in patches:
                print('\t' + patch)
                ref_patch = cv2.imread(patch)
                ref_patch = cv2.resize(ref_patch, (128, 128))
                cv2.imwrite(os.path.join(pred_path, os.path.basename(patch)),
                            ref_patch)
                ref_patches_desc[class_id].append(
                    siamese_single_model.predict(to_batch(ref_patch)))
    cv2.destroyAllWindows()
    print("Done")

    if not ref_patches_desc:
        raise ValueError(
            'No reference patches found under {!r}'.format(ref_path))

    y_true = []
    y_pred = []
    for test_idx, (test_img_path, label_id) in enumerate(test_data):
        if test_idx % 25 == 0:
            print("Processed " + str(test_idx) + "/" + str(len(test_data)))
        label_id = int(label_id)
        test_patch = cv2.imread(test_img_path)
        test_patch = cv2.resize(test_patch, (128, 128))
        test_patch_desc = siamese_single_model.predict(to_batch(test_patch))

        # The anchor descriptor is the same for every reference, so it is
        # normalised once per test image.
        anchor_vec = unit(test_patch_desc[0].flatten())

        similarity_mean = dict()
        for class_id, ref_descs in ref_patches_desc.items():
            dot_prods = []
            for ref_desc in ref_descs:
                ref_vec = unit(ref_desc[0].flatten())
                # NOTE(review): the result of cos_sim_pos is not used. The
                # call is kept in case the helper has side effects; confirm
                # and remove.
                cos_sim_pos(None, [anchor_vec, ref_vec], concat=False)
                dot_prods.append(np.dot(anchor_vec, ref_vec.T))
            similarity_mean[class_id] = np.mean(np.array(dot_prods))

        # Summarize prediction for this image. Starting from -inf (rather
        # than 0) keeps the arg-max correct when all similarities are
        # negative.
        highest = -np.inf
        best_match_class = None
        test_img_basename = os.path.basename(test_img_path)
        annotations['main_dir'] = pred_path
        annotations[test_img_basename] = dict()
        for class_id, mean_similarity in similarity_mean.items():
            if mean_similarity > highest:
                highest = mean_similarity
                best_match_class = class_id
            annotations[test_img_basename][class_id] = mean_similarity
        annotations[test_img_basename]['best_match_class'] = (
            best_match_class, classes[best_match_class])

        output_img_path = os.path.join(
            pred_path,
            str(best_match_class) + "_" + classes[best_match_class],
            test_img_basename)
        cv2.imwrite(output_img_path, test_patch)
        y_true.append(label_id)
        y_pred.append(best_match_class)

    annotations_path = os.path.join(
        pred_path, "RPN" + str(ref_patch_num) + "_similarity_annotations.pkl")
    with open(annotations_path, "wb") as handle:
        pkl.dump(annotations, handle, protocol=pkl.HIGHEST_PROTOCOL)

    # Use one explicit label order for both the report and the matrix so the
    # display names always line up with the numeric class ids.
    label_ids = sorted(classes)
    target_names = [class_dir_names[class_id] for class_id in label_ids]
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    print(classification_report(y_true, y_pred, labels=label_ids,
                                target_names=target_names))
    print("Confusion matrix: ")
    print(cm)
    return
def _chain_layers(all_layers, index, tensor, stop_name, tap_names=(), taps=None):
    """
    Apply consecutive layers to ``tensor`` until the layer named ``stop_name``.

    Layers ``all_layers[index]``, ``all_layers[index + 1]``, ... are called in
    order on the running tensor; the layer named ``stop_name`` is not applied.
    An ``IndexError`` is raised if no remaining layer carries that name.

    :param all_layers: indexable sequence of callable layers exposing ``name``
    :param index: position of the first layer to apply
    :param tensor: tensor fed into the first layer
    :param stop_name: name of the layer at which to stop (exclusive)
    :param tap_names: names of layers whose outputs are appended to ``taps``
    :param taps: list to append to; a fresh list is created when omitted
    :return: ``(tensor, index, taps)``, ``index`` pointing at ``stop_name``
    """
    taps = [] if taps is None else taps
    while all_layers[index].name != stop_name:
        layer = all_layers[index]
        tensor = layer(tensor)
        if layer.name in tap_names:
            taps.append(tensor)
        index += 1
    return tensor, index, taps


def _chain_residual(all_layers, index, tensor, add_name, tap_names, shortcut=None):
    """
    Run the layers up to the merge layer ``add_name`` and then apply it.

    The merge layer is called on ``[shortcut] + tapped outputs`` (``shortcut``
    is left out when it is ``None``).

    :param all_layers: indexable sequence of callable layers exposing ``name``
    :param index: position of the first layer to apply
    :param tensor: tensor fed into the first layer
    :param add_name: name of the merge layer that closes the block
    :param tap_names: names of layers whose outputs feed the merge layer
    :param shortcut: tensor produced before this call that must also be merged
    :return: ``(merged tensor, index of the layer after the merge layer)``
    """
    branches = [] if shortcut is None else [shortcut]
    tensor, index, branches = _chain_layers(
        all_layers, index, tensor, add_name, tap_names, branches)
    tensor = all_layers[index](branches)
    return tensor, index + 1


def get_backbone():
    """
    Split a MobileNetV2 (no top, ``weights=None``) into five chained models.

    The layers of one MobileNetV2 instance are re-applied, in order, to five
    fresh inputs. Every stage except the first starts with an ``Up`` layer and
    every stage ends with a ``Down`` layer (both are project-defined; judging
    by the stage input shapes they adapt channel counts between stages --
    TODO confirm).

    Fix over the previous revision: the shortcut of blocks 5, 8, 9, 12 and 15
    is the output of the preceding ``block_N_add``. That tensor was produced
    before the loop that was meant to collect it, so it was never collected
    and the Add layer received a single tensor. It is now passed explicitly.

    :return: tuple ``(l1, l2, l3, l4, l5)`` of Keras models
    """
    model_bb, _ = Classifiers.get(name='mobilenetv2')
    model_bb = model_bb(input_shape=(SHAPE, SHAPE, RGB),
                        include_top=False,
                        weights=None)
    all_layers = model_bb.layers
    i = 1  # index 0 is skipped (presumably the model's own input layer)

    # Stage 1: stem up to (excluding) 'block_1_pad'.
    l1_inputs = layers.Input((SHAPE, SHAPE, RGB))
    l1 = tf.keras.applications.mobilenet_v2.preprocess_input(l1_inputs)
    l1, i, _ = _chain_layers(all_layers, i, l1, 'block_1_pad')
    l1 = Down(32)(l1)
    l1 = Model(inputs=l1_inputs, outputs=l1)

    # Stage 2: blocks 1-2 and the expand part of block 3.
    l2_inputs = layers.Input((SHAPE // 2, SHAPE // 2, 32))
    l2 = Up(96)(l2_inputs)
    l2, i = _chain_residual(all_layers, i, l2, 'block_2_add',
                            ('block_1_project_BN', 'block_2_project_BN'))
    l2, i, _ = _chain_layers(all_layers, i, l2, 'block_3_pad')
    l2 = Down(64)(l2)
    l2 = Model(inputs=l2_inputs, outputs=l2)

    # Stage 3: blocks 3-5 and the expand part of block 6.
    l3_inputs = layers.Input((SHAPE // 4, SHAPE // 4, 64))
    l3 = Up(144)(l3_inputs)
    l3, i = _chain_residual(all_layers, i, l3, 'block_4_add',
                            ('block_3_project_BN', 'block_4_project_BN'))
    l3, i = _chain_residual(all_layers, i, l3, 'block_5_add',
                            ('block_5_project_BN',), shortcut=l3)
    l3, i, _ = _chain_layers(all_layers, i, l3, 'block_6_pad')
    l3 = Down(128)(l3)
    l3 = Model(inputs=l3_inputs, outputs=l3)

    # Stage 4: blocks 6-10 and the expand part of block 11.
    l4_inputs = layers.Input((SHAPE // 8, SHAPE // 8, 128))
    l4 = Up(192)(l4_inputs)
    l4, i = _chain_residual(all_layers, i, l4, 'block_7_add',
                            ('block_6_project_BN', 'block_7_project_BN'))
    l4, i = _chain_residual(all_layers, i, l4, 'block_8_add',
                            ('block_8_project_BN',), shortcut=l4)
    l4, i = _chain_residual(all_layers, i, l4, 'block_9_add',
                            ('block_9_project_BN',), shortcut=l4)
    l4, i, _ = _chain_layers(all_layers, i, l4, 'block_11_depthwise')
    l4 = Down(256)(l4)
    l4 = Model(inputs=l4_inputs, outputs=l4)

    # Stage 5: rest of block 11 through 'out_relu'.
    l5_inputs = layers.Input((SHAPE // 16, SHAPE // 16, 256))
    l5 = Up(576)(l5_inputs)
    # 'block_10_project_BN' was already applied inside stage 4, so it cannot
    # be tapped here: 'block_11_add' is called with one tensor, as before.
    # NOTE(review): this relies on the merge layer accepting a one-element
    # list -- confirm against the Keras version in use.
    l5, i = _chain_residual(all_layers, i, l5, 'block_11_add',
                            ('block_11_project_BN',))
    l5, i = _chain_residual(all_layers, i, l5, 'block_12_add',
                            ('block_12_project_BN',), shortcut=l5)
    l5, i = _chain_residual(all_layers, i, l5, 'block_14_add',
                            ('block_13_project_BN', 'block_14_project_BN'))
    l5, i = _chain_residual(all_layers, i, l5, 'block_15_add',
                            ('block_15_project_BN',), shortcut=l5)
    l5, i, _ = _chain_layers(all_layers, i, l5, 'out_relu')
    l5 = all_layers[i](l5)  # apply 'out_relu' itself
    l5 = Down(512)(l5)
    l5 = Model(inputs=l5_inputs, outputs=l5)

    return l1, l2, l3, l4, l5
def test_preprocess_input():
    """
    Check ``module.preprocess_input`` against the ``resnet34`` preprocessing
    function returned by ``classification_models`` on a random batch.
    """
    from classification_models.tfkeras import Classifiers

    _model_fn, reference_preprocess = Classifiers.get("resnet34")
    batch = np.random.uniform(0, 1, size=(3, 32, 32, 3))

    # The function under test runs first, then the reference implementation.
    actual = module.preprocess_input(batch)
    expected = pytest.approx(reference_preprocess(batch))
    assert actual == expected
def objective(trial):
    """
    Build, train and score one ResNet18 regression model for a tuning trial.

    The trial decides whether a 1x1 convolution projects single-channel
    images to three channels in front of the ImageNet-initialised ResNet18,
    the dropout rate, and the width of the three dense layers that precede
    the single-unit output. The model is trained with MSE loss on images
    listed in the CSV files at ``train_path`` / ``validate_path``.

    Reads these module-level settings: ``image_height``, ``image_width``,
    ``learning_rate``, ``train_path``, ``validate_path``, ``image_dir``,
    ``batch_size``, ``checkpointpath``, ``modelName`` and ``numEpochs``.

    :param trial: object providing ``suggest_categorical``,
        ``suggest_uniform`` and ``suggest_int`` (presumably an Optuna trial)
    :return: the ``val_mse`` recorded for the last epoch that was run
    """
    import math

    ResNet18, _ = Classifiers.get('resnet18')
    RESNET = ResNet18(include_top=False,
                      weights='imagenet',
                      input_shape=(image_height, image_width, 3))
    model = tf.keras.Sequential()

    # Projection: optional 1x1 convolution mapping 1 channel to 3.
    doProjection = trial.suggest_categorical('projection', ['yes', 'no'])
    if doProjection == 'yes':
        model.add(Conv2D(3, (1, 1),
                         input_shape=(image_height, image_width, 1),
                         padding="same"))
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.0, 0.5)

    # Resnet feature extractor followed by the regression head.
    model.add(RESNET)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(dropout_rate))
    for units_name in ("num_neurons_1", "num_neurons_2", "num_neurons_3"):
        model.add(Dense(trial.suggest_int(units_name, 1, 512),
                        activation="relu"))
    model.add(Dense(1))

    optimize = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimize, loss='MSE', metrics=['mse'])

    # Data generators
    train_df = pandas.read_csv(train_path)
    validate_df = pandas.read_csv(validate_path)
    train_datagen = ImageDataGenerator(rescale=1. / 255,
                                       horizontal_flip=True,
                                       vertical_flip=True)
    val_datagen = ImageDataGenerator(rescale=1. / 255)

    # input_shape is (batch, height, width, channels): the channel count is
    # the LAST entry. Index 2 is the image width, which made the grayscale
    # branch unreachable and fed RGB batches to the 1-channel projection.
    color_mode = "grayscale" if model.input_shape[-1] == 1 else "rgb"
    flow_options = dict(directory=image_dir,
                        x_col="filename",
                        y_col='label',
                        target_size=(image_height, image_width),
                        batch_size=batch_size,
                        shuffle=True,
                        class_mode="raw",
                        color_mode=color_mode)
    train_generator = train_datagen.flow_from_dataframe(dataframe=train_df,
                                                        **flow_options)
    val_generator = val_datagen.flow_from_dataframe(dataframe=validate_df,
                                                    **flow_options)

    filepath = (str(checkpointpath) + "model_" + str(modelName)
                + "_checkpoint-" + str(image_height) + "x" + str(image_width)
                + "-{epoch:03d}-{val_mse:.16f}.hdf5")
    RLR = keras.callbacks.ReduceLROnPlateau(monitor='val_mse',
                                            factor=0.5,
                                            patience=2,
                                            verbose=1,
                                            mode='min',
                                            min_delta=0.0001,
                                            cooldown=0)
    checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                                 monitor='val_mse',
                                                 verbose=0,
                                                 save_best_only=True,
                                                 save_weights_only=False,
                                                 mode='min')
    earlyStop = keras.callbacks.EarlyStopping(monitor='val_mse',
                                              mode='min',
                                              patience=10,
                                              restore_best_weights=True,
                                              verbose=1)
    callbacks_list = [checkpoint, RLR, earlyStop]

    model.summary()
    # A whole number of steps covering every sample once per epoch (the last
    # batch may be partial); the plain division produced a float.
    steps_per_epoch = math.ceil(train_generator.n / train_generator.batch_size)
    history = model.fit(train_generator,
                        validation_data=val_generator,
                        verbose=1,
                        epochs=numEpochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=callbacks_list)
    val = history.history['val_mse']

    # NOTE(review): this removes files from a hard-coded directory, not from
    # `checkpointpath` -- confirm both point at the same location.
    os.system("rm /home/lasg/bachelor-data/checkpoints/*")
    return val[-1]
def get_backbone(input_shape, encodings_len=4096, backbone_name='simple',
                 embeddings_normalization=True, backbone_weights='imagenet',
                 freeze_backbone=False, **kwargs):
    """
    Build an embedding network together with its convolutional backbone.

    ``backbone_name`` selects the architecture: ``'simple'`` and ``'simple2'``
    are small hand-written CNNs, names starting with ``'efficientnet'`` are
    looked up in ``efficientnet.tfkeras``, and every other name is resolved
    through ``classification_models``. ``backbone_weights`` and
    ``freeze_backbone`` are only used for the two library-backed cases; when
    freezing, all backbone layers but the last two become non-trainable.
    Extra keyword arguments are accepted and ignored.

    :param input_shape: shape of one input image
    :param encodings_len: width of the final embedding layer
    :param backbone_name: architecture selector (see above)
    :param embeddings_normalization: append an L2-normalisation layer named
        ``'l2_norm'`` when true
    :param backbone_weights: weights argument forwarded to library backbones
    :param freeze_backbone: freeze library backbone layers (all but last two)
    :return: tuple ``(base_model, backbone_model)``
    """

    def finish(model_input, embedding):
        # Optionally L2-normalise the embedding, then wrap it into a Model.
        if embeddings_normalization:
            embedding = Lambda(lambda t: K.l2_normalize(t, axis=1),
                               name='l2_norm')(embedding)
        return Model(inputs=[model_input], outputs=[embedding])

    if backbone_name == 'simple':
        image = Input(input_shape)
        features = image
        conv_specs = ((64, (10, 10)), (128, (7, 7)), (128, (4, 4)), (256, (4, 4)))
        for position, (filters, kernel) in enumerate(conv_specs):
            if position > 0:
                # Max-pooling sits between consecutive convolutions only.
                features = MaxPool2D()(features)
            features = Conv2D(filters, kernel, activation='relu',
                              kernel_regularizer=l2(2e-4))(features)
        features = Flatten()(features)
        backbone_model = Model(inputs=[image], outputs=[features])

        embedding = Dense(encodings_len, activation='relu',
                          kernel_regularizer=l2(1e-3))(features)
        base_model = finish(image, embedding)

    elif backbone_name == 'simple2':
        image = Input(input_shape)
        features = image
        for filters in (32, 64):
            # Two plain 3x3 convolutions, each followed by batch norm ...
            for _ in range(2):
                features = Conv2D(filters, kernel_size=3, activation='relu',
                                  kernel_regularizer=l2(2e-4))(features)
                features = BatchNormalization()(features)
            # ... then a strided 5x5 convolution, batch norm and dropout.
            features = Conv2D(filters, kernel_size=5, strides=2,
                              padding='same', activation='relu',
                              kernel_regularizer=l2(2e-4))(features)
            features = BatchNormalization()(features)
            features = Dropout(0.4)(features)
        features = Conv2D(128, kernel_size=4, activation='relu',
                          kernel_regularizer=l2(2e-4))(features)
        features = BatchNormalization()(features)
        backbone_model = Model(inputs=[image], outputs=[features])

        head = Flatten()(features)
        head = Dense(512, activation="relu")(head)
        head = Dropout(0.5)(head)
        embedding = Dense(encodings_len, activation='relu',
                          kernel_regularizer=l2(1e-3))(head)
        base_model = finish(image, embedding)

    else:
        if backbone_name.startswith('efficientnet'):
            import efficientnet.tfkeras as efn
            efficientnet_models = {
                'efficientnet-b{}'.format(variant):
                    getattr(efn, 'EfficientNetB{}'.format(variant))
                for variant in range(8)
            }
            make_backbone = efficientnet_models[backbone_name]
        else:
            from classification_models.tfkeras import Classifiers
            make_backbone, _ = Classifiers.get(backbone_name)
        backbone_model = make_backbone(input_shape=input_shape,
                                       weights=backbone_weights,
                                       include_top=False)

        if freeze_backbone:
            for frozen_layer in backbone_model.layers[:-2]:
                frozen_layer.trainable = False

        head = GlobalAveragePooling2D()(backbone_model.output)
        head = Dense(encodings_len // 2, activation="relu")(head)
        embedding = Dense(encodings_len, activation="relu")(head)
        base_model = finish(backbone_model.input, embedding)

    return base_model, backbone_model
def Lipreading(mode, inputDim=256, hiddenDim=512, nClasses=500, frameLen=29,
               absolute_max_string_len=128, every_frame=True, pretrain=None):
    """
    Assemble the lip-reading model: 3D conv front-end, ResNet18, then a back-end.

    The input named ``'frames_input'`` has shape ``(frameLen, 50, 100, 1)``.
    The front-end output is reshaped so that its first two axes are merged,
    passed through a ResNet18 without top, pooled and projected to
    ``inputDim`` features. ``mode`` then selects the back-end:

    * ``'temporalConv'``: two 1D-convolution stages followed by a dense head
      with ``nClasses`` outputs;
    * ``'backendGRU'`` / ``'finetuneGRU'``: the file-level ``GRU`` helper.

    ``absolute_max_string_len`` is not used in this function.

    :param mode: back-end selector (see above)
    :param inputDim: width of the per-frame feature vector
    :param hiddenDim: forwarded to ``GRU``
    :param nClasses: number of output classes
    :param frameLen: number of frames per input sequence
    :param every_frame: forwarded to ``GRU``
    :param pretrain: when equal to ``True``, weights are loaded from a fixed
        path after the model is built
    :return: the assembled Keras ``Model``
    :raises Exception: if ``mode`` is none of the supported values
    """
    spatiotemporal_layers = [
        ZeroPadding3D(padding=(2, 3, 3)),
        Conv3D(64, kernel_size=(5, 7, 7), strides=(1, 2, 2),
               padding='valid', use_bias=False),
        BatchNormalization(),
        Activation('relu'),
        ZeroPadding3D(padding=(0, 4, 8)),
        MaxPooling3D(pool_size=(1, 2, 3), strides=(1, 1, 2)),
    ]
    frontend3D = Sequential(spatiotemporal_layers)

    temporal_layers = [
        Conv1D(2 * inputDim, 5, strides=2, use_bias=False),
        BatchNormalization(),
        Activation('relu'),
        MaxPooling1D(2, 2),
        Conv1D(4 * inputDim, 5, strides=2, use_bias=False),
        BatchNormalization(),
        Activation('relu'),
    ]
    backend_conv1 = Sequential(temporal_layers)

    classifier_layers = [
        Dense(inputDim),
        BatchNormalization(),
        Activation('relu'),
        Dense(nClasses),
    ]
    backend_conv2 = Sequential(classifier_layers)

    gru_depth = 2

    def as_sequence(tensor, layer_name):
        # Regroup per-frame feature vectors into (-1, frameLen, inputDim).
        return Lambda(lambda t: tf.reshape(t, [-1, frameLen, inputDim]),
                      name=layer_name)(tensor)

    # Forward pass
    input_frames = Input(shape=(frameLen, 50, 100, 1), name='frames_input')
    features = frontend3D(input_frames)
    print('3D Conv Out:', features.shape)

    # Merge the first two axes so the 2D ResNet sees one image per row.
    features = Lambda(
        lambda t: tf.reshape(
            t, [-1, int(t.shape[2]), int(t.shape[3]), int(t.shape[4])]),
        name='lambda2')(features)
    print('3D Conv Out Reshape:', features.shape)

    channels = int(features.shape[-1])
    ResNet18, _ = Classifiers.get('resnet18')
    resnet18 = ResNet18((None, None, channels), weights=None, include_top=False)
    features = resnet18(features)
    print('Resnet18 Out:', features.shape)

    features = GlobalAveragePooling2D(name='global_avgpool_resnet')(features)
    features = Dense(inputDim, name='dense_resnet')(features)
    features = BatchNormalization(name='bn_resnet')(features)
    print('Resnet18 Linear Out:', features.shape)

    if mode == 'temporalConv':
        features = as_sequence(features, 'lambda3')
        # NOTE(review): after this transpose the Conv1D layers receive
        # (batch, inputDim, frameLen) -- confirm this axis order is intended.
        features = Lambda(lambda t: tf.transpose(t, [0, 2, 1]),
                          name='lambda4')(features)
        features = backend_conv1(features)
        features = Lambda(lambda t: tf.reduce_mean(t, 2),
                          name='lambda5')(features)
        features = backend_conv2(features)
    elif mode in ('backendGRU', 'finetuneGRU'):
        features = as_sequence(features, 'lambda6')
        print('Input to GRU:', features.shape)
        features = GRU(features, inputDim, hiddenDim, gru_depth, nClasses,
                       every_frame)
        print('GRU Out:', features.shape)
    else:
        raise Exception('No model is selected')

    model = Model(inputs=input_frames, outputs=features)

    if pretrain == True:
        model.load_weights(
            '/data/models/combResnetLSTM_CTCloss_236k-train_1to3ratio_valWER_epochs9to20_lr1e-5_0.1decay9epochs/weights-04-109.0513.hdf5'
        )
        print('ResNet LSTM Pretrain weights loaded')
    return model