def detect(self, images, verbose=0):
    """Runs the detection pipeline.

    images: List of images, potentially of different sizes.

    Returns a list of dicts, one dict per image. The dict contains:
    rois: [N, (y1, x1, y2, x2)] detection bounding boxes
    class_ids: [N] int class IDs
    scores: [N] float probability scores for the class IDs
    masks: [H, W, N] instance binary masks
    """
    assert self.mode == "inference", "Create model in inference mode."
    assert len(images) == self.config.BATCH_SIZE,\
        "len(images) must be equal to BATCH_SIZE"

    if verbose:
        utils.log("Processing {} images".format(len(images)))
        for image in images:
            utils.log("image", image)

    # Mold inputs to format expected by the neural network
    molded_images, image_metas, windows = self.mold_inputs(images)

    # Validate image sizes
    # All images in a batch MUST be of the same size
    image_shape = molded_images[0].shape
    for g in molded_images[1:]:
        assert g.shape == image_shape,\
            "After resizing, all images must have the same size. " \
            "Check IMAGE_RESIZE_MODE and image sizes."

    # Anchors
    anchors = self.get_anchors(image_shape)
    # Duplicate across the batch dimension because Keras requires it
    # TODO: can this be optimized to avoid duplicating the anchors?
    anchors = np.broadcast_to(anchors,
                              (self.config.BATCH_SIZE,) + anchors.shape)

    if verbose:
        utils.log("molded_images", molded_images)
        utils.log("image_metas", image_metas)
        utils.log("anchors", anchors)

    # Run object detection
    detections, _, _, mrcnn_mask, _, _, _ =\
        self.keras_model.predict([molded_images, image_metas, anchors],
                                 verbose=0)

    # Process detections
    results = []
    for i, image in enumerate(images):
        final_rois, final_class_ids, final_scores, final_masks =\
            self.unmold_detections(detections[i], mrcnn_mask[i],
                                   image.shape, molded_images[i].shape,
                                   windows[i])
        results.append({
            "rois": final_rois,
            "class_ids": final_class_ids,
            "scores": final_scores,
            "masks": final_masks,
        })
    return results
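# A minimal usage sketch for detect(). The class name, weights path, and
# BATCH_SIZE == 1 config are assumptions for illustration, not part of this
# module:
#
#   model = MaskRCNN(mode="inference", config=config, model_dir="logs")
#   model.load_weights("mask_rcnn_coco.h5", by_name=True)
#   results = model.detect([image], verbose=1)
#   r = results[0]
#   print(r["rois"].shape, r["class_ids"], r["scores"], r["masks"].shape)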
def detect_molded(self, molded_images, image_metas, verbose=0):
    """Runs the detection pipeline, but expects inputs that are
    molded already. Used mostly for debugging and inspecting
    the model.

    molded_images: List of images loaded using load_image_gt()
    image_metas: image meta data, also returned by load_image_gt()

    Returns a list of dicts, one dict per image. The dict contains:
    rois: [N, (y1, x1, y2, x2)] detection bounding boxes
    class_ids: [N] int class IDs
    scores: [N] float probability scores for the class IDs
    masks: [H, W, N] instance binary masks
    """
    assert self.mode == "inference", "Create model in inference mode."
    assert len(molded_images) == self.config.BATCH_SIZE,\
        "Number of images must be equal to BATCH_SIZE"

    if verbose:
        utils.log("Processing {} images".format(len(molded_images)))
        for image in molded_images:
            utils.log("image", image)

    # Validate image sizes
    # All images in a batch MUST be of the same size
    image_shape = molded_images[0].shape
    for g in molded_images[1:]:
        assert g.shape == image_shape, "Images must have the same size"

    # Anchors
    anchors = self.get_anchors(image_shape)
    # Duplicate across the batch dimension because Keras requires it
    # TODO: can this be optimized to avoid duplicating the anchors?
    anchors = np.broadcast_to(anchors,
                              (self.config.BATCH_SIZE,) + anchors.shape)

    if verbose:
        utils.log("molded_images", molded_images)
        utils.log("image_metas", image_metas)
        utils.log("anchors", anchors)

    # Run object detection
    detections, _, _, mrcnn_mask, _, _, _ =\
        self.keras_model.predict([molded_images, image_metas, anchors],
                                 verbose=0)

    # Process detections
    results = []
    for i, image in enumerate(molded_images):
        window = [0, 0, image.shape[0], image.shape[1]]
        final_rois, final_class_ids, final_scores, final_masks =\
            self.unmold_detections(detections[i], mrcnn_mask[i],
                                   image.shape, molded_images[i].shape,
                                   window)
        results.append({
            "rois": final_rois,
            "class_ids": final_class_ids,
            "scores": final_scores,
            "masks": final_masks,
        })
    return results
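# A minimal sketch of calling detect_molded() on a single pre-molded image,
# assuming `image` and `image_meta` were produced by load_image_gt() and the
# config uses BATCH_SIZE == 1 (the batch dimension is added with
# np.expand_dims; these are assumptions for illustration):
#
#   results = model.detect_molded(np.expand_dims(image, 0),
#                                 np.expand_dims(image_meta, 0), verbose=1)
#   r = results[0]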
def run_graph(self, images, outputs, image_metas=None):
    """Runs a sub-set of the computation graph that computes the given
    outputs.

    image_metas: If provided, the images are assumed to be already
        molded (i.e. resized, padded, and normalized)

    outputs: List of tuples (name, tensor) to compute. The tensors are
        symbolic TensorFlow tensors and the names are for easy tracking.

    Returns an ordered dict of results. Keys are the names received in the
    input and values are Numpy arrays.
    """
    model = self.keras_model

    # Organize desired outputs into an ordered dict
    outputs = OrderedDict(outputs)
    for o in outputs.values():
        assert o is not None

    # Build a Keras function to run parts of the computation graph
    inputs = model.inputs
    if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
        inputs += [K.learning_phase()]
    kf = K.function(inputs, list(outputs.values()))

    # Prepare inputs
    if image_metas is None:
        molded_images, image_metas, _ = self.mold_inputs(images)
    else:
        molded_images = images
    image_shape = molded_images[0].shape
    # Anchors
    anchors = self.get_anchors(image_shape)
    # Duplicate across the batch dimension because Keras requires it
    # TODO: can this be optimized to avoid duplicating the anchors?
    anchors = np.broadcast_to(anchors,
                              (self.config.BATCH_SIZE,) + anchors.shape)
    model_in = [molded_images, image_metas, anchors]

    # Run inference
    if model.uses_learning_phase and not isinstance(K.learning_phase(), int):
        model_in.append(0.)
    outputs_np = kf(model_in)

    # Pack the generated Numpy arrays into a dict and log the results.
    outputs_np = OrderedDict([(k, v)
                              for k, v in zip(outputs.keys(), outputs_np)])
    for k, v in outputs_np.items():
        utils.log(k, v)
    return outputs_np
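# Example of inspecting intermediate tensors with run_graph(). The layer
# names below ("rpn_class", "ROI") follow the usual Mask R-CNN naming and are
# assumptions here; substitute the names that exist in your built model:
#
#   activations = model.run_graph([image], [
#       ("rpn_class", model.keras_model.get_layer("rpn_class").output),
#       ("proposals", model.keras_model.get_layer("ROI").output),
#   ])
#   print(activations["proposals"].shape)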
def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1):
    """Sets model layers as trainable if their names match
    the given regular expression.
    """
    # Print message on the first call (but not on recursive calls)
    if verbose > 0 and keras_model is None:
        utils.log("Selecting layers to train")

    keras_model = keras_model or self.keras_model

    # In multi-GPU training, we wrap the model. Get layers
    # of the inner model because they have the weights.
    layers = keras_model.inner_model.layers\
        if hasattr(keras_model, "inner_model") else keras_model.layers

    for layer in layers:
        # Is the layer a model?
        if layer.__class__.__name__ == 'Model':
            print("In model: ", layer.name)
            self.set_trainable(layer_regex,
                               keras_model=layer,
                               indent=indent + 4)
            continue

        if not layer.weights:
            continue
        # Is it trainable?
        trainable = bool(re.fullmatch(layer_regex, layer.name))
        # Update layer. If layer is a container, update inner layer.
        if layer.__class__.__name__ == 'TimeDistributed':
            layer.layer.trainable = trainable
        else:
            layer.trainable = trainable
        # Print trainable layer names
        if trainable and verbose > 0:
            utils.log("{}{:20} ({})".format(" " * indent, layer.name,
                                            layer.__class__.__name__))
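# set_trainable() is normally invoked from train(), but it can be called
# directly; the selection is a plain re.fullmatch against each layer name.
# A sketch (the regexes mirror the predefined ones in train()):
#
#   model.set_trainable(r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)")  # heads only
#   model.set_trainable(".*")                                # all layers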
def train(self, train_dataset, val_dataset, learning_rate, epochs, layers,
          augmentation=None):
    """Train the model.
    train_dataset, val_dataset: Training and validation Dataset objects.
    learning_rate: The learning rate to train with
    epochs: Number of training epochs. Note that previous training epochs
            are considered to be done already, so this actually determines
            the epochs to train in total rather than in this particular
            call.
    layers: Allows selecting which layers to train. It can be:
        - A regular expression to match layer names to train
        - One of these predefined values:
          heads: The RPN, classifier and mask heads of the network
          all: All the layers
          3+: Train Resnet stage 3 and up
          4+: Train Resnet stage 4 and up
          5+: Train Resnet stage 5 and up
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
        augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
        flips images right/left 50% of the time. You can pass complex
        augmentations as well. This augmentation applies 50% of the time,
        and when it does it flips images right/left half the time and adds
        a Gaussian blur with a random sigma in range 0 to 5.

            augmentation = imgaug.augmenters.Sometimes(0.5, [
                imgaug.augmenters.Fliplr(0.5),
                imgaug.augmenters.GaussianBlur(sigma=(0.0, 5.0))
            ])
    """
    assert self.mode == "training", "Create model in training mode."

    # Pre-defined layer regular expressions
    layer_regex = {
        # all layers but the backbone
        "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
        # From a specific Resnet stage and up
        "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
        "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
        "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)",
        # All layers
        "all": ".*",
    }
    if layers in layer_regex.keys():
        layers = layer_regex[layers]

    # Data generators
    train_generator = data.data_generator(train_dataset, self.config,
                                          shuffle=True,
                                          augmentation=augmentation,
                                          batch_size=self.config.BATCH_SIZE)
    val_generator = data.data_generator(val_dataset, self.config,
                                        shuffle=True,
                                        batch_size=self.config.BATCH_SIZE)

    # Callbacks
    callbacks = [
        keras.callbacks.TensorBoard(log_dir=self.log_dir,
                                    histogram_freq=0,
                                    write_graph=True,
                                    write_images=False),
        keras.callbacks.ModelCheckpoint(self.checkpoint_path,
                                        verbose=0,
                                        save_weights_only=True),
    ]

    # Train
    utils.log("\nStarting at epoch {}. LR={}\n".format(self.epoch,
                                                       learning_rate))
    utils.log("Checkpoint Path: {}".format(self.checkpoint_path))
    self.set_trainable(layers)
    self.compile(learning_rate, self.config.LEARNING_MOMENTUM)

    # Work-around for Windows: Keras fails on Windows when using
    # multiprocessing workers. See discussion here:
    # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009
    if os.name == 'nt':
        workers = 0
    else:
        workers = multiprocessing.cpu_count()

    self.keras_model.fit_generator(
        train_generator,
        initial_epoch=self.epoch,
        epochs=epochs,
        steps_per_epoch=self.config.STEPS_PER_EPOCH,
        callbacks=callbacks,
        validation_data=val_generator,
        validation_steps=self.config.VALIDATION_STEPS,
        max_queue_size=100,
        workers=workers,
        use_multiprocessing=True,
    )
    self.epoch = max(self.epoch, epochs)
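# A typical two-stage schedule built on train(). The dataset objects and
# learning-rate values are illustrative assumptions; note that `epochs` is a
# running total, so the second call continues from epoch 20 to 40:
#
#   model.train(dataset_train, dataset_val,
#               learning_rate=config.LEARNING_RATE,
#               epochs=20, layers="heads")
#   model.train(dataset_train, dataset_val,
#               learning_rate=config.LEARNING_RATE / 10,
#               epochs=40, layers="all")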