def __init__(
    self,
    phi=0,
    image_sizes=(512, 640, 768, 896, 1024, 1280, 1408),
    misc_effect=None,
    visual_effect=None,
    batch_size=1,
    group_method='ratio',  # one of 'none', 'random', 'ratio'
    shuffle_groups=True,
):
    """
    Set up the generator: store the options, build the anchors and group the images.

    Args:
        phi: Index into image_sizes selecting the (square) input resolution.
        image_sizes: Candidate input resolutions, one entry per phi.
        misc_effect: Stored on the instance; not used by this constructor.
        visual_effect: Stored on the instance; not used by this constructor.
        batch_size: The size of the batches to generate (converted with int()).
        group_method: How images are grouped together, one of
            ('none', 'random', 'ratio'); defaults to 'ratio'.
        shuffle_groups: If True, the groups are shuffled once here, right after
            they have been built.
    """
    # Augmentation hooks are only kept for later use.
    self.misc_effect = misc_effect
    self.visual_effect = visual_effect

    # Batching / grouping options.
    self.batch_size = int(batch_size)
    self.group_method = group_method
    self.shuffle_groups = shuffle_groups

    # The network input is square, so a single edge length describes it.
    resolution = image_sizes[phi]
    self.image_size = resolution
    self.groups = None
    self.anchors = anchors_for_shape((resolution, resolution))
    self.current_index = 0

    # group_images() is expected to populate self.groups.
    self.group_images()

    if self.shuffle_groups:
        random.shuffle(self.groups)
def generate_anchors(self, image_shape):
    """
    Build the anchors for an image of the given shape.

    Anchor parameters are parsed from ``self.config`` when it is truthy and
    contains an 'anchor_parameters' entry; otherwise ``None`` is handed to
    ``anchors_for_shape``. ``self.compute_shapes`` is always passed as the
    shapes callback.
    """
    custom_params = (
        parse_anchor_parameters(self.config)
        if self.config and 'anchor_parameters' in self.config
        else None
    )
    return anchors_for_shape(
        image_shape,
        anchor_params=custom_params,
        shapes_callback=self.compute_shapes,
    )
def main():
    """
    Run EfficientDet on every frame of 'datasets/video.mp4' and display the detections.

    The model configuration (phi, weights path, class list, score threshold)
    is hard-coded below. Frames are read with OpenCV, detections above the
    score threshold are drawn onto the frame, and the frame is shown in a
    window named 'image'. The capture and the window are released when the
    video ends or an error occurs.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    phi = 1
    weighted_bifpn = False
    model_path = 'checkpoints/2019-12-03/pascal_05_0.6283_1.1975_0.8029.h5'
    image_sizes = (512, 640, 768, 896, 1024, 1280, 1408)
    image_size = image_sizes[phi]
    classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
        'train', 'tvmonitor',
    ]
    num_classes = len(classes)
    score_threshold = 0.5
    colors = [np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)]

    _, prediction_model = efficientdet(phi=phi,
                                       weighted_bifpn=weighted_bifpn,
                                       num_classes=num_classes,
                                       score_threshold=score_threshold)
    prediction_model.load_weights(model_path, by_name=True)

    # The input resolution never changes, so the anchors are the same for
    # every frame; build the batched array once instead of once per frame.
    anchors_batch = np.expand_dims(anchors_for_shape((image_size, image_size)), axis=0)

    video_path = 'datasets/video.mp4'
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            h, w = frame.shape[:2]

            # OpenCV delivers BGR frames; flip the channel axis before
            # preprocessing, as the single-image inference code does for the
            # same checkpoint. Drawing still happens on the BGR frame.
            image, scale, offset_h, offset_w = preprocess_image(frame[:, :, ::-1],
                                                                image_size=image_size)
            boxes_batch, scores_batch, labels_batch = prediction_model.predict_on_batch(
                [np.expand_dims(image, axis=0), anchors_batch])

            for boxes, scores, labels in zip(boxes_batch, scores_batch, labels_batch):
                boxes = post_process_boxes(boxes=boxes,
                                           scale=scale,
                                           offset_h=offset_h,
                                           offset_w=offset_w,
                                           height=h,
                                           width=w)
                # Keep boxes, scores and labels aligned by filtering all three
                # with the same indices.
                indices = np.where(scores[:] > score_threshold)[0]
                boxes = boxes[indices]
                scores = scores[indices]
                labels = labels[indices]
                draw_boxes(frame, boxes, scores, labels, colors, classes)

            cv2.imshow('image', frame)
            cv2.waitKey(1)
    finally:
        # Free the video handle and close the preview window even on error.
        cap.release()
        cv2.destroyAllWindows()
def efficientdet(phi, num_classes=20, num_anchors=9, weighted_bifpn=False, freeze_bn=False,
                 score_threshold=0.01, detect_quadrangle=False, anchor_parameters=None,
                 separable_conv=True):
    """
    Build the EfficientDet training model and the matching prediction model.

    Args:
        phi: Scaling index; must be in range(7). Selects the entries of the
            module-level image_sizes, w_bifpns, d_bifpns, d_heads and backbones.
        num_classes: Forwarded to ClassNet.
        num_anchors: Forwarded to BoxNet and ClassNet.
        weighted_bifpn: Use build_wBiFPN instead of build_BiFPN.
        freeze_bn: Forwarded to the backbone, the BiFPN builders and both heads.
        score_threshold: Forwarded to FilterDetections.
        detect_quadrangle: Forwarded to BoxNet; when True, regression channels
            4:8 and 8 are additionally passed to FilterDetections.
        anchor_parameters: Forwarded to anchors_for_shape.
        separable_conv: Forwarded to BoxNet and ClassNet.

    Returns:
        (model, prediction_model): ``model`` outputs [classification, regression];
        ``prediction_model`` outputs the filtered detections. Both take the
        image as their only input.
    """
    assert phi in range(7)

    input_size = image_sizes[phi]
    image_input = layers.Input((input_size, input_size, 3))
    bifpn_width = w_bifpns[phi]
    bifpn_depth = d_bifpns[phi]
    head_width = bifpn_width
    head_depth = d_heads[phi]
    backbone_features = backbones[phi](input_tensor=image_input, freeze_bn=freeze_bn)

    # Both BiFPN flavours share one call signature; pick the builder once and
    # stack it bifpn_depth times.
    bifpn_builder = build_wBiFPN if weighted_bifpn else build_BiFPN
    fpn_features = backbone_features
    for block_idx in range(bifpn_depth):
        fpn_features = bifpn_builder(fpn_features, bifpn_width, block_idx, freeze_bn=freeze_bn)

    box_net = BoxNet(head_width, head_depth,
                     num_anchors=num_anchors,
                     separable_conv=separable_conv,
                     freeze_bn=freeze_bn,
                     detect_quadrangle=detect_quadrangle,
                     name='box_net')
    class_net = ClassNet(head_width, head_depth,
                         num_classes=num_classes,
                         num_anchors=num_anchors,
                         separable_conv=separable_conv,
                         freeze_bn=freeze_bn,
                         name='class_net')

    # Each head is called with [feature map, index of that map]; the
    # classification head runs over all maps before the box head does.
    class_outputs = []
    for level, feature in enumerate(fpn_features):
        class_outputs.append(class_net([feature, level]))
    classification = layers.Concatenate(axis=1, name='classification')(class_outputs)

    box_outputs = []
    for level, feature in enumerate(fpn_features):
        box_outputs.append(box_net([feature, level]))
    regression = layers.Concatenate(axis=1, name='regression')(box_outputs)

    model = models.Model(inputs=[image_input],
                         outputs=[classification, regression],
                         name='efficientdet')

    # Apply the predicted regression to the anchors, then clip to the image.
    anchors = anchors_for_shape((input_size, input_size), anchor_params=anchor_parameters)
    anchors_input = np.expand_dims(anchors, axis=0)
    boxes = RegressBoxes(name='boxes')([anchors_input, regression[..., :4]])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # Filter detections (apply NMS / score threshold / select top-k).
    if detect_quadrangle:
        filter_layer = FilterDetections(name='filtered_detections',
                                        score_threshold=score_threshold,
                                        detect_quadrangle=True)
        filter_inputs = [boxes, classification, regression[..., 4:8], regression[..., 8]]
    else:
        filter_layer = FilterDetections(name='filtered_detections',
                                        score_threshold=score_threshold)
        filter_inputs = [boxes, classification]
    detections = filter_layer(filter_inputs)

    prediction_model = models.Model(inputs=[image_input], outputs=detections, name='efficientdet_p')
    return model, prediction_model
def detect_image_value(image):
    """
    Run the detector on one image and return the detections above the score threshold.

    Relies on the module-level ``image_size``, ``prediction_model``,
    ``score_threshold`` and ``timer``. The inference time is printed.

    Args:
        image: Image array indexed as [row, column, channel]; its channel axis
            is reversed before preprocessing (presumably BGR from OpenCV --
            TODO confirm against the callers).

    Returns:
        Tuple (boxes, scores, labels) holding only the detections whose score
        exceeds ``score_threshold``. Boxes are mapped back to the coordinates
        of the input image and clipped to its bounds.
    """
    image = image[:, :, ::-1]
    h, w = image.shape[:2]

    # Resize the image into the network input size.
    image, scale, offset_h, offset_w = preprocess_image(image, image_size=image_size)
    anchors = anchors_for_shape((image_size, image_size))

    # Run the network on a batch of one.
    start = timer()
    boxes, scores, labels = prediction_model.predict_on_batch(
        [np.expand_dims(image, axis=0), np.expand_dims(anchors, axis=0)])
    print(timer() - start)

    # Undo the padding offsets and the resize scale applied by preprocessing.
    boxes[0, :, [0, 2]] = boxes[0, :, [0, 2]] - offset_w
    boxes[0, :, [1, 3]] = boxes[0, :, [1, 3]] - offset_h
    boxes /= scale

    # Clip to the original image: columns 0/2 against the width, 1/3 against the height.
    boxes[0, :, 0] = np.clip(boxes[0, :, 0], 0, w - 1)
    boxes[0, :, 1] = np.clip(boxes[0, :, 1], 0, h - 1)
    boxes[0, :, 2] = np.clip(boxes[0, :, 2], 0, w - 1)
    boxes[0, :, 3] = np.clip(boxes[0, :, 3], 0, h - 1)

    # Select the detections which have a score above the threshold.
    indices = np.where(scores[0, :] > score_threshold)[0]
    boxes = boxes[0, indices]
    scores = scores[0, indices]
    labels = labels[0, indices]

    return boxes, scores, labels
def run(generator, args):
    """
    Display the generator's samples one at a time, cycling until 'q' is pressed.

    Args:
        generator: The generator to debug.
        args: parseargs args object; the flags ``anchors``, ``annotations``,
            ``draw_2d_bboxes`` and ``draw_class_names`` are read.
    """
    quit_key = ord('q')

    while True:
        for idx in range(generator.size()):
            # Load everything belonging to this sample.
            image = generator.load_image(idx)
            annotations = generator.load_annotations(idx)
            mask = generator.load_mask(idx)
            camera_matrix = generator.load_camera_matrix(idx)

            # NOTE(review): the nesting below was reconstructed from a
            # whitespace-collapsed source -- confirm which statements belong
            # under the annotations check.
            if len(annotations['labels']) > 0:
                # Apply random transformations.
                image, annotations = generator.random_transform_group_entry(
                    image, annotations, mask, camera_matrix)

                # Only the first element returned by anchors_for_shape is used here.
                box_anchors = anchors_for_shape(image.shape, anchor_params=None)[0]
                positive_indices, _, _ = compute_gt_annotations(box_anchors, annotations['bboxes'])

                # Switch the image from RGB to BGR again.
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                if args.anchors:
                    draw_boxes(image, box_anchors[positive_indices], (255, 255, 0), thickness=1)

                if args.annotations:
                    draw_annotations(image,
                                     annotations,
                                     class_to_bbox_3D=generator.get_bbox_3d_dict(),
                                     camera_matrix=camera_matrix,
                                     label_to_name=generator.label_to_name,
                                     draw_bbox_2d=args.draw_2d_bboxes,
                                     draw_name=args.draw_class_names)

                print("Generator idx: {}".format(idx))

            cv2.imshow('Image', image)
            if cv2.waitKey() == quit_key:
                cv2.destroyAllWindows()
                return
def __init__(
    self,
    phi=1,
    image_sizes=(512, 640, 768, 896, 1024, 1280, 1408),
    misc_effect=None,
    visual_effect=None,
    batch_size=1,
    group_method="random",  # one of 'none', 'random', 'ratio'
    shuffle_groups=True,
    detect_text=False,
    detect_quadrangle=False,
):
    """
    Set up the generator: store the options, build the anchors and group the images.

    Args:
        phi: Index into image_sizes selecting the (square) input resolution.
        image_sizes: Candidate input resolutions, one entry per phi.
        misc_effect: Stored on the instance; not used by this constructor.
        visual_effect: Stored on the instance; not used by this constructor.
        batch_size: The size of the batches to generate (converted with int()).
        group_method: How images are grouped together, one of
            ('none', 'random', 'ratio'); defaults to 'random'.
        shuffle_groups: If True, the groups are shuffled once here, right after
            they have been built.
        detect_text: If True, use anchor parameters with ratios
            (0.25, 0.5, 1.0, 2.0) and sizes (16, 32, 64, 128, 256) instead of
            AnchorParameters.default.
        detect_quadrangle: Stored on the instance; not used by this constructor.
    """
    # Augmentation hooks are only kept for later use.
    self.misc_effect = misc_effect
    self.visual_effect = visual_effect

    # Batching / grouping options.
    self.batch_size = int(batch_size)
    self.group_method = group_method
    self.shuffle_groups = shuffle_groups

    # Detection mode flags.
    self.detect_text = detect_text
    self.detect_quadrangle = detect_quadrangle

    resolution = image_sizes[phi]
    self.image_size = resolution
    self.groups = None

    if self.detect_text:
        self.anchor_parameters = AnchorParameters(
            ratios=(0.25, 0.5, 1.0, 2.0),
            sizes=(16, 32, 64, 128, 256),
        )
    else:
        self.anchor_parameters = AnchorParameters.default
    self.anchors = anchors_for_shape((resolution, resolution),
                                     anchor_params=self.anchor_parameters)
    self.num_anchors = self.anchor_parameters.num_anchors()

    # group_images() is expected to populate self.groups.
    self.group_images()

    if self.shuffle_groups:
        random.shuffle(self.groups)
def run(generator, args, anchor_params):
    """!@brief Show every generator sample once, with optional anchor / annotation overlays.

    @param generator     : The generator to debug.
    @param args          : Parseargs args object (reads resize, anchors, annotations).
    @param anchor_params : Anchor parameters forwarded to anchors_for_shape.
    @return False if the user pressed 'q' to abort, True once all samples were shown.
    """
    quit_key = ord('q')

    for idx in range(generator.size()):
        image = generator.load_image(idx)
        annotations = generator.load_annotations(idx)

        # Random transformations are applied unconditionally.
        image, annotations = generator.random_transform_group_entry(image, annotations)

        # Resize the image and scale the boxes with it.
        if args.resize:
            image, image_scale = generator.resize_image(image)
            annotations['bboxes'] *= image_scale

        anchors = anchors_for_shape(image.shape, anchor_params=anchor_params)
        positive_indices, _, max_indices = compute_gt_annotations(anchors, annotations['bboxes'])

        if args.anchors:
            draw_boxes(image, anchors[positive_indices], (255, 255, 0), thickness=1)

        if args.annotations:
            # Annotations go on in red first ...
            draw_annotations(image, annotations, color=(0, 0, 255),
                             label_to_name=generator.label_to_name)
            # ... then the boxes matched by a positive anchor are redrawn in
            # green, so annotations without anchors stay red.
            matched_boxes = annotations['bboxes'][max_indices[positive_indices], :]
            draw_boxes(image, matched_boxes, (0, 255, 0))

        cv2.imshow('Image', image)
        if cv2.waitKey() == quit_key:
            return False

    return True
def efficientdet(phi, num_classes=20, num_anchors=9, weighted_bifpn=False, freeze_bn=False,
                 score_threshold=0.01, detect_quadrangle=False, anchor_parameters=None):
    """
    Build the EfficientDet training model and the matching prediction model.

    Args:
        phi: Scaling index; must be in range(7). Selects the entries of the
            module-level image_sizes, w_bifpns and backbones, and sets the
            BiFPN depth (2 + phi) and head depth (3 + int(phi / 3)).
        num_classes: Forwarded to build_class_head.
        num_anchors: Forwarded to both head builders.
        weighted_bifpn: Use build_wBiFPN instead of build_BiFPN.
        freeze_bn: Forwarded to the backbone and the BiFPN builders.
        score_threshold: Forwarded to FilterDetections.
        detect_quadrangle: Forwarded to build_regress_head; when True,
            regression channels 4:8 and 8 are additionally passed to
            FilterDetections.
        anchor_parameters: Forwarded to anchors_for_shape.

    Returns:
        (model, prediction_model): ``model`` outputs [regression, classification];
        ``prediction_model`` outputs the filtered detections. Both take the
        image as their only input.
    """
    assert phi in range(7)

    input_size = image_sizes[phi]
    image_input = layers.Input((input_size, input_size, 3))
    bifpn_width = w_bifpns[phi]
    bifpn_depth = 2 + phi
    head_width = bifpn_width
    head_depth = 3 + int(phi / 3)
    features = backbones[phi](input_tensor=image_input, freeze_bn=freeze_bn)

    # Both BiFPN flavours share one call signature; pick the builder once and
    # stack it bifpn_depth times.
    bifpn_builder = build_wBiFPN if weighted_bifpn else build_BiFPN
    for block_idx in range(bifpn_depth):
        features = bifpn_builder(features, bifpn_width, block_idx, freeze_bn=freeze_bn)

    regress_head = build_regress_head(head_width, head_depth,
                                      num_anchors=num_anchors,
                                      detect_quadrangle=detect_quadrangle)
    class_head = build_class_head(head_width, head_depth,
                                  num_classes=num_classes,
                                  num_anchors=num_anchors)

    # The same head is applied to every feature map; the regression head runs
    # over all maps before the classification head does.
    regression_outputs = []
    for feature in features:
        regression_outputs.append(regress_head(feature))
    regression = layers.Concatenate(axis=1, name='regression')(regression_outputs)

    class_outputs = []
    for feature in features:
        class_outputs.append(class_head(feature))
    classification = layers.Concatenate(axis=1, name='classification')(class_outputs)

    model = models.Model(inputs=[image_input],
                         outputs=[regression, classification],
                         name='efficientdet')

    # Apply the predicted regression to the anchors, then clip to the image.
    anchors = anchors_for_shape((input_size, input_size), anchor_params=anchor_parameters)
    anchors_input = np.expand_dims(anchors, axis=0)
    boxes = RegressBoxes(name='boxes')([anchors_input, regression[..., :4]])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # Filter detections (apply NMS / score threshold / select top-k).
    if detect_quadrangle:
        filter_layer = FilterDetections(name='filtered_detections',
                                        score_threshold=score_threshold,
                                        detect_quadrangle=True)
        filter_inputs = [boxes, classification, regression[..., 4:8], regression[..., 8]]
    else:
        filter_layer = FilterDetections(name='filtered_detections',
                                        score_threshold=score_threshold)
        filter_inputs = [boxes, classification]
    detections = filter_layer(filter_inputs)

    prediction_model = models.Model(inputs=[image_input], outputs=detections, name='efficientdet_p')
    return model, prediction_model
detect_quadrangle=True, anchor_parameters=anchor_parameters, ) prediction_model.load_weights(model_path, by_name=True) import glob for image_path in glob.glob('datasets/ic15/test_images/*.jpg'): image = cv2.imread(image_path) src_image = image.copy() image = image[:, :, ::-1] h, w = image.shape[:2] image, scale, offset_h, offset_w = preprocess_image(image, image_size=image_size) inputs = np.expand_dims(image, axis=0) anchors = anchors_for_shape((image_size, image_size), anchor_params=anchor_parameters) # run network start = time.time() boxes, scores, alphas, ratios, labels = prediction_model.predict_on_batch([np.expand_dims(image, axis=0), np.expand_dims(anchors, axis=0)]) # alphas = np.exp(alphas) alphas = 1 / (1 + np.exp(-alphas)) ratios = 1 / (1 + np.exp(-ratios)) quadrangles = np.zeros(boxes.shape[:2] + (8,)) quadrangles[:, :, 0] = boxes[:, :, 0] + (boxes[:, :, 2] - boxes[:, :, 0]) * alphas[:, :, 0] quadrangles[:, :, 1] = boxes[:, :, 1] quadrangles[:, :, 2] = boxes[:, :, 2] quadrangles[:, :, 3] = boxes[:, :, 1] + (boxes[:, :, 3] - boxes[:, :, 1]) * alphas[:, :, 1] quadrangles[:, :, 4] = boxes[:, :, 2] - (boxes[:, :, 2] - boxes[:, :, 0]) * alphas[:, :, 2] quadrangles[:, :, 5] = boxes[:, :, 3] quadrangles[:, :, 6] = boxes[:, :, 0]
def __init__(
    self,
    phi=0,
    image_sizes=(512, 640, 768, 896, 1024, 1280, 1408),
    train=True,
    use_colorspace_augmentation=False,
    use_6DoF_augmentation=False,
    scale_6DoF_augmentation=(0.7, 1.3),
    chance_no_augmentation=0.02,
    translation_scale_norm=1000.0,
    points_for_shape_match_loss=500,
    batch_size=1,
    rotation_representation="axis_angle",
    group_method='random',  # one of 'none', 'random', 'ratio'
    shuffle_groups=True,
):
    """
    Initialize Generator object.

    Args:
        phi: EfficientPose scaling hyperparameter phi; index into image_sizes.
        image_sizes: Tuple of different input image resolutions for every phi.
        train: Boolean indicating whether the generator loads training data or not.
        use_colorspace_augmentation: Boolean indicating whether to use augmentation
            in the color space or not. When True a RandAugment instance is created.
        use_6DoF_augmentation: Boolean indicating whether to use 6D augmentation or not.
        scale_6DoF_augmentation: Stored on the instance; not used by this constructor.
        chance_no_augmentation: Probability to skip augmentation for an image.
        translation_scale_norm: Factor to change units. EfficientPose internally
            works with meter and if your dataset unit is mm for example, then you
            need to set this parameter to 1000.
        points_for_shape_match_loss: Number of the objects 3D model points that
            are used in the loss function.
        batch_size: The size of the batches to generate.
        rotation_representation: String which representation of rotation should
            be used. Currently only axis_angle is supported. Accepted here but
            not stored by this constructor.
        group_method: Determines how images are grouped together, one of
            ('none', 'random', 'ratio'); defaults to 'random'.
        shuffle_groups: If True, the groups are shuffled once here, right after
            they have been built.
    """
    # Batching / grouping options.
    self.batch_size = int(batch_size)
    self.group_method = group_method
    self.shuffle_groups = shuffle_groups

    # Square network input and the anchors that go with it.
    resolution = image_sizes[phi]
    self.image_size = resolution
    self.groups = None
    self.anchor_parameters = AnchorParameters.default
    self.anchors, self.translation_anchors = anchors_for_shape(
        (resolution, resolution), anchor_params=self.anchor_parameters)
    self.num_anchors = self.anchor_parameters.num_anchors()

    # Data-loading and augmentation options.
    self.train = train
    self.use_colorspace_augmentation = use_colorspace_augmentation
    self.use_6DoF_augmentation = use_6DoF_augmentation
    self.chance_no_augmentation = chance_no_augmentation
    self.translation_scale_norm = translation_scale_norm
    self.points_for_shape_match_loss = points_for_shape_match_loss
    self.scale_6DoF_augmentation = scale_6DoF_augmentation
    self.rand_aug = (
        RandAugment(n=(1, 3), m=(1, 14)) if self.use_colorspace_augmentation else None
    )

    # group_images() is expected to populate self.groups.
    self.group_images()

    if self.shuffle_groups:
        random.shuffle(self.groups)

    # self.class_to_model_3d_points is not set in this constructor; it has to
    # exist on the instance before this point is reached.
    self.all_3d_model_points_array_for_loss = self.create_all_3d_model_points_array_for_loss(
        self.class_to_model_3d_points, self.points_for_shape_match_loss)
def generate_anchors(self, image_shape):
    """
    Build the anchors for an image of the given shape.

    Delegates to ``anchors_for_shape`` with ``self.compute_shapes`` as the
    shapes callback and the default anchor parameters of that function.
    """
    shapes_callback = self.compute_shapes
    anchors = anchors_for_shape(image_shape, shapes_callback=shapes_callback)
    return anchors
def main():
    """
    Run a frozen EfficientDet graph on one image and display the detections.

    The model path, image path, class list, score threshold and tensor names
    are hard-coded below. The frozen graph is imported into its own
    ``tf.Graph``, executed once in a session that is closed afterwards, and
    the detections above the score threshold are drawn on a copy of the image
    and shown in a window named 'image'.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    phi = 1
    model_path = 'checkpoints/2019-12-03/pascal_05.pb'
    image_sizes = (512, 640, 768, 896, 1024, 1280, 1408)
    image_size = image_sizes[phi]
    classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
        'train', 'tvmonitor',
    ]
    num_classes = len(classes)
    score_threshold = 0.5
    colors = [np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)]
    output_names = {
        'output_boxes': 'filtered_detections/map/TensorArrayStack/TensorArrayGatherV3:0',
        'output_scores': 'filtered_detections/map/TensorArrayStack_1/TensorArrayGatherV3:0',
        'output_labels': 'filtered_detections/map/TensorArrayStack_2/TensorArrayGatherV3:0'
    }

    # Import the frozen graph into a dedicated graph. as_default() only takes
    # effect as a context manager, so it must be entered with `with`.
    graph_def = get_frozen_graph(model_path)
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
    output_boxes = graph.get_tensor_by_name(output_names['output_boxes'])
    output_scores = graph.get_tensor_by_name(output_names['output_scores'])
    output_labels = graph.get_tensor_by_name(output_names['output_labels'])

    image_path = 'datasets/VOC2007/JPEGImages/000002.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('could not read image: {}'.format(image_path))
    src_image = image.copy()
    image = image[:, :, ::-1]
    h, w = image.shape[:2]

    # preprocess_image also reports the padding offsets that
    # post_process_boxes needs below.
    image, scale, offset_h, offset_w = preprocess_image(image, image_size=image_size)
    anchors = anchors_for_shape((image_size, image_size))

    # Run the network; the session is closed as soon as the results are fetched.
    with tf.Session(graph=graph) as sess:
        start = time.time()
        image_batch = np.expand_dims(image, axis=0)
        anchors_batch = np.expand_dims(anchors, axis=0)
        feed_dict = {"input_1:0": image_batch, "input_4:0": anchors_batch}
        boxes, scores, labels = sess.run(
            [output_boxes, output_scores, output_labels], feed_dict)
        boxes, scores, labels = np.squeeze(boxes), np.squeeze(scores), np.squeeze(labels)
        print(time.time() - start)

    boxes = post_process_boxes(boxes=boxes,
                               scale=scale,
                               offset_h=offset_h,
                               offset_w=offset_w,
                               height=h,
                               width=w)

    # Select the detections which have a score above the threshold; all three
    # arrays are filtered with the same indices so they stay aligned.
    indices = np.where(scores[:] > score_threshold)[0]
    boxes = boxes[indices]
    scores = scores[indices]
    labels = labels[indices]

    draw_boxes(src_image, boxes, scores, labels, colors, classes)
    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.imshow('image', src_image)
    cv2.waitKey(0)
def apply_subnets_to_feature_maps(box_net, class_net, rotation_net, translation_net, fpn_feature_maps,
                                  image_input, camera_parameters_input, input_size, anchor_parameters):
    """
    Applies the subnetworks to the BiFPN feature maps

    Args:
        box_net, class_net, rotation_net, translation_net: Subnetworks
        fpn_feature_maps: Sequence of the BiFPN feature maps of the different levels (P3, P4, P5, P6, P7)
        image_input, camera_parameters_input: The image and camera parameter input layer
        input_size: Integer representing the input image resolution
        anchor_parameters: Struct containing anchor parameters. If None, default values are used.

    Returns:
        classification: Tensor containing the classification outputs for all anchor boxes.
            Shape (batch_size, num_anchor_boxes, num_classes)
        bbox_regression: Tensor containing the deltas of anchor boxes to the GT 2D bounding boxes
            for all anchor boxes. Shape (batch_size, num_anchor_boxes, 4)
        rotation: Tensor containing the rotation outputs for all anchor boxes.
            Shape (batch_size, num_anchor_boxes, num_rotation_parameters)
        translation: Tensor containing the translation outputs for all anchor boxes.
            Shape (batch_size, num_anchor_boxes, 3)
        transformation: Tensor containing the concatenated rotation and translation outputs for
            all anchor boxes. Shape (batch_size, num_anchor_boxes, num_rotation_parameters + 3).
            Rotation and translation are concatenated because the Keras loss function takes only
            one GT and prediction tensor respectively as input but the transformation loss needs both
        bboxes: Tensor containing the 2D bounding boxes for all anchor boxes.
            Shape (batch_size, num_anchor_boxes, 4)
    """
    def run_subnet(subnet, output_name):
        # Call the subnet on [feature map, index of that map] for every map and
        # join the per-map outputs along axis 1.
        per_map_outputs = [subnet([feature, idx]) for idx, feature in enumerate(fpn_feature_maps)]
        return layers.Concatenate(axis=1, name=output_name)(per_map_outputs)

    classification = run_subnet(class_net, 'classification')
    bbox_regression = run_subnet(box_net, 'regression')
    rotation = run_subnet(rotation_net, 'rotation')
    translation_raw = run_subnet(translation_net, 'translation_raw_outputs')

    # Get the anchors and apply the predicted translation offsets to the
    # translation anchors.
    anchors, translation_anchors = anchors_for_shape((input_size, input_size),
                                                     anchor_params=anchor_parameters)
    translation_anchors_input = np.expand_dims(translation_anchors, axis=0)
    translation_xy_Tz = RegressTranslation(name='translation_regression')(
        [translation_anchors_input, translation_raw])

    # Columns 0..5 of the camera parameter input are fx, fy, px, py, tz_scale
    # and image_scale, in that order.
    camera = camera_parameters_input
    translation = CalculateTxTy(name='translation')(translation_xy_Tz,
                                                    fx=camera[:, 0],
                                                    fy=camera[:, 1],
                                                    px=camera[:, 2],
                                                    py=camera[:, 3],
                                                    tz_scale=camera[:, 4],
                                                    image_scale=camera[:, 5])

    # Apply the predicted 2D bbox regression to the anchors and clip to the image.
    anchors_input = np.expand_dims(anchors, axis=0)
    bboxes = RegressBoxes(name='boxes')([anchors_input, bbox_regression[..., :4]])
    bboxes = ClipBoxes(name='clipped_boxes')([image_input, bboxes])

    # Concatenate rotation and translation into one output for the
    # transformation loss. The standard Concatenate layer throws an error that
    # the shapes do not match (translation's dim 2 is known via the translation
    # anchors, rotation's dim 2 is None), so a Lambda layer with tf.concat is
    # used instead.
    concat_layer = layers.Lambda(lambda input_list: tf.concat(input_list, axis=-1),
                                 name="transformation")
    transformation = concat_layer([rotation, translation])

    return classification, bbox_regression, rotation, translation, transformation, bboxes
def main():
    """
    Run EfficientDet on one image and display the detections.

    The model configuration (phi, weights path, class list, score threshold)
    and the image path are hard-coded below. Detections above the score
    threshold are drawn on a copy of the image and shown in a window named
    'image'; the inference time is printed.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    phi = 1
    weighted_bifpn = False
    model_path = 'checkpoints/2019-12-03/pascal_05_0.6283_1.1975_0.8029.h5'
    image_sizes = (512, 640, 768, 896, 1024, 1280, 1408)
    image_size = image_sizes[phi]
    classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
        'train', 'tvmonitor'
    ]
    num_classes = len(classes)
    score_threshold = 0.5
    colors = [np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)]

    _, prediction_model = efficientdet(phi=phi,
                                       weighted_bifpn=weighted_bifpn,
                                       num_classes=num_classes,
                                       score_threshold=score_threshold)
    prediction_model.load_weights(model_path, by_name=True)

    image_path = 'datasets/VOC2007/JPEGImages/000002.jpg'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('could not read image: {}'.format(image_path))
    src_image = image.copy()
    image = image[:, :, ::-1]
    h, w = image.shape[:2]

    image, scale, offset_h, offset_w = preprocess_image(image, image_size=image_size)
    anchors = anchors_for_shape((image_size, image_size))

    # Run the network on a batch of one.
    start = time.time()
    boxes, scores, labels = prediction_model.predict_on_batch(
        [np.expand_dims(image, axis=0), np.expand_dims(anchors, axis=0)])
    boxes, scores, labels = np.squeeze(boxes), np.squeeze(scores), np.squeeze(labels)
    print(time.time() - start)

    boxes = post_process_boxes(boxes=boxes,
                               scale=scale,
                               offset_h=offset_h,
                               offset_w=offset_w,
                               height=h,
                               width=w)

    # Select the detections which have a score above the threshold; all three
    # arrays are filtered with the same indices so they stay aligned.
    indices = np.where(scores[:] > score_threshold)[0]
    boxes = boxes[indices]
    scores = scores[indices]
    labels = labels[indices]

    draw_boxes(src_image, boxes, scores, labels, colors, classes)
    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.imshow('image', src_image)
    cv2.waitKey(0)
def inference(model_path, image_dir, dst_path, patch_size, overlay_size, save_img, test_one,
              score_threshold, nms_threshold, model_nms_threshold):
    """
    Detect objects in every image of a directory, patch by patch, and save the results.

    Relies on the module-level ``phi``, ``weighted_bifpn``, ``num_classes``,
    ``image_size`` and ``colors``.

    :param str model_path: path to the trained weights loaded into the prediction model
    :param str image_dir: directory to source images (searched recursively for png/jpg files)
    :param str dst_path: directory receiving 'result.csv' and, with save_img, 'img/ship<N>.jpg'
    :param int patch_size: patch size that width and height of patch is equal
    :param int overlay_size: overlay size in patches
    :param bool save_img: if True, draw the kept quadrangles and write the image to disk
    :param bool test_one: if True, only the first 20 files are processed
    :param float score_threshold: minimum score of a detection, used for the model and per patch
    :param float nms_threshold: threshold passed to nms() over the detections of all patches
    :param float model_nms_threshold: passed to efficientdet() as nms_threshold
    :return: None (save detection output)
    """
    # Get filenames.
    file_paths = [os.path.join(root, name)
                  for root, _, files in os.walk(image_dir)
                  for name in files
                  if name.endswith(('png', 'jpg'))]

    _, prediction_model = efficientdet(phi=phi,
                                       weighted_bifpn=weighted_bifpn,
                                       num_classes=num_classes,
                                       num_anchors=AnchorParameters.ship.num_anchors(),
                                       score_threshold=score_threshold,
                                       detect_quadrangle=True,
                                       anchor_parameters=AnchorParameters.ship,
                                       nms_threshold=model_nms_threshold)
    print(model_path)
    prediction_model.load_weights(model_path, by_name=True)

    # patch_size / overlay_size are used as passed by the caller; they are no
    # longer overwritten from a global ``args`` object.

    # Loop invariants: the anchors depend only on the fixed input resolution,
    # and the digit pattern is the same for every file.
    anchors_batch = np.expand_dims(
        anchors_for_shape((image_size, image_size), anchor_params=AnchorParameters.ship), axis=0)
    digits_pattern = re.compile(r"\d+")

    det_by_file = dict()
    if test_one:
        file_paths = file_paths[:20]

    for file_path in tqdm(file_paths):
        src_image = cv2.imread(file_path)
        patch_generator = get_patch_generator(src_image, patch_size=patch_size,
                                              overlay_size=overlay_size)

        classes_list, scores_list, quadrangles_list, boxes_list, ratios_list = [], [], [], [], []
        for patch_image, row, col in patch_generator:
            image, scale, offset_h, offset_w = preprocess_image(patch_image, image_size=image_size)

            # Run the network on a batch of one patch.
            boxes, scores, alphas, ratios, classes = prediction_model.predict(
                [np.expand_dims(image, axis=0), anchors_batch])

            h, w = patch_image.shape[:2]

            # Sigmoid of the raw alpha / ratio outputs.
            alphas = 1 / (1 + np.exp(-alphas))
            ratios = 1 / (1 + np.exp(-ratios))

            # Build the quadrangle (x0, y0, ..., x3, y3) from each box and its
            # four alphas: every vertex lies on one box edge, positioned by the
            # corresponding alpha.
            quadrangles = np.zeros(boxes.shape[:2] + (8,))
            quadrangles[:, :, 0] = boxes[:, :, 0] + (boxes[:, :, 2] - boxes[:, :, 0]) * alphas[:, :, 0]
            quadrangles[:, :, 1] = boxes[:, :, 1]
            quadrangles[:, :, 2] = boxes[:, :, 2]
            quadrangles[:, :, 3] = boxes[:, :, 1] + (boxes[:, :, 3] - boxes[:, :, 1]) * alphas[:, :, 1]
            quadrangles[:, :, 4] = boxes[:, :, 2] - (boxes[:, :, 2] - boxes[:, :, 0]) * alphas[:, :, 2]
            quadrangles[:, :, 5] = boxes[:, :, 3]
            quadrangles[:, :, 6] = boxes[:, :, 0]
            quadrangles[:, :, 7] = boxes[:, :, 3] - (boxes[:, :, 3] - boxes[:, :, 1]) * alphas[:, :, 3]

            # Undo preprocessing for the boxes, clip them to the patch, then
            # shift them by the patch origin (col, row) in the full image.
            boxes[0, :, [0, 2]] = boxes[0, :, [0, 2]] - offset_w
            boxes[0, :, [1, 3]] = boxes[0, :, [1, 3]] - offset_h
            boxes /= scale
            boxes[0, :, 0] = np.clip(boxes[0, :, 0], 0, w - 1) + col
            boxes[0, :, 1] = np.clip(boxes[0, :, 1], 0, h - 1) + row
            boxes[0, :, 2] = np.clip(boxes[0, :, 2], 0, w - 1) + col
            boxes[0, :, 3] = np.clip(boxes[0, :, 3], 0, h - 1) + row

            # Same for the quadrangles: even columns are x, odd columns are y.
            quadrangles[0, :, [0, 2, 4, 6]] = quadrangles[0, :, [0, 2, 4, 6]] - offset_w
            quadrangles[0, :, [1, 3, 5, 7]] = quadrangles[0, :, [1, 3, 5, 7]] - offset_h
            quadrangles /= scale
            quadrangles[0, :, [0, 2, 4, 6]] = np.clip(quadrangles[0, :, [0, 2, 4, 6]], 0, w - 1) + col
            quadrangles[0, :, [1, 3, 5, 7]] = np.clip(quadrangles[0, :, [1, 3, 5, 7]], 0, h - 1) + row

            # Select the detections which have a score above the threshold.
            indices = np.where(scores[0, :] > score_threshold)[0]
            boxes = boxes[0, indices]
            scores = scores[0, indices]
            classes = classes[0, indices]
            quadrangles = quadrangles[0, indices]
            ratios = ratios[0, indices]

            if len(quadrangles) > 0:
                quadrangles_list.extend(list(quadrangles))
                boxes_list.extend(list(boxes))
                classes_list.extend(list(classes))
                scores_list.extend(list(scores))
                ratios_list.extend(list(ratios))

        # Merge the detections of all patches and suppress the overlaps.
        quadrangles = np.array(quadrangles_list).reshape(-1, 8)
        boxes = np.array(boxes_list).reshape(-1, 4)
        classes = np.array(classes_list).flatten()
        scores = np.array(scores_list).flatten()
        ratios = np.array(ratios_list).flatten()
        quadrangles, boxes, classes, scores, ratios = nms(quadrangles, boxes, classes, scores,
                                                          ratios, nms_threshold)

        det_by_file[file_path] = {'boxes': quadrangles, 'classes': classes, 'scores': scores}

        # Save detection output.
        if save_img:
            for _, _, label, quadrangle, _ in zip(boxes, scores, classes, quadrangles, ratios):
                color = colors[int(label)]
                cv2.drawContours(src_image, [quadrangle.astype(np.int32).reshape((4, 2))],
                                 -1, color, 3)
            # The output name uses the first run of digits found in the file path.
            image_id = int(digits_pattern.findall(file_path)[0])
            cv2.imwrite(dst_path + '/img/ship{}.jpg'.format(image_id), src_image)

    save_det_to_csv(dst_path + '/result.csv', det_by_file)
colors = [np.random.randint(0, 256, 3).tolist() for i in range(num_classes)] model, prediction_model = efficientdet(phi=phi, weighted_bifpn=weighted_bifpn, num_classes=num_classes, score_threshold=score_threshold) prediction_model.load_weights(model_path, by_name=True) image = cv2.imread(args.image_path) src_image = image.copy() image = image[:, :, ::-1] h, w = image.shape[:2] image, scale, offset_h, offset_w = preprocess_image(image, image_size=image_size) inputs = np.expand_dims(image, axis=0) anchors = anchors_for_shape((image_size, image_size)) # run network start = time.time() boxes, scores, labels = prediction_model.predict_on_batch( [np.expand_dims(image, axis=0), np.expand_dims(anchors, axis=0)]) print(time.time() - start) boxes[0, :, [0, 2]] = boxes[0, :, [0, 2]] - offset_w boxes[0, :, [1, 3]] = boxes[0, :, [1, 3]] - offset_h boxes /= scale boxes[0, :, 0] = np.clip(boxes[0, :, 0], 0, w - 1) boxes[0, :, 1] = np.clip(boxes[0, :, 1], 0, h - 1) boxes[0, :, 2] = np.clip(boxes[0, :, 2], 0, w - 1) boxes[0, :, 3] = np.clip(boxes[0, :, 3], 0, h - 1) # select indices which have a score above the threshold
help="image file to detect on", required=True) args = parser.parse_args() ## initialise configuration model_path = args.model phi = int(args.phi) object_classes = generate_voc_classes() resolutions = generate_resolutions() score_threshold = args.threshold max_detections = args.max_detection num_classes = len(object_classes) colors = generate_class_colors(num_classes) anchors = anchors_for_shape((resolutions[phi], resolutions[phi])) ## Start detection process image = cv2.imread(args.image) model, prediction_model = efficientdet(phi=phi, num_classes=num_classes) prediction_model.load_weights(model_path, by_name=True) draw_image = image.copy() start_time = time.time() detections = detect_on_frame(image, prediction_model, anchors, score_threshold, max_detections) print("Prediction speed {}".format(1 / (time.time() - start_time))) ## Visualise detections for detection in detections: label = int(detection[5])