def __init__(
        self,
        phi=0,
        image_sizes=(512, 640, 768, 896, 1024, 1280, 1408),
        misc_effect=None,
        visual_effect=None,
        batch_size=1,
        group_method='ratio',  # one of 'none', 'random', 'ratio'
        shuffle_groups=True,
    ):
        """
        Store the generator configuration, precompute anchors and build the image groups.

        Args:
            phi: Index into image_sizes selecting the (square) input resolution.
            image_sizes: Available input resolutions; image_sizes[phi] is used.
            misc_effect: Stored unchanged on the instance; not used by this constructor.
            visual_effect: Stored unchanged on the instance; not used by this constructor.
            batch_size: The size of the batches to generate (converted with int()).
            group_method: How images are grouped together, one of ('none', 'random', 'ratio').
            shuffle_groups: If True, the groups are shuffled once at the end of construction.
        """
        # Plain configuration values.
        self.batch_size = int(batch_size)
        self.shuffle_groups = shuffle_groups
        self.group_method = group_method
        self.visual_effect = visual_effect
        self.misc_effect = misc_effect

        # Square input resolution and the anchors computed for that shape.
        side = image_sizes[phi]
        self.image_size = side
        self.anchors = anchors_for_shape((side, side))

        self.current_index = 0
        self.groups = None

        # group_images() is defined elsewhere; it is expected to fill self.groups.
        self.group_images()

        # Initial shuffle so the first pass does not follow the grouping order.
        if self.shuffle_groups:
            random.shuffle(self.groups)
Ejemplo n.º 2
0
 def generate_anchors(self, image_shape):
     """Return the anchors for image_shape.

     Custom anchor parameters are parsed from self.config when it is truthy
     and contains an 'anchor_parameters' entry; otherwise None is passed on.
     self.compute_shapes is forwarded as the shapes callback.
     """
     has_custom_params = self.config and 'anchor_parameters' in self.config
     anchor_params = parse_anchor_parameters(self.config) if has_custom_params else None
     return anchors_for_shape(
         image_shape,
         anchor_params=anchor_params,
         shapes_callback=self.compute_shapes,
     )
Ejemplo n.º 3
0
def main():
    """Run the detector on every frame of a video file and display the result.

    Builds the prediction model, loads weights by name, then reads
    'datasets/video.mp4' frame by frame, draws the detections scoring above
    the threshold onto each frame and shows it in an OpenCV window.
    The capture handle and the window are released even if a frame fails.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    phi = 1
    weighted_bifpn = False
    model_path = 'checkpoints/2019-12-03/pascal_05_0.6283_1.1975_0.8029.h5'
    image_sizes = (512, 640, 768, 896, 1024, 1280, 1408)
    image_size = image_sizes[phi]
    classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor',
    ]
    num_classes = len(classes)
    score_threshold = 0.5
    colors = [np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)]
    # Only the prediction model is needed for inference.
    _, prediction_model = efficientdet(phi=phi,
                                       weighted_bifpn=weighted_bifpn,
                                       num_classes=num_classes,
                                       score_threshold=score_threshold)
    prediction_model.load_weights(model_path, by_name=True)

    # The anchors depend only on the fixed input size, so build the batch once
    # instead of recomputing it for every frame.
    anchors_batch = np.expand_dims(anchors_for_shape((image_size, image_size)), axis=0)

    video_path = 'datasets/video.mp4'
    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or read failure): stop.
                break
            h, w = frame.shape[:2]
            # NOTE(review): the frame is passed in OpenCV channel order without the
            # [:, :, ::-1] flip used by the other demos in this file -- confirm
            # what preprocess_image expects.
            image, scale, offset_h, offset_w = preprocess_image(frame, image_size=image_size)

            boxes_batch, scores_batch, labels_batch = prediction_model.predict_on_batch(
                [np.expand_dims(image, axis=0), anchors_batch])

            for boxes, scores, labels in zip(boxes_batch, scores_batch, labels_batch):
                # Map boxes from network input coordinates back onto the original frame.
                boxes = post_process_boxes(boxes=boxes,
                                           scale=scale,
                                           offset_h=offset_h,
                                           offset_w=offset_w,
                                           height=h,
                                           width=w)

                # Keep boxes, scores and labels aligned by filtering all three.
                indices = np.where(scores > score_threshold)[0]
                boxes = boxes[indices]
                scores = scores[indices]
                labels = labels[indices]

                draw_boxes(frame, boxes, scores, labels, colors, classes)

            # Show the annotated frame once per video frame.
            cv2.imshow('image', frame)
            cv2.waitKey(1)
    finally:
        cap.release()
        cv2.destroyAllWindows()
Ejemplo n.º 4
0
def efficientdet(phi, num_classes=20, num_anchors=9, weighted_bifpn=False, freeze_bn=False,
                 score_threshold=0.01, detect_quadrangle=False, anchor_parameters=None, separable_conv=True):
    """Build the training model and the prediction model for one scaling level.

    Args:
        phi: Scaling index in range(7); selects entries from the module-level
            tables image_sizes, w_bifpns, d_bifpns, d_heads and backbones.
        num_classes: Number of classes passed to the classification head.
        num_anchors: Number of anchors passed to both heads.
        weighted_bifpn: If True use build_wBiFPN, otherwise build_BiFPN.
        freeze_bn: Forwarded to the backbone, the BiFPN builders and the heads.
        score_threshold: Score threshold handed to FilterDetections.
        detect_quadrangle: If True, regression channels 4:8 and 8 are also fed
            to FilterDetections.
        anchor_parameters: Forwarded to anchors_for_shape as anchor_params.
        separable_conv: Forwarded to both heads.

    Returns:
        (model, prediction_model): model outputs [classification, regression];
        prediction_model outputs the filtered detections.
    """
    assert phi in range(7)
    side = image_sizes[phi]
    image_input = layers.Input((side, side, 3))
    bifpn_width = w_bifpns[phi]
    bifpn_depth = d_bifpns[phi]
    head_width = bifpn_width
    head_depth = d_heads[phi]
    backbone_features = backbones[phi](input_tensor=image_input, freeze_bn=freeze_bn)

    # Both BiFPN variants share a call signature, so pick the builder once.
    bifpn_builder = build_wBiFPN if weighted_bifpn else build_BiFPN
    fpn_features = backbone_features
    for layer_idx in range(bifpn_depth):
        fpn_features = bifpn_builder(fpn_features, bifpn_width, layer_idx, freeze_bn=freeze_bn)

    box_net = BoxNet(head_width, head_depth, num_anchors=num_anchors, separable_conv=separable_conv,
                     freeze_bn=freeze_bn, detect_quadrangle=detect_quadrangle, name='box_net')
    class_net = ClassNet(head_width, head_depth, num_classes=num_classes, num_anchors=num_anchors,
                         separable_conv=separable_conv, freeze_bn=freeze_bn, name='class_net')

    # Apply each head to every pyramid level, then join the levels along axis 1.
    class_outputs = [class_net([fmap, level]) for level, fmap in enumerate(fpn_features)]
    classification = layers.Concatenate(axis=1, name='classification')(class_outputs)
    box_outputs = [box_net([fmap, level]) for level, fmap in enumerate(fpn_features)]
    regression = layers.Concatenate(axis=1, name='regression')(box_outputs)

    model = models.Model(inputs=[image_input], outputs=[classification, regression], name='efficientdet')

    # Apply the predicted regression to the anchors, then clip to the image.
    anchors = anchors_for_shape((side, side), anchor_params=anchor_parameters)
    anchors_input = np.expand_dims(anchors, axis=0)
    boxes = RegressBoxes(name='boxes')([anchors_input, regression[..., :4]])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # Filter detections (NMS / score threshold / top-k); quadrangle mode adds
    # the extra regression channels as inputs.
    filter_kwargs = {'name': 'filtered_detections', 'score_threshold': score_threshold}
    filter_inputs = [boxes, classification]
    if detect_quadrangle:
        filter_kwargs['detect_quadrangle'] = True
        filter_inputs.extend([regression[..., 4:8], regression[..., 8]])
    detections = FilterDetections(**filter_kwargs)(filter_inputs)

    prediction_model = models.Model(inputs=[image_input], outputs=detections, name='efficientdet_p')
    return model, prediction_model
Ejemplo n.º 5
0
def detect_image_value(image):
    """Detect objects in one image and return the detections above the threshold.

    Uses the module-level image_size, prediction_model, score_threshold and
    timer. The elapsed prediction time is printed.

    Args:
        image: Image array indexed as (row, column, channel). The channel axis
            is reversed before preprocessing (presumably BGR -> RGB; confirm
            against the caller).

    Returns:
        (boxes, scores, labels): the detections of the single batch entry whose
        score exceeds score_threshold, with boxes mapped back to the original
        image and clipped to its bounds.
    """
    image = image[:, :, ::-1]
    h, w = image.shape[:2]

    # Resize the image to the network input size.
    image, scale, offset_h, offset_w = preprocess_image(image,
                                                        image_size=image_size)
    anchors = anchors_for_shape((image_size, image_size))

    # Run the network on a batch of one.
    start = timer()
    boxes, scores, labels = prediction_model.predict_on_batch(
        [np.expand_dims(image, axis=0),
         np.expand_dims(anchors, axis=0)])
    print(timer() - start)

    # Undo the preprocessing offsets and scale, then clip to the original image.
    boxes[0, :, [0, 2]] = boxes[0, :, [0, 2]] - offset_w
    boxes[0, :, [1, 3]] = boxes[0, :, [1, 3]] - offset_h
    boxes /= scale
    boxes[0, :, 0] = np.clip(boxes[0, :, 0], 0, w - 1)
    boxes[0, :, 1] = np.clip(boxes[0, :, 1], 0, h - 1)
    boxes[0, :, 2] = np.clip(boxes[0, :, 2], 0, w - 1)
    boxes[0, :, 3] = np.clip(boxes[0, :, 3], 0, h - 1)

    # Select the detections scoring above the threshold.
    indices = np.where(scores[0, :] > score_threshold)[0]
    boxes = boxes[0, indices]
    scores = scores[0, indices]
    labels = labels[0, indices]

    # The former per-detection loop only filled lists that were never used or
    # returned, so it has been removed; the return value is unchanged.
    return boxes, scores, labels
Ejemplo n.º 6
0
def run(generator, args):
    """ Endlessly cycle through the generator's images and display them one at a time.

    Each image is shown until a key is pressed; pressing 'q' closes all windows
    and returns.

    Args:
        generator: The generator to debug.
        args: parseargs args object (reads anchors, annotations, draw_2d_bboxes
            and draw_class_names).
    """
    while True:
        for index in range(generator.size()):
            # Load everything belonging to this sample.
            image = generator.load_image(index)
            annotations = generator.load_annotations(index)
            mask = generator.load_mask(index)
            camera_matrix = generator.load_camera_matrix(index)

            has_labels = len(annotations['labels']) > 0
            if has_labels:
                # Apply random transformations.
                image, annotations = generator.random_transform_group_entry(
                    image, annotations, mask, camera_matrix)

                anchors = anchors_for_shape(image.shape, anchor_params=None)
                gt_boxes = annotations['bboxes']
                positive_indices, _, _ = compute_gt_annotations(anchors[0], gt_boxes)

                # Switch the image from RGB to BGR again.
                # NOTE(review): this only happens for samples with labels, so
                # unlabeled samples are displayed without the conversion -- confirm
                # whether that is intended.
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Draw the positive anchors on the image.
                if args.anchors:
                    positive_anchors = anchors[0][positive_indices]
                    draw_boxes(image, positive_anchors, (255, 255, 0), thickness=1)

                # Draw the annotations on the image.
                if args.annotations:
                    draw_annotations(
                        image,
                        annotations,
                        class_to_bbox_3D=generator.get_bbox_3d_dict(),
                        camera_matrix=camera_matrix,
                        label_to_name=generator.label_to_name,
                        draw_bbox_2d=args.draw_2d_bboxes,
                        draw_name=args.draw_class_names)

                print("Generator idx: {}".format(index))

            cv2.imshow('Image', image)
            pressed_key = cv2.waitKey()
            if pressed_key == ord('q'):
                cv2.destroyAllWindows()
                return
Ejemplo n.º 7
0
    def __init__(
        self,
        phi=1,
        image_sizes=(512, 640, 768, 896, 1024, 1280, 1408),
        misc_effect=None,
        visual_effect=None,
        batch_size=1,
        group_method="random",  # one of 'none', 'random', 'ratio'
        shuffle_groups=True,
        detect_text=False,
        detect_quadrangle=False,
    ):
        """
        Store the generator configuration, precompute anchors and build the image groups.

        Args:
            phi: Index into image_sizes selecting the (square) input resolution.
            image_sizes: Available input resolutions; image_sizes[phi] is used.
            misc_effect: Stored unchanged on the instance; not used by this constructor.
            visual_effect: Stored unchanged on the instance; not used by this constructor.
            batch_size: The size of the batches to generate (converted with int()).
            group_method: How images are grouped together (defaults to 'random', one of ('none', 'random', 'ratio')).
            shuffle_groups: If True, the groups are shuffled once at the end of construction.
            detect_text: If True, text-specific anchor ratios and sizes replace the default anchor parameters.
            detect_quadrangle: Stored unchanged on the instance; not used by this constructor.
        """
        # Plain configuration values.
        self.batch_size = int(batch_size)
        self.shuffle_groups = shuffle_groups
        self.group_method = group_method
        self.visual_effect = visual_effect
        self.misc_effect = misc_effect
        self.detect_quadrangle = detect_quadrangle
        self.detect_text = detect_text

        self.image_size = image_sizes[phi]
        self.groups = None

        # Text detection uses its own anchor ratios and sizes.
        if self.detect_text:
            self.anchor_parameters = AnchorParameters(
                ratios=(0.25, 0.5, 1.0, 2.0),
                sizes=(16, 32, 64, 128, 256))
        else:
            self.anchor_parameters = AnchorParameters.default

        input_shape = (self.image_size, self.image_size)
        self.anchors = anchors_for_shape(input_shape,
                                         anchor_params=self.anchor_parameters)
        self.num_anchors = self.anchor_parameters.num_anchors()

        # group_images() is defined elsewhere; it is expected to fill self.groups.
        self.group_images()

        # Initial shuffle so the first pass does not follow the grouping order.
        if self.shuffle_groups:
            random.shuffle(self.groups)
Ejemplo n.º 8
0
def run(generator, args, anchor_params):
    """!@brief
    Show every image of the generator once, with anchors and annotations drawn on request.

    @param generator     : The generator to debug.
    @param args          : Parseargs args object (reads resize, anchors and annotations).
    @param anchor_params : Forwarded to anchors_for_shape as anchor_params.

    @return False if 'q' was pressed while an image was shown, True after the last image.
    """
    for index in range(generator.size()):
        # Load the sample.
        image = generator.load_image(index)
        annotations = generator.load_annotations(index)

        # Random transformations are applied unconditionally.
        image, annotations = generator.random_transform_group_entry(image, annotations)

        # Optionally resize the image and scale the boxes in place to match.
        if args.resize:
            image, image_scale = generator.resize_image(image)
            annotations['bboxes'] *= image_scale

        anchors = anchors_for_shape(image.shape, anchor_params=anchor_params)
        positive_indices, _, max_indices = compute_gt_annotations(anchors, annotations['bboxes'])

        # Draw the positive anchors.
        if args.anchors:
            positive_anchors = anchors[positive_indices]
            draw_boxes(image, positive_anchors, (255, 255, 0), thickness=1)

        if args.annotations:
            # All annotations in red first ...
            draw_annotations(image, annotations, color=(0, 0, 255), label_to_name=generator.label_to_name)

            # ... then the annotations matched by a positive anchor in green on
            # top, so only annotations without anchors remain red.
            matched_boxes = annotations['bboxes'][max_indices[positive_indices], :]
            draw_boxes(image, matched_boxes, (0, 255, 0))

        cv2.imshow('Image', image)
        pressed_key = cv2.waitKey()
        if pressed_key == ord('q'):
            return False
    return True
Ejemplo n.º 9
0
def efficientdet(phi,
                 num_classes=20,
                 num_anchors=9,
                 weighted_bifpn=False,
                 freeze_bn=False,
                 score_threshold=0.01,
                 detect_quadrangle=False,
                 anchor_parameters=None):
    """Build the training model and the prediction model for one scaling level.

    Args:
        phi: Scaling index in range(7); selects entries from the module-level
            tables image_sizes, w_bifpns and backbones, and sets the BiFPN
            depth (2 + phi) and head depth (3 + int(phi / 3)).
        num_classes: Number of classes passed to the classification head.
        num_anchors: Number of anchors passed to both heads.
        weighted_bifpn: If True use build_wBiFPN, otherwise build_BiFPN.
        freeze_bn: Forwarded to the backbone and the BiFPN builders.
        score_threshold: Score threshold handed to FilterDetections.
        detect_quadrangle: If True, regression channels 4:8 and 8 are also fed
            to FilterDetections.
        anchor_parameters: Forwarded to anchors_for_shape as anchor_params.

    Returns:
        (model, prediction_model): model outputs [regression, classification];
        prediction_model outputs the filtered detections.
    """
    assert phi in range(7)
    side = image_sizes[phi]
    image_input = layers.Input((side, side, 3))
    bifpn_width = w_bifpns[phi]
    bifpn_depth = 2 + phi
    head_width = bifpn_width
    head_depth = 3 + int(phi / 3)
    features = backbones[phi](input_tensor=image_input, freeze_bn=freeze_bn)

    # Both BiFPN variants share a call signature, so pick the builder once.
    bifpn_builder = build_wBiFPN if weighted_bifpn else build_BiFPN
    for layer_idx in range(bifpn_depth):
        features = bifpn_builder(features, bifpn_width, layer_idx, freeze_bn=freeze_bn)

    regress_head = build_regress_head(head_width,
                                      head_depth,
                                      num_anchors=num_anchors,
                                      detect_quadrangle=detect_quadrangle)
    class_head = build_class_head(head_width,
                                  head_depth,
                                  num_classes=num_classes,
                                  num_anchors=num_anchors)

    # Apply each head to every pyramid level, then join the levels along axis 1.
    box_outputs = [regress_head(fmap) for fmap in features]
    regression = layers.Concatenate(axis=1, name='regression')(box_outputs)
    class_outputs = [class_head(fmap) for fmap in features]
    classification = layers.Concatenate(axis=1, name='classification')(class_outputs)

    model = models.Model(inputs=[image_input],
                         outputs=[regression, classification],
                         name='efficientdet')

    # Apply the predicted regression to the anchors, then clip to the image.
    anchors = anchors_for_shape((side, side), anchor_params=anchor_parameters)
    anchors_input = np.expand_dims(anchors, axis=0)
    boxes = RegressBoxes(name='boxes')([anchors_input, regression[..., :4]])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # Filter detections (NMS / score threshold / top-k); quadrangle mode adds
    # the extra regression channels as inputs.
    filter_kwargs = {'name': 'filtered_detections', 'score_threshold': score_threshold}
    filter_inputs = [boxes, classification]
    if detect_quadrangle:
        filter_kwargs['detect_quadrangle'] = True
        filter_inputs.extend([regression[..., 4:8], regression[..., 8]])
    detections = FilterDetections(**filter_kwargs)(filter_inputs)

    prediction_model = models.Model(inputs=[image_input],
                                    outputs=detections,
                                    name='efficientdet_p')
    return model, prediction_model
                                       detect_quadrangle=True,
                                       anchor_parameters=anchor_parameters,
                                       )
# Load the trained weights into the prediction model, matching layers by name.
prediction_model.load_weights(model_path, by_name=True)

import glob

# Run the quadrangle detector on every JPEG in the test image directory.
for image_path in glob.glob('datasets/ic15/test_images/*.jpg'):
    image = cv2.imread(image_path)
    # Keep an untouched copy of the loaded image.
    src_image = image.copy()
    # Reverse the channel axis (presumably BGR -> RGB -- confirm against preprocess_image).
    image = image[:, :, ::-1]
    h, w = image.shape[:2]

    # Resize to the network input size; scale and offsets are returned alongside the image.
    image, scale, offset_h, offset_w = preprocess_image(image, image_size=image_size)
    inputs = np.expand_dims(image, axis=0)
    anchors = anchors_for_shape((image_size, image_size), anchor_params=anchor_parameters)
    # run network
    start = time.time()
    boxes, scores, alphas, ratios, labels = prediction_model.predict_on_batch([np.expand_dims(image, axis=0),
                                                                               np.expand_dims(anchors, axis=0)])
    # alphas = np.exp(alphas)
    # Squash alphas and ratios into (0, 1) with a logistic sigmoid.
    alphas = 1 / (1 + np.exp(-alphas))
    ratios = 1 / (1 + np.exp(-ratios))
    # Eight values per detection; each entry below is either a box coordinate
    # or a point interpolated between two box coordinates by an alpha.
    # NOTE(review): assumes boxes are laid out as (x1, y1, x2, y2) -- confirm.
    # The visible fragment ends before entry 7 is assigned.
    quadrangles = np.zeros(boxes.shape[:2] + (8,))
    quadrangles[:, :, 0] = boxes[:, :, 0] + (boxes[:, :, 2] - boxes[:, :, 0]) * alphas[:, :, 0]
    quadrangles[:, :, 1] = boxes[:, :, 1]
    quadrangles[:, :, 2] = boxes[:, :, 2]
    quadrangles[:, :, 3] = boxes[:, :, 1] + (boxes[:, :, 3] - boxes[:, :, 1]) * alphas[:, :, 1]
    quadrangles[:, :, 4] = boxes[:, :, 2] - (boxes[:, :, 2] - boxes[:, :, 0]) * alphas[:, :, 2]
    quadrangles[:, :, 5] = boxes[:, :, 3]
    quadrangles[:, :, 6] = boxes[:, :, 0]
Ejemplo n.º 11
0
    def __init__(
        self,
        phi=0,
        image_sizes=(512, 640, 768, 896, 1024, 1280, 1408),
        train=True,
        use_colorspace_augmentation=False,
        use_6DoF_augmentation=False,
        scale_6DoF_augmentation=(0.7, 1.3),
        chance_no_augmentation=0.02,
        translation_scale_norm=1000.0,
        points_for_shape_match_loss=500,
        batch_size=1,
        rotation_representation="axis_angle",
        group_method='random',  # one of 'none', 'random', 'ratio'
        shuffle_groups=True,
    ):
        """
        Store the generator configuration, precompute anchors and build the image groups.

        Args:
            phi: EfficientPose scaling hyperparameter phi; used as index into image_sizes
            image_sizes: Tuple of different input image resolutions for every phi
            train: Boolean indicating whether the generator loads training data or not
            use_colorspace_augmentation: Boolean indicating whether to use augmentation in the color space or not
            use_6DoF_augmentation: Boolean indicating whether to use 6D augmentation or not
            scale_6DoF_augmentation: Tuple stored on the instance (presumably the scale range of the 6D augmentation - confirm)
            chance_no_augmentation: Probability to skip augmentation for an image
            translation_scale_norm: factor to change units. EfficientPose internally works with meter and if your dataset unit is mm for example, then you need to set this parameter to 1000
            points_for_shape_match_loss: Number of the objects 3D model points that are used in the loss function
            batch_size: The size of the batches to generate (converted with int()).
            rotation_representation: String which representation of rotation should be used. Currently only axis_angle is supported. Not read by this constructor.
            group_method: Determines how images are grouped together (defaults to 'random', one of ('none', 'random', 'ratio')).
            shuffle_groups: If True, the groups are shuffled once during construction.
        """
        # Batching / grouping options.
        self.batch_size = int(batch_size)
        self.shuffle_groups = shuffle_groups
        self.group_method = group_method
        self.image_size = image_sizes[phi]
        self.groups = None

        # Anchors for the square input; anchors_for_shape returns a pair here.
        self.anchor_parameters = AnchorParameters.default
        input_shape = (self.image_size, self.image_size)
        anchor_pair = anchors_for_shape(input_shape,
                                        anchor_params=self.anchor_parameters)
        self.anchors, self.translation_anchors = anchor_pair
        self.num_anchors = self.anchor_parameters.num_anchors()

        # Data / augmentation options.
        self.train = train
        self.use_colorspace_augmentation = use_colorspace_augmentation
        self.use_6DoF_augmentation = use_6DoF_augmentation
        self.scale_6DoF_augmentation = scale_6DoF_augmentation
        self.chance_no_augmentation = chance_no_augmentation
        self.translation_scale_norm = translation_scale_norm
        self.points_for_shape_match_loss = points_for_shape_match_loss

        # The color space augmenter is only created when it is enabled.
        self.rand_aug = (RandAugment(n=(1, 3), m=(1, 14))
                         if self.use_colorspace_augmentation else None)

        # group_images() is defined elsewhere; it is expected to fill self.groups.
        self.group_images()

        # Initial shuffle so the first pass does not follow the grouping order.
        if self.shuffle_groups:
            random.shuffle(self.groups)

        # self.class_to_model_3d_points is not set in this constructor; it must
        # already exist on the instance at this point.
        self.all_3d_model_points_array_for_loss = self.create_all_3d_model_points_array_for_loss(
            self.class_to_model_3d_points, self.points_for_shape_match_loss)
Ejemplo n.º 12
0
 def generate_anchors(self, image_shape):
     """Return the anchors for image_shape, passing self.compute_shapes as the shapes callback."""
     shapes_callback = self.compute_shapes
     return anchors_for_shape(image_shape, shapes_callback=shapes_callback)
def main():
    """Run a frozen detection graph on one image and display the detections.

    Imports the frozen graph at model_path into the default graph, feeds one
    preprocessed image together with its anchors, maps the predicted boxes back
    to the original image, keeps the detections scoring above the threshold and
    shows them in an OpenCV window until a key is pressed.
    """
    phi = 1
    model_path = 'checkpoints/2019-12-03/pascal_05.pb'
    image_sizes = (512, 640, 768, 896, 1024, 1280, 1408)
    image_size = image_sizes[phi]
    classes = [
        'aeroplane',
        'bicycle',
        'bird',
        'boat',
        'bottle',
        'bus',
        'car',
        'cat',
        'chair',
        'cow',
        'diningtable',
        'dog',
        'horse',
        'motorbike',
        'person',
        'pottedplant',
        'sheep',
        'sofa',
        'train',
        'tvmonitor',
    ]
    num_classes = len(classes)
    score_threshold = 0.5
    colors = [
        np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)
    ]

    output_names = {
        'output_boxes':
        'filtered_detections/map/TensorArrayStack/TensorArrayGatherV3:0',
        'output_scores':
        'filtered_detections/map/TensorArrayStack_1/TensorArrayGatherV3:0',
        'output_labels':
        'filtered_detections/map/TensorArrayStack_2/TensorArrayGatherV3:0'
    }

    # Import into the default graph. The previous tf.Graph()/as_default() pair
    # never entered the returned context manager, so the default graph was
    # what got used anyway.
    graph_def = get_frozen_graph(model_path)
    tf.import_graph_def(graph_def, name='')

    image_path = 'datasets/VOC2007/JPEGImages/000002.jpg'
    image = cv2.imread(image_path)
    src_image = image.copy()
    # Reverse the channel axis before preprocessing.
    image = image[:, :, ::-1]
    h, w = image.shape[:2]

    # preprocess_image also returns the offsets that post_process_boxes needs;
    # unpacking only (image, scale) left offset_h / offset_w undefined below.
    image, scale, offset_h, offset_w = preprocess_image(image,
                                                        image_size=image_size)
    anchors = anchors_for_shape((image_size, image_size))

    # The session is closed as soon as the prediction has been fetched.
    with tf.Session() as sess:
        output_boxes = sess.graph.get_tensor_by_name(
            output_names['output_boxes'])
        output_scores = sess.graph.get_tensor_by_name(
            output_names['output_scores'])
        output_labels = sess.graph.get_tensor_by_name(
            output_names['output_labels'])

        # run network
        start = time.time()
        image_batch = np.expand_dims(image, axis=0)
        anchors_batch = np.expand_dims(anchors, axis=0)
        feed_dict = {"input_1:0": image_batch, "input_4:0": anchors_batch}
        boxes, scores, labels = sess.run(
            [output_boxes, output_scores, output_labels], feed_dict)

    boxes, scores, labels = np.squeeze(boxes), np.squeeze(scores), np.squeeze(
        labels)
    print(time.time() - start)

    # Map boxes from network input coordinates back onto the original image.
    boxes = post_process_boxes(boxes=boxes,
                               scale=scale,
                               offset_h=offset_h,
                               offset_w=offset_w,
                               height=h,
                               width=w)

    # Keep boxes, scores and labels aligned by filtering all three.
    indices = np.where(scores > score_threshold)[0]
    boxes = boxes[indices]
    scores = scores[indices]
    labels = labels[indices]

    draw_boxes(src_image, boxes, scores, labels, colors, classes)

    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.imshow('image', src_image)
    cv2.waitKey(0)
Ejemplo n.º 14
0
def apply_subnets_to_feature_maps(box_net, class_net, rotation_net,
                                  translation_net, fpn_feature_maps,
                                  image_input, camera_parameters_input,
                                  input_size, anchor_parameters):
    """
    Run every subnetwork on each BiFPN level and assemble the network outputs.

    Args:
        box_net, class_net, rotation_net, translation_net: Subnetworks; each is called with [feature_map, level_index]
        fpn_feature_maps: Sequence of the BiFPN feature maps of the different levels (P3, P4, P5, P6, P7)
        image_input, camera_parameters_input: The image and camera parameter input layer
        input_size: Integer representing the input image resolution
        anchor_parameters: Struct containing anchor parameters. If None, default values are used.

    Returns:
       classification: Tensor containing the classification outputs for all anchor boxes. Shape (batch_size, num_anchor_boxes, num_classes)
       bbox_regression: Tensor containing the deltas of anchor boxes to the GT 2D bounding boxes for all anchor boxes. Shape (batch_size, num_anchor_boxes, 4)
       rotation: Tensor containing the rotation outputs for all anchor boxes. Shape (batch_size, num_anchor_boxes, num_rotation_parameters)
       translation: Tensor containing the translation outputs for all anchor boxes. Shape (batch_size, num_anchor_boxes, 3)
       transformation: Tensor containing the concatenated rotation and translation outputs for all anchor boxes. Shape (batch_size, num_anchor_boxes, num_rotation_parameters + 3)
                       Rotation and translation are concatenated because a Keras loss function receives only one GT and one prediction tensor, while the transformation loss needs both
       bboxes: Tensor containing the 2D bounding boxes for all anchor boxes. Shape (batch_size, num_anchor_boxes, 4)
    """
    # Per-level outputs of each subnetwork, joined across levels along axis 1.
    class_outputs = [
        class_net([fmap, level]) for level, fmap in enumerate(fpn_feature_maps)
    ]
    classification = layers.Concatenate(axis=1, name='classification')(class_outputs)

    box_outputs = [
        box_net([fmap, level]) for level, fmap in enumerate(fpn_feature_maps)
    ]
    bbox_regression = layers.Concatenate(axis=1, name='regression')(box_outputs)

    rotation_outputs = [
        rotation_net([fmap, level]) for level, fmap in enumerate(fpn_feature_maps)
    ]
    rotation = layers.Concatenate(axis=1, name='rotation')(rotation_outputs)

    translation_outputs = [
        translation_net([fmap, level]) for level, fmap in enumerate(fpn_feature_maps)
    ]
    translation_raw = layers.Concatenate(
        axis=1, name='translation_raw_outputs')(translation_outputs)

    # Get both anchor sets and apply the predicted translation offsets to the
    # translation anchors.
    input_shape = (input_size, input_size)
    anchors, translation_anchors = anchors_for_shape(
        input_shape, anchor_params=anchor_parameters)
    translation_anchors_input = np.expand_dims(translation_anchors, axis=0)

    translation_xy_Tz = RegressTranslation(name='translation_regression')(
        [translation_anchors_input, translation_raw])
    # The camera parameter columns are passed on in the order
    # fx, fy, px, py, tz_scale, image_scale.
    translation = CalculateTxTy(name='translation')(
        translation_xy_Tz,
        fx=camera_parameters_input[:, 0],
        fy=camera_parameters_input[:, 1],
        px=camera_parameters_input[:, 2],
        py=camera_parameters_input[:, 3],
        tz_scale=camera_parameters_input[:, 4],
        image_scale=camera_parameters_input[:, 5])

    # Apply the predicted 2D bbox regression to the anchors and clip to the image.
    anchors_input = np.expand_dims(anchors, axis=0)
    bboxes = RegressBoxes(name='boxes')(
        [anchors_input, bbox_regression[..., :4]])
    bboxes = ClipBoxes(name='clipped_boxes')([image_input, bboxes])

    # Concatenate rotation and translation into one transformation output for the
    # transformation loss. A standard Concatenate layer raises a shape mismatch
    # here (translation dim 2 is known via translation_anchors, rotation dim 2 is
    # None), so a Lambda layer around tf.concat is used instead.
    transformation = layers.Lambda(
        lambda input_list: tf.concat(input_list, axis=-1),
        name="transformation")([rotation, translation])

    return classification, bbox_regression, rotation, translation, transformation, bboxes
Ejemplo n.º 15
0
def main():
    """Run the detector on a single image and display the detections.

    Builds the prediction model, loads weights by name, predicts on
    'datasets/VOC2007/JPEGImages/000002.jpg', maps the boxes back onto the
    original image, keeps the detections scoring above the threshold and shows
    them in an OpenCV window until a key is pressed. The prediction time is
    printed.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    phi = 1
    weighted_bifpn = False
    model_path = 'checkpoints/2019-12-03/pascal_05_0.6283_1.1975_0.8029.h5'
    image_sizes = (512, 640, 768, 896, 1024, 1280, 1408)
    image_size = image_sizes[phi]
    classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
    ]
    num_classes = len(classes)
    score_threshold = 0.5
    colors = [
        np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)
    ]
    # Only the prediction model is needed for inference.
    _, prediction_model = efficientdet(phi=phi,
                                       weighted_bifpn=weighted_bifpn,
                                       num_classes=num_classes,
                                       score_threshold=score_threshold)
    prediction_model.load_weights(model_path, by_name=True)

    image_path = 'datasets/VOC2007/JPEGImages/000002.jpg'
    image = cv2.imread(image_path)
    src_image = image.copy()
    # Reverse the channel axis before preprocessing.
    image = image[:, :, ::-1]
    h, w = image.shape[:2]

    image, scale, offset_h, offset_w = preprocess_image(image,
                                                        image_size=image_size)
    anchors = anchors_for_shape((image_size, image_size))

    # run network
    start = time.time()
    boxes, scores, labels = prediction_model.predict_on_batch(
        [np.expand_dims(image, axis=0),
         np.expand_dims(anchors, axis=0)])
    boxes, scores, labels = np.squeeze(boxes), np.squeeze(scores), np.squeeze(
        labels)
    print(time.time() - start)

    # Map boxes from network input coordinates back onto the original image.
    boxes = post_process_boxes(boxes=boxes,
                               scale=scale,
                               offset_h=offset_h,
                               offset_w=offset_w,
                               height=h,
                               width=w)

    # Keep boxes, scores and labels aligned by filtering all three; leaving
    # scores unfiltered only works while detections arrive sorted by score.
    indices = np.where(scores > score_threshold)[0]
    boxes = boxes[indices]
    scores = scores[indices]
    labels = labels[indices]

    draw_boxes(src_image, boxes, scores, labels, colors, classes)

    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.imshow('image', src_image)
    cv2.waitKey(0)
Ejemplo n.º 16
0
def _decode_quadrangles(boxes, alphas):
    """Place one quadrangle vertex on each side of every axis-aligned box.

    :param boxes: array indexed as ``[batch, detection, 4]`` holding
        ``(x0, y0, x1, y1)`` per detection
    :param alphas: array indexed as ``[batch, detection, 4]``; the caller
        passes values already squashed by a sigmoid, so each one is a
        fraction of the box width/height
    :return: new float array of shape ``boxes.shape[:2] + (8,)`` stored as
        four ``(x, y)`` pairs lying on the ``y0``, ``x1``, ``y1`` and ``x0``
        sides of the box, in that order
    """
    x0, y0, x1, y1 = (boxes[:, :, k] for k in range(4))
    box_w = x1 - x0
    box_h = y1 - y0

    quadrangles = np.zeros(boxes.shape[:2] + (8,))
    quadrangles[:, :, 0] = x0 + box_w * alphas[:, :, 0]
    quadrangles[:, :, 1] = y0
    quadrangles[:, :, 2] = x1
    quadrangles[:, :, 3] = y0 + box_h * alphas[:, :, 1]
    quadrangles[:, :, 4] = x1 - box_w * alphas[:, :, 2]
    quadrangles[:, :, 5] = y1
    quadrangles[:, :, 6] = x0
    quadrangles[:, :, 7] = y1 - box_h * alphas[:, :, 3]
    return quadrangles


def _to_source_coords(coords, scale, offset_h, offset_w, patch_h, patch_w, row, col):
    """Map network-space coordinates back onto the full source image, in place.

    ``coords`` is indexed as ``[batch, detection, 2k]`` with interleaved
    ``x, y`` values (even columns are x, odd columns are y). The offsets and
    scale returned by ``preprocess_image`` are undone, the result is clipped
    to the patch extent and finally shifted by the patch origin.

    Only batch entry 0 is shifted and clipped (the caller predicts one patch
    at a time); the division by ``scale`` touches the whole array.

    :return: the same ``coords`` array, for convenience
    """
    coords[0, :, 0::2] = coords[0, :, 0::2] - offset_w
    coords[0, :, 1::2] = coords[0, :, 1::2] - offset_h
    coords /= scale
    # Clip inside the patch first, then translate by the patch origin.
    coords[0, :, 0::2] = np.clip(coords[0, :, 0::2], 0, patch_w - 1) + col
    coords[0, :, 1::2] = np.clip(coords[0, :, 1::2], 0, patch_h - 1) + row
    return coords


def inference(model_path, image_dir, dst_path, patch_size, overlay_size, save_img, test_one,
              score_threshold, nms_threshold, model_nms_threshold):
    """ Inference images to detect objects

    Every ``png``/``jpg`` file found under ``image_dir`` is cut into patches,
    each patch is run through the prediction model, and the per-patch
    detections are merged with ``nms`` before being written out.

    Besides its arguments this function reads the module-level names
    ``phi``, ``weighted_bifpn``, ``num_classes``, ``image_size`` and
    ``colors``, which must be defined before it is called.

    :param str model_path: path to the trained weights loaded into the model
    :param str image_dir: directory (searched recursively) to source images
    :param str dst_path: directory receiving ``result.csv`` and, when
        ``save_img`` is set, the ``img/ship<N>.jpg`` visualisations
    :param int patch_size: patch size that width and height of patch is equal
    :param int overlay_size: overlay size in patches
    :param bool save_img: draw the kept quadrangles and save one image per file
    :param bool test_one: only process the first 20 files found
    :param float score_threshold: minimum score; passed to the model and also
        used to filter each patch's detections
    :param float nms_threshold: threshold handed to ``nms`` when merging the
        detections of all patches of one image
    :param float model_nms_threshold: ``nms_threshold`` used inside the model
    :return: None (save detection output)

    """
    # Get filenames
    file_paths = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(image_dir)
        for name in files
        if name.endswith(('png', 'jpg'))
    ]
    if test_one:
        file_paths = file_paths[:20]

    _, prediction_model = efficientdet(phi=phi,
                                       weighted_bifpn=weighted_bifpn,
                                       num_classes=num_classes,
                                       num_anchors=AnchorParameters.ship.num_anchors(),
                                       score_threshold=score_threshold,
                                       detect_quadrangle=True,
                                       anchor_parameters=AnchorParameters.ship,
                                       nms_threshold=model_nms_threshold)
    print(model_path)
    prediction_model.load_weights(model_path, by_name=True)

    # The anchors depend only on the fixed network input size, so build the
    # batched array once instead of once per patch.
    anchors = anchors_for_shape((image_size, image_size), anchor_params=AnchorParameters.ship)
    anchor_batch = np.expand_dims(anchors, axis=0)

    if save_img:
        # cv2.imwrite reports failure through its return value only, so a
        # missing output directory would otherwise drop every image silently.
        os.makedirs(dst_path + '/img', exist_ok=True)

    det_by_file = dict()

    for file_path in tqdm(file_paths):
        src_image = cv2.imread(file_path)
        if src_image is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Skipping unreadable image: {}'.format(file_path))
            continue

        patch_generator = get_patch_generator(src_image, patch_size=patch_size, overlay_size=overlay_size)

        classes_list, scores_list, quadrangles_list, boxes_list, ratios_list = [], [], [], [], []

        for patch_image, row, col in patch_generator:
            patch_input, scale, offset_h, offset_w = preprocess_image(patch_image, image_size=image_size)
            # run network
            boxes, scores, alphas, ratios, classes = prediction_model.predict(
                [np.expand_dims(patch_input, axis=0), anchor_batch])
            h, w = patch_image.shape[:2]

            # The alpha/ratio heads are squashed with a sigmoid here.
            alphas = 1 / (1 + np.exp(-alphas))
            ratios = 1 / (1 + np.exp(-ratios))

            # Quadrangles must be derived before the boxes are remapped.
            quadrangles = _decode_quadrangles(boxes, alphas)
            _to_source_coords(boxes, scale, offset_h, offset_w, h, w, row, col)
            _to_source_coords(quadrangles, scale, offset_h, offset_w, h, w, row, col)

            # select indices which have a score above the threshold
            keep = np.where(scores[0, :] > score_threshold)[0]

            # select those detections
            quadrangles_list.extend(quadrangles[0, keep])
            boxes_list.extend(boxes[0, keep])
            classes_list.extend(classes[0, keep])
            scores_list.extend(scores[0, keep])
            ratios_list.extend(ratios[0, keep])

        quadrangles = np.array(quadrangles_list).reshape(-1, 8)
        boxes = np.array(boxes_list).reshape(-1, 4)
        classes = np.array(classes_list).flatten()
        scores = np.array(scores_list).flatten()
        ratios = np.array(ratios_list).flatten()

        # Merge the detections of overlapping patches.
        quadrangles, boxes, classes, scores, ratios = nms(quadrangles, boxes, classes, scores, ratios, nms_threshold)
        det_by_file[file_path] = {'boxes': quadrangles, 'classes': classes, 'scores': scores}

        # Save detection output
        if save_img:
            for label, quadrangle in zip(classes, quadrangles):
                color = colors[int(label)]
                cv2.drawContours(src_image, [quadrangle.astype(np.int32).reshape((4, 2))], -1, color, 3)

            # Number the output after the first digit run of the file name;
            # fall back to the whole path so names without digits in the
            # basename behave as before. A digit in a directory name no
            # longer makes every image overwrite the same output file.
            # NOTE(review): still raises IndexError when the path contains no
            # digits at all - confirm that input files are always numbered.
            digit_runs = (re.findall(r"\d+", os.path.basename(file_path))
                          or re.findall(r"\d+", file_path))
            cv2.imwrite(dst_path + '/img/ship{}.jpg'.format(int(digit_runs[0])), src_image)

    save_det_to_csv(dst_path + '/result.csv', det_by_file)
Ejemplo n.º 17
0
# One random 3-value colour per class, used when drawing detections.
colors = [np.random.randint(0, 256, 3).tolist() for _ in range(num_classes)]
model, prediction_model = efficientdet(phi=phi,
                                       weighted_bifpn=weighted_bifpn,
                                       num_classes=num_classes,
                                       score_threshold=score_threshold)
prediction_model.load_weights(model_path, by_name=True)

image = cv2.imread(args.image_path)
if image is None:
    # cv2.imread returns None instead of raising; fail with a clear message
    # rather than an AttributeError on the next line.
    raise OSError('Could not read image: {}'.format(args.image_path))
src_image = image.copy()  # untouched copy kept for drawing
image = image[:, :, ::-1]  # reverse the channel order
h, w = image.shape[:2]

image, scale, offset_h, offset_w = preprocess_image(image,
                                                    image_size=image_size)
inputs = np.expand_dims(image, axis=0)
anchors = anchors_for_shape((image_size, image_size))
# run network
start = time.time()
boxes, scores, labels = prediction_model.predict_on_batch(
    [inputs, np.expand_dims(anchors, axis=0)])
print(time.time() - start)

# Undo the offsets and scale reported by preprocess_image, then clip to the
# original image extent. Even columns are x values, odd columns are y values.
boxes[0, :, 0::2] = boxes[0, :, 0::2] - offset_w
boxes[0, :, 1::2] = boxes[0, :, 1::2] - offset_h
boxes /= scale
boxes[0, :, 0::2] = np.clip(boxes[0, :, 0::2], 0, w - 1)
boxes[0, :, 1::2] = np.clip(boxes[0, :, 1::2], 0, h - 1)

# select indices which have a score above the threshold
Ejemplo n.º 18
0
                        help="image file to detect on",
                        required=True)

    args = parser.parse_args()

    ## initialise configuration
    model_path = args.model
    phi = int(args.phi)
    object_classes = generate_voc_classes()
    resolutions = generate_resolutions()
    score_threshold = args.threshold
    max_detections = args.max_detection

    num_classes = len(object_classes)
    colors = generate_class_colors(num_classes)
    anchors = anchors_for_shape((resolutions[phi], resolutions[phi]))

    ## Start detection process
    image = cv2.imread(args.image)
    model, prediction_model = efficientdet(phi=phi, num_classes=num_classes)
    prediction_model.load_weights(model_path, by_name=True)

    draw_image = image.copy()
    start_time = time.time()
    detections = detect_on_frame(image, prediction_model, anchors,
                                 score_threshold, max_detections)
    print("Prediction speed {}".format(1 / (time.time() - start_time)))

    ## Visualise detections
    for detection in detections:
        label = int(detection[5])