Example 1
    def __init__(self, txt_path, image_shape, rgb_means, min_sizes, steps):
        self.img_paths = []
        self.labels = []
        self.output_shape = image_shape
        self.rgb_means = rgb_means

        with open(txt_path, 'r') as f:
            lines = f.readlines()
        isFirst = True
        # 15 values per row: 4 bbox coords, 5 (x, y) landmark pairs, and a
        # landmark-visibility flag.
        img_label = np.zeros((0, 15))
        for line in lines:
            line = line.rstrip()
            if line.startswith('#'):
                if isFirst is True:
                    isFirst = False
                else:
                    img_label_copy = img_label.copy()
                    self.labels.append(img_label_copy)
                    img_label = np.zeros((0, 15))
                path = line[2:]
                path = txt_path.replace('label.txt', 'images/') + path
                self.img_paths.append(path)
            else:
                line = line.split(' ')
                line = [float(x) for x in line]

                label = np.zeros((1, 15))
                label[0, 0] = line[0]  # x1
                label[0, 1] = line[1]  # y1
                label[0, 2] = line[0] + line[2]  # x2
                label[0, 3] = line[1] + line[3]  # y2

                # landmarks (the annotation stores an extra flag after each
                # (x, y) pair; those flags are skipped below)
                label[0, 4] = line[4]  # l0_x
                label[0, 5] = line[5]  # l0_y
                label[0, 6] = line[7]  # l1_x
                label[0, 7] = line[8]  # l1_y
                label[0, 8] = line[10]  # l2_x
                label[0, 9] = line[11]  # l2_y
                label[0, 10] = line[13]  # l3_x
                label[0, 11] = line[14]  # l3_y
                label[0, 12] = line[16]  # l4_x
                label[0, 13] = line[17]  # l4_y
                if (label[0, 4] < 0):
                    label[0, 14] = -1
                else:
                    label[0, 14] = 1
                img_label = np.append(img_label, label, axis=0)
        self.labels.append(img_label)
        self.anchors = generate_anchors(min_sizes, steps, image_shape)
        self.total = len(self.img_paths)
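The examples on this page call several different generate_anchors implementations with incompatible signatures. As a point of reference, here is a minimal sketch of the RetinaFace-style variant that Example 1 appears to assume, where min_sizes lists the anchor sizes per pyramid level and steps are the matching feature-map strides; the output layout (normalized center-size boxes) is an assumption, not the repo's confirmed behavior.

import numpy as np
from itertools import product

def generate_anchors(min_sizes, steps, image_shape):
    # One (cx, cy, w, h) anchor per min_size at every feature-map cell,
    # normalized to [0, 1] by the image dimensions.
    height, width = image_shape[:2]
    anchors = []
    for step, sizes in zip(steps, min_sizes):
        fm_h = int(np.ceil(height / step))
        fm_w = int(np.ceil(width / step))
        for i, j in product(range(fm_h), range(fm_w)):
            for size in sizes:
                cx = (j + 0.5) * step / width
                cy = (i + 0.5) * step / height
                anchors.append([cx, cy, size / width, size / height])
    return np.array(anchors, dtype=np.float32)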
Example 2
    def test_generate_anchors(self):
        DEBUG = False
        if DEBUG:
            image = cv2.imread(os.path.join('images', '1.jpg'))
            anchors = generate_anchors(image.shape,
                                       scales=[1 / 2, 2],
                                       base_size=32,
                                       stride=32)
            print(anchors.shape)
            anchors = anchors.reshape(-1, 4)
            for anchor in anchors:
                cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                              (int(anchor[2]), int(anchor[3])), (0, 0, 255), 1)
            cv2.imshow('anchors', image)
            cv2.waitKey(0)
Example 3
    def build(self):
        ##############
        # Set Inputs
        ##############

        if self.mode == 'inference_init':
            # The input template's batch size is fixed to 1.
            inp_template = Input(batch_shape=(1, ) + self.config.template_size,
                                 name='inp_template')

        elif self.mode == 'inference':
            # At inference time the batch size must be 1.
            assert self.config.batch_size == 1

            inp_img = Input(shape=self.config.instance_size, name='inp_img')
            # Generate anchors once and broadcast them across the batch.
            anchors = generate_anchors(self.config.total_stride,
                                       self.config.scales, self.config.ratios,
                                       self.config.score_size)
            anchors = np.broadcast_to(anchors, (self.config.batch_size, ) +
                                      anchors.shape)  # shape = (1, 19, 19, 5, 4)

        ###########################
        # Set Backbone
        ###########################

        self.encoder = build_encoder()

        if self.mode == 'inference':
            encoded_img = self.encoder(inp_img)
            model = Model([inp_img], outputs=encoded_img, name='bb_alex_large')
            return model

        elif self.mode == 'inference_init':

            # Per-anchor head filter counts (unused in this trimmed variant;
            # Example 8 feeds them into the template conv heads).
            cls_filters = 2 * self.config.num_anchors * self.config.encoder_out_filter  # 5120
            bbox_filters = 4 * self.config.num_anchors * self.config.encoder_out_filter  # 10240
            encoded_template = self.encoder(inp_template)

            model = Model([inp_template],
                          encoded_template,
                          name='bb_alex_small')

            return model
Example 4
    def __init__(self,
                 feature_architecture='resnet',
                 anchor_scales=(128, 256, 512),
                 feat_stride=16,
                 negative_overlap=0.3,
                 positive_overlap=0.7,
                 fg_fraction=0.5,
                 batch_size=128,
                 nms_thresh=0.7,
                 pre_nms_limit=6000,
                 post_nms_limit=2000):
        super(RegionProposalNetwork, self).__init__()
        # Setup
        if feature_architecture == 'vgg16':
            input_dims = 512
        else:
            input_dims = 256

        self.test = False
        self.anchors = generate_anchors(feat_stride=feat_stride,
                                        scales=anchor_scales)
        self.num_anchors = self.anchors.shape[0]
        self.feat_stride = feat_stride  # how much smaller is the feature map than the original image
        self.negative_overlap = negative_overlap
        self.positive_overlap = positive_overlap
        self.fg_fraction = fg_fraction
        self.batch_size = batch_size

        # used for both train and test
        self.nms_thresh = nms_thresh
        self.pre_nms_limit = pre_nms_limit
        self.post_nms_limit = post_nms_limit

        # for calculating targets
        self.all_anchor_boxes = None
        self.feature_map_dim = None  # (N, C, H, W)

        # layers: 2 class scores and 4 box deltas per anchor at each cell
        self.rpn_conv1 = nn.Conv2d(input_dims, 512, kernel_size=3, padding=1)
        self.conv_classify = nn.Conv2d(512, 2 * self.num_anchors, kernel_size=1)
        self.conv_bbox_regr = nn.Conv2d(512, 4 * self.num_anchors, kernel_size=1)
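With the default 3 scales (and, assuming the usual 3 aspect ratios) num_anchors is 9, so the two 1x1 heads emit 18 class scores and 36 box deltas at every feature-map cell. A quick, self-contained shape check (the 38x50 feature-map size is hypothetical):

import torch
import torch.nn as nn

x = torch.randn(1, 512, 38, 50)                # toy feature map
cls = nn.Conv2d(512, 2 * 9, kernel_size=1)(x)
bbox = nn.Conv2d(512, 4 * 9, kernel_size=1)(x)
print(cls.shape)   # torch.Size([1, 18, 38, 50])
print(bbox.shape)  # torch.Size([1, 36, 38, 50])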
Example 5
    def __init__(self,
                 size,
                 stride,
                 ratios=None,
                 scales=None,
                 *args,
                 **kwargs):
        """ Initializer for an Anchors layer.

        Args
            size: The base size of the anchors to generate.
            stride: The stride of the anchors to generate.
            ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
            scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
        """
        self.size = size
        self.stride = stride
        self.ratios = ratios
        self.scales = scales

        if ratios is None:
            self.ratios = utils_anchors.AnchorParameters.default.ratios
        elif isinstance(ratios, list):
            self.ratios = np.array(ratios)
        if scales is None:
            self.scales = utils_anchors.AnchorParameters.default.scales
        elif isinstance(scales, list):
            self.scales = np.array(scales)

        self.num_anchors = len(self.ratios) * len(self.scales)
        self.anchors = keras.backend.variable(
            utils_anchors.generate_anchors(
                base_size=self.size,
                ratios=self.ratios,
                scales=self.scales,
            ))

        super(Anchors, self).__init__(*args, **kwargs)
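A hedged usage sketch: assuming (as in keras-retinanet) that the layer's call() tiles the base anchors over the spatial grid of its input, one Anchors layer is created per pyramid level. The size and stride values below are illustrative only.

# Hypothetical wiring for a single pyramid level (P3).
P3 = keras.layers.Input(shape=(None, None, 256), name='P3')
anchors_p3 = Anchors(size=32, stride=8, name='anchors_p3')(P3)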
Example 6
    def test_generate_minibatch(self):
        DEBUG = True
        if DEBUG:
            image = np.ones((500, 500, 3))
            box_size = 60
            bounding_boxes = np.array(
                [[100, 100, 100 + box_size, 100 + box_size],
                 [300, 300, 300 + box_size, 300 + box_size]])
            for box in bounding_boxes:
                image[box[1]:box[3], box[0]:box[2]] = 0
            anchors = generate_anchors(image.shape)
            anchors_batch_indices, _, _ = generate_minibatch_mask(
                anchors, bounding_boxes, batch_size=64)
            anchors = anchors.reshape(-1, 4)
            anchors_indices = anchors_batch_indices.reshape(-1, )
            # Color-code the sampled anchors: -1 red, 1 green (BGR).
            for anchor in anchors[anchors_indices == -1, :]:
                cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                              (int(anchor[2]), int(anchor[3])), (0, 0, 255), 1)
            for anchor in anchors[anchors_indices == 1, :]:
                cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                              (int(anchor[2]), int(anchor[3])), (0, 255, 0), 1)

            cv2.imshow('anchors', image)
            cv2.waitKey(0)
Example 7
    def test_classify_anchors(self):
        DEBUG = False
        if DEBUG:
            image = np.ones((500, 500, 3))
            box_size = 60
            bounding_boxes = np.array(
                [[100, 100, 100 + box_size, 100 + box_size],
                 [300, 300, 300 + box_size, 300 + box_size]])
            for box in bounding_boxes:
                image[box[1]:box[3], box[0]:box[2]] = 0
            anchors = generate_anchors(image.shape).reshape(-1, 4)
            anchors_classes = classify_anchors(bounding_boxes, anchors)
            # Color-code anchors by class: -1 red, 0 blue, 1 green (BGR).
            for anchor in anchors[anchors_classes == -1]:
                cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                              (int(anchor[2]), int(anchor[3])), (0, 0, 255), 1)
            for anchor in anchors[anchors_classes == 0]:
                cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                              (int(anchor[2]), int(anchor[3])), (255, 0, 0), 1)
            for anchor in anchors[anchors_classes == 1]:
                cv2.rectangle(image, (int(anchor[0]), int(anchor[1])),
                              (int(anchor[2]), int(anchor[3])), (0, 255, 0), 1)

            cv2.imshow('anchors', image)
            cv2.waitKey(0)
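classify_anchors itself is not shown on this page. Below is a minimal IoU-based sketch; the label convention (1 = positive, -1 = negative, 0 = neither) is chosen to match the colors drawn in the tests above, and the thresholds are assumptions, not the repo's confirmed values.

import numpy as np

def classify_anchors(gt_boxes, anchors, pos_thresh=0.7, neg_thresh=0.3):
    # Pairwise IoU between anchors (N, 4) and ground-truth boxes (M, 4).
    x1 = np.maximum(anchors[:, None, 0], gt_boxes[None, :, 0])
    y1 = np.maximum(anchors[:, None, 1], gt_boxes[None, :, 1])
    x2 = np.minimum(anchors[:, None, 2], gt_boxes[None, :, 2])
    y2 = np.minimum(anchors[:, None, 3], gt_boxes[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_a = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    area_g = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])
    iou = inter / (area_a[:, None] + area_g[None, :] - inter)
    best = iou.max(axis=1)           # best IoU per anchor
    labels = np.zeros(len(anchors), dtype=int)   # 0 = neither
    labels[best < neg_thresh] = -1               # negatives
    labels[best >= pos_thresh] = 1               # positives
    return labels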
Example 8
    def build(self):
        ##############
        # Inputs
        ##############
        if self.mode == 'inference':
            # At inference time the batch size must be 1.
            assert self.config.batch_size == 1

            inp_img = KL.Input(shape=self.config.instance_size, name='inp_img')
            # Generate anchors once and broadcast them across the batch.
            anchors = generate_anchors(self.config.total_stride,
                                       self.config.scales, self.config.ratios,
                                       self.config.score_size)
            anchors = np.broadcast_to(anchors, (self.config.batch_size, ) +
                                      anchors.shape)
            # Inject the anchors as a constant tensor (the Lambda's inp_img
            # argument is ignored); the cached template heads below use the
            # same trick.
            anchors = KL.Lambda(lambda x: K.variable(anchors),
                                name='inp_anchors')(inp_img)
            #inp_template = KL.Input(batch_shape = (1,)+self.config.template_size, name='inp_template')
            cls_template = KL.Lambda(
                lambda x: K.variable(self.config.cls_template),
                name='cls_template')(inp_img)
            bbox_template = KL.Lambda(
                lambda x: K.variable(self.config.bbox_template),
                name='bbox_template')(inp_img)
        elif self.mode == 'inference_init':
            # The input template's batch size is fixed to 1.
            inp_template = KL.Input(batch_shape=(1, ) +
                                    self.config.template_size,
                                    name='inp_template')
        ###########################
        # Encoder
        ###########################
        self.encoder = build_encoder()
        if self.mode == 'inference_init':
            ###########
            # Init
            ###########
            cls_filters = 2 * self.config.num_anchors * self.config.encoder_out_filter
            bbox_filters = 4 * self.config.num_anchors * self.config.encoder_out_filter
            encoded_template = self.encoder(inp_template)
            cls_template = KL.Conv2D(cls_filters, (3, 3),
                                     name='conv_cls1')(encoded_template)
            bbox_template = KL.Conv2D(bbox_filters, (3, 3),
                                      name='conv_r1')(encoded_template)
            outputs = [cls_template, bbox_template]
            return KM.Model([inp_template], outputs, name='Siamese_init')

        elif self.mode == 'inference':
            ###################
            # Inference
            ###################
            encoded_img = self.encoder(inp_img)
            cls_img = KL.Conv2D(self.config.encoder_out_filter, (3, 3),
                                name='conv_cls2')(encoded_img)
            bbox_img = KL.Conv2D(self.config.encoder_out_filter, (3, 3),
                                 name='conv_r2')(encoded_img)
            cls_out = CONV(self.config,
                           name='cls_nn_conv')([cls_img, cls_template])
            bbox_out = CONV(self.config,
                            name='box_nn_conv')([bbox_img, bbox_template])
            bbox_out = KL.Conv2D(4 * self.config.num_anchors,
                                 1,
                                 name='regress_adjust')(bbox_out)

            outputs = KL.Lambda(lambda x: eval_graph(*x, config=self.config),
                                name='Eval')([bbox_out, cls_out, anchors])
            return KM.Model([inp_img], outputs, name='Siamese_inference')
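A hedged sketch of how the two modes chain together at tracking time. The driver below is hypothetical (the page does not show the class constructor, here stood in by make_model), but the pattern is implied by the cached cls_template/bbox_template constants consumed in 'inference' mode.

# Hypothetical driver: build the init graph once on the template crop,
# cache its two head outputs, then run the per-frame inference graph.
init_model = make_model(mode='inference_init').build()
cls_t, bbox_t = init_model.predict(template_crop[np.newaxis])
config.cls_template, config.bbox_template = cls_t, bbox_t
infer_model = make_model(mode='inference').build()
outputs = infer_model.predict(search_crop[np.newaxis])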
Example 9
    def __init__(self, inputs):
        """
        Region proposal net - inputs should be a list of [convolution model, tuple(image_h, image_w, image_scale)]
        """
        self.conv_in, self.im_info = inputs
        ## inputs is a convolutional net (e.g. VGG or ZFNet) before the fully-connected layers.
        super(RPN, self).__init__(inputs)
        in_filters = self.conv_in.output_size[1]  # 512
        # RPN conv layers
        classes = 2
        n_anchors = 9
        min_size = 16
        anchor_size = 16
        nms_thresh = 0.7
        topN = 2000

        self.conv = Conv2D(inputs=self.conv_in,
                           n_filters=in_filters,
                           filter_size=(3, 3),
                           stride=(1, 1),
                           activation='relu',
                           border_mode='full')

        self.cls_score = Conv2D(inputs=self.conv,
                                n_filters=classes * n_anchors,
                                filter_size=(1, 1),
                                stride=(1, 1),
                                activation='linear',
                                border_mode='valid')

        # need to dimshuffle/flatten it down to get the softmax class probabilities for each class of `classes`
        cls_shape = self.cls_score.get_outputs().shape
        cls_score = self.cls_score.get_outputs().reshape(
            (cls_shape[0], classes, -1, cls_shape[3]))
        # shuffle to (classes, batch, row, col)
        cls_shuffle = cls_score.dimshuffle((1, 0, 2, 3))
        # flatten to (classes, batch*row*col)
        cls_flat = cls_shuffle.flatten(2)
        # shuffle to (batch*row*col, classes)
        cls_flat = cls_flat.dimshuffle((1, 0))
        # softmax for probability!
        cls_probs_flat = T.nnet.softmax(cls_flat)
        # now shuffle back up to 4D output from cls_score (undo what we did)
        cls_probs = cls_probs_flat.dimshuffle(
            (1, 0)).reshape(cls_shuffle.shape)
        cls_probs = cls_probs.dimshuffle((1, 0, 2, 3))
        self.cls_probs = cls_probs.reshape(cls_shape)

        self.bbox_pred = Conv2D(inputs=self.conv,
                                n_filters=4 * n_anchors,
                                filter_size=(1, 1),
                                stride=(1, 1),
                                activation='linear',
                                border_mode='valid')

        ###############
        #  1. Generate proposals from bbox deltas and shifted anchors (ROIs)
        ###############
        anchors = theano.shared(generate_anchors(anchor_size))
        # The foreground (object) probabilities are the second set of
        # n_anchors channels of cls_probs.
        object_probs = self.cls_probs[:, n_anchors:, :, :]
        bbox_deltas = self.bbox_pred.get_outputs()
        # height and width of convolution features
        H, W = object_probs.shape[-2:]
        # essentially do numpy's meshgrid by tiling anchors across height and width of convolution features
        shift_x = (T.arange(0, W) * anchor_size).reshape((1, W))
        shift_y = (T.arange(0, H) * anchor_size).reshape((1, H))
        shift_x = T.tile(shift_x, (H, 1))
        shift_y = T.tile(shift_y.T, (1, W))
        shifts = T.stack([
            shift_x.ravel(),
            shift_y.ravel(),
            shift_x.ravel(),
            shift_y.ravel()
        ]).T
        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = n_anchors
        K = shifts.shape[0]
        anchors = anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
        anchors = anchors.reshape((K * A, 4))
        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.dimshuffle((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the object scores:
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = object_probs.dimshuffle((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, self.im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_boxes(proposals, min_size * self.im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        order = scores.ravel().argsort()[::-1]

        proposals = proposals[order, :]
        scores = scores[order]

        # 5. apply nms (e.g. threshold = 0.7)
        # 6. keep the top topN (e.g. 2000) survivors
        # 7. return the top proposals (-> RoIs top)
        keep, self.updates = nms(T.concatenate([proposals, scores], axis=1),
                                 nms_thresh)
        keep = keep[:topN]
        self.proposals = proposals[keep, :]
        self.scores = scores[keep]

        self.outputs = [self.proposals, self.scores]
        # self.output_size = [self.cls_score.output_size, self.bbox_pred.output_size]

        self.params = {}
        self.params.update(p_dict("rpn_conv/3x3_", self.conv))
        self.params.update(p_dict("rpn_cls_score_", self.cls_score))
        self.params.update(p_dict("rpn_bbox_pred_", self.bbox_pred))