Python BoundBox Beispiele, utils.bbox.BoundBox Python Beispiele

Beispiel #1

0

Datei anzeigen

    def __init__(self,
                 instances,
                 anchors,
                 labels,
                 downsample=32,
                 max_box_per_image=30,
                 batch_size=1,
                 min_net_size=320,
                 max_net_size=608,
                 shuffle=True,
                 jitter=True,
                 norm=None):
        """Record the generator settings and optionally shuffle the samples.

        Args:
            instances: training samples; shuffled in place when ``shuffle``
                is truthy.
            anchors: flat sequence of anchor sizes, consumed two values at a
                time (``anchors[2*i]``, ``anchors[2*i+1]``).
            labels: class labels, stored as given.
            downsample: ratio between the network input size and the network
                output size (32 for YOLOv3).
            max_box_per_image: maximum number of boxes per image, stored as
                given.
            batch_size: batch size, stored as given.
            min_net_size: lower network-size limit, rounded down to a
                multiple of ``downsample``.
            max_net_size: upper network-size limit, rounded down to a
                multiple of ``downsample``.
            shuffle: whether to shuffle ``instances`` right away.
            jitter: jitter setting (noise added to the input data to make
                the model more robust), stored as given.
            norm: normalisation setting, stored as given.
        """
        # Plain pass-through settings.
        self.instances = instances
        self.labels = labels
        self.batch_size = batch_size
        self.max_box_per_image = max_box_per_image
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm

        # Size limits are snapped down to the nearest multiple of the stride.
        self.downsample = downsample
        self.min_net_size = (min_net_size // downsample) * downsample
        self.max_net_size = (max_net_size // downsample) * downsample

        # One BoundBox per consecutive pair of anchor values, anchored at the origin.
        anchor_boxes = []
        for pair_idx in range(len(anchors) // 2):
            first_dim = anchors[2 * pair_idx]
            second_dim = anchors[2 * pair_idx + 1]
            anchor_boxes.append(BoundBox(0, 0, first_dim, second_dim))
        self.anchors = anchor_boxes

        # Initial network input size.
        self.net_h = 416
        self.net_w = 416

        if shuffle:
            np.random.shuffle(self.instances)

Beispiel #2

0

Datei anzeigen

    def compare(self, data1, data2, thresh_iou):
        """Tell whether two boxes count as a match.

        ``data1`` and ``data2`` are mappings with ``xmin``, ``ymin``, ``xmax``
        and ``ymax`` entries. The result is ``True`` when either box lies
        completely inside the other, or when ``bbox_iou`` of the two boxes is
        strictly greater than ``thresh_iou``; otherwise it is ``False``.
        """
        def encloses(outer, inner):
            # The chained comparisons require ``inner`` to sit within
            # ``outer`` on both axes (and to have min <= max itself).
            return (outer['xmin'] <= inner['xmin'] <= inner['xmax'] <= outer['xmax']
                    and outer['ymin'] <= inner['ymin'] <= inner['ymax'] <= outer['ymax'])

        # Full containment in either direction is an immediate match.
        if encloses(data2, data1) or encloses(data1, data2):
            return True

        # Otherwise fall back to the overlap ratio of the two boxes.
        corner_keys = ('xmin', 'ymin', 'xmax', 'ymax')
        first_box = BoundBox(*[data1[key] for key in corner_keys])
        second_box = BoundBox(*[data2[key] for key in corner_keys])
        overlap = bbox_iou(first_box, second_box)

        return True if overlap > thresh_iou else False

Beispiel #3

0

Datei anzeigen

    def __init__(self,
                 instances,             # training samples; for their structure see create_training_instances() in train.py
                 anchors,               # prior (anchor) boxes, e.g. [55,69, 75,234, 133,240, 136,129, 142,363, 203,290, 228,184, 285,359, 341,260]
                 labels,                # usually config['model']['labels'], e.g. ["raccoon"]; if not specified, all objects found in the sample images
                 downsample=32,         # ratio between network input's size and network output's size, 32 for YOLOv3
                 max_box_per_image=30,  # maximum number of objects per image, derived from the object annotations of the samples
                 batch_size=1,
                 min_net_size=320,      # config['model']['min_input_size'], minimum input image size (width and height)
                 max_net_size=608,      # config['model']['max_input_size'], maximum input image size (width and height)
                 shuffle=True,
                 jitter=True,
                 norm=None):
        """Keep the generator configuration and optionally shuffle the samples.

        ``min_net_size`` and ``max_net_size`` are rounded down to a multiple
        of ``downsample``; ``anchors`` is converted into a list of
        origin-anchored ``BoundBox`` objects, one per pair of values. When
        ``shuffle`` is truthy, ``instances`` is shuffled in place.
        """
        self.downsample = downsample
        self.min_net_size = (min_net_size // downsample) * downsample
        self.max_net_size = (max_net_size // downsample) * downsample

        self.instances = instances
        self.labels = labels
        self.batch_size = batch_size
        self.max_box_per_image = max_box_per_image
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm

        # Pair up the flat anchor values (9 BoundBox objects for the
        # 18-value example above).
        pair_count = len(anchors) // 2
        self.anchors = [
            BoundBox(0, 0, anchors[2 * k], anchors[2 * k + 1])
            for k in range(pair_count)
        ]

        # Initial network input size.
        self.net_h, self.net_w = 416, 416

        if shuffle:
            np.random.shuffle(self.instances)

Beispiel #4

0

Datei anzeigen

    def __init__(self,
                 dataset_path: str,
                 simplify_classes: bool = False,
                 batch_size: int = 1,
                 max_image_side_length: int = 512,
                 augmentation: Augmenter = None,
                 center_color_to_imagenet: bool = False,
                 image_scale_mode: str = 'just',
                 pre_image_scale=0.5):
        """Initialise the base dataset, then add the attributes a YOLOv3 batch generator uses.

        Args:
            dataset_path: forwarded to the base-class initialiser.
            simplify_classes: forwarded to the base-class initialiser.
            batch_size: forwarded to the base-class initialiser.
            max_image_side_length: forwarded to the base-class initialiser and
                also used as both ``min_net_size`` and ``max_net_size``.
            augmentation: forwarded to the base-class initialiser.
            center_color_to_imagenet: accepted but not used in this method.
            image_scale_mode: accepted but not used in this method.
            pre_image_scale: forwarded to the base-class initialiser.
        """

        # NOTE(review): the literals False and 'squash' are passed instead of
        # ``center_color_to_imagenet`` and ``image_scale_mode`` -- presumably
        # in those two positional slots; confirm against the base-class signature.
        super(Yolo_3Dataset,
              self).__init__(dataset_path, simplify_classes, batch_size,
                             max_image_side_length, augmentation, False,
                             'squash', pre_image_scale)

        # ``self.anchors`` is read before it is assigned here, so it must
        # already exist (presumably set by the base class -- TODO confirm).
        # The flat values are paired up into origin-anchored BoundBox objects.
        self.anchors = [
            BoundBox(0, 0, self.anchors[2 * i], self.anchors[2 * i + 1])
            for i in range(len(self.anchors) // 2)
        ]

        # Bind BatchGenerator.__getitem__ to this instance through the
        # descriptor protocol, so it can be called as ``self.get_item(idx)``.
        self.get_item = BatchGenerator.__getitem__.__get__(self, Yolo_3Dataset)

        # Fixed settings stored under the same attribute names the other
        # generator initialisers in this listing use.
        self.instances = self.get_instances()
        self.labels = ('Sharp Force', 'Blunt Force')
        self.downsample = 32
        self.max_box_per_image = 30
        # Same value for both limits: the network size range collapses to one size.
        self.min_net_size = max_image_side_length
        self.max_net_size = max_image_side_length
        self.shuffle = False
        self.jitter = 0.0
        # ``normalize`` is a name defined outside this block.
        self.norm = normalize

Beispiel #5

0

Datei anzeigen

Datei: generator.py Projekt: icerove/keras-yolo3

    def __init__(self,
                 instances,
                 anchors,
                 labels,
                 downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
                 max_box_per_image=30,
                 batch_size=1,
                 min_net_size=320,
                 max_net_size=608,
                 shuffle=True,
                 jitter=True,
                 norm=None):
        """Set up the batch generator state.

        All arguments are stored on the instance. The two network-size limits
        are first rounded down to a multiple of ``downsample``, and the flat
        ``anchors`` sequence is turned into origin-anchored ``BoundBox``
        objects (one per pair of values). ``instances`` is shuffled in place
        when ``shuffle`` is truthy.
        """
        stride = downsample

        self.instances, self.labels = instances, labels
        self.batch_size, self.max_box_per_image = batch_size, max_box_per_image
        self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

        self.downsample = stride
        self.min_net_size = stride * (min_net_size // stride)
        self.max_net_size = stride * (max_net_size // stride)

        # Consecutive anchor values (2*n, 2*n+1) form one box each.
        boxes = []
        for n in range(len(anchors) // 2):
            boxes.append(BoundBox(0, 0, anchors[2 * n], anchors[2 * n + 1]))
        self.anchors = boxes

        # Initial network input size.
        self.net_h = 416
        self.net_w = 416

        if shuffle:
            np.random.shuffle(self.instances)

Beispiel #6

0

Datei anzeigen

Datei: dataloader.py Projekt: maveltoz/YOLOv3

 def __init__(
     self,
     train_list,
     label_list,
     anchors,
     max_box_per_image=42,
     batch_size=1,
 ):
     """Store the data lists and fixed network settings, then shuffle.

     Args:
         train_list: training entries; shuffled in place at the end.
         label_list: labels, stored as given.
         anchors: flat sequence of anchor sizes, consumed two values at a
             time to build origin-anchored ``BoundBox`` objects.
         max_box_per_image: maximum number of boxes per image, stored as given.
         batch_size: batch size, stored as given.
     """
     stride = 32
     smallest_input = 224
     largest_input = 480

     self.train_list = train_list
     self.label_list = label_list
     self.batch_size = batch_size
     self.max_box_per_image = max_box_per_image

     anchor_boxes = []
     for pair_idx in range(len(anchors) // 2):
         anchor_boxes.append(
             BoundBox(0, 0, anchors[2 * pair_idx], anchors[2 * pair_idx + 1]))
     self.anchors = anchor_boxes

     # Initial network input size.
     self.net_h, self.net_w = 416, 416

     # Input-size limits, rounded down to a multiple of the stride.
     self.downsample = stride
     self.min_input_size = smallest_input
     self.max_input_size = largest_input
     self.min_net_size = stride * (smallest_input // stride)
     self.max_net_size = stride * (largest_input // stride)

     self.jitter = 0.3

     # Every attribute is in place before the epoch hook runs; the shuffle
     # comes last.
     self.on_epoch_end()
     np.random.shuffle(self.train_list)

Beispiel #7

0

Datei anzeigen

Datei: generator.py Projekt: jornb/keras-yolo3

    def __init__(self,
                 instances,
                 anchors,
                 labels,
                 downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
                 max_box_per_image=30,
                 batch_size=1,
                 min_net_size=320,
                 max_net_size=608,
                 shuffle=True,
                 norm=None,
                 explicit_net_size=None,
                 num_scales=3,
                 aug_jitter=0.3,
                 aug_scale=(0.25, 2.0),
                 aug_hue=18,
                 aug_saturation=1.5,
                 aug_exposure=1.5,
                 aug_gray=False,
                 aug_flip=True,
                 aug_pad=True):
        """Store generator and augmentation settings; optionally shuffle.

        The network-size limits are rounded down to a multiple of
        ``downsample`` and ``anchors`` (a flat sequence) becomes a list of
        origin-anchored ``BoundBox`` objects, one per pair of values.

        Falsy values for ``aug_jitter``, ``aug_scale``, ``aug_hue``,
        ``aug_saturation`` and ``aug_exposure`` are replaced by ``0.0``,
        ``(1.0, 1.0)``, ``0.0``, ``1.0`` and ``1.0`` respectively. All other
        arguments are stored unchanged. ``instances`` is shuffled in place
        when ``shuffle`` is truthy.
        """
        # Core generator settings.
        self.instances = instances
        self.labels = labels
        self.batch_size = batch_size
        self.max_box_per_image = max_box_per_image
        self.shuffle = shuffle
        self.norm = norm
        self.explicit_net_size = explicit_net_size
        self.num_scales = num_scales

        # Size limits snapped down to the stride.
        self.downsample = downsample
        self.min_net_size = (min_net_size // downsample) * downsample
        self.max_net_size = (max_net_size // downsample) * downsample

        # Anchors as boxes, plus the initial network input size.
        pair_count = len(anchors) // 2
        self.anchors = [
            BoundBox(0, 0, anchors[2 * k], anchors[2 * k + 1])
            for k in range(pair_count)
        ]
        self.net_h, self.net_w = 416, 416

        # Augmentation settings; falsy numeric ones fall back to defaults.
        self.aug_jitter = aug_jitter if aug_jitter else 0.0
        self.aug_scale = aug_scale if aug_scale else (1.0, 1.0)
        self.aug_hue = aug_hue if aug_hue else 0.0
        self.aug_saturation = aug_saturation if aug_saturation else 1.0
        self.aug_exposure = aug_exposure if aug_exposure else 1.0
        self.aug_gray = aug_gray
        self.aug_flip = aug_flip
        self.aug_pad = aug_pad

        if shuffle:
            np.random.shuffle(self.instances)

Beispiel #8

0

Datei anzeigen

def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    """Turn one raw network output grid into a list of candidate boxes.

    Args:
        netout: array whose first two axes are the grid height and width;
            the rest is reshaped to 3 box slots per cell, each laid out as
            x, y, w, h, objectness, followed by the class scores. The
            caller's array is left untouched.
        anchors: flat sequence of anchor sizes; entries ``2*b`` and
            ``2*b + 1`` scale the width and height of box slot ``b``.
        obj_thresh: boxes whose objectness is at or below this value are
            dropped, and class scores at or below it are zeroed.
        net_h: divisor applied to the decoded box height.
        net_w: divisor applied to the decoded box width.

    Returns:
        A list of ``BoundBox(xmin, ymin, xmax, ymax, objectness, classes)``
        in row-major cell order, box slots innermost.
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3

    # reshape() may return a view of the caller's array and everything below
    # writes in place, so decode on a private copy.
    netout = netout.reshape((grid_h, grid_w, nb_box, -1)).copy()

    netout[..., :2] = _sigmoid(netout[..., :2])
    netout[..., 4] = _sigmoid(netout[..., 4])
    # class score = objectness * softmax(class logits), zeroed when not above threshold
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    boxes = []

    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                # element 4 is the objectness score
                objectness = netout[row, col, b, 4]

                if objectness <= obj_thresh:
                    continue

                # first 4 elements are x, y, w, and h
                x, y, w, h = netout[row, col, b, :4]

                x = (col + x) / grid_w  # center position, unit: image width
                y = (row + y) / grid_h  # center position, unit: image height
                w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
                h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

                # last elements are class probabilities
                classes = netout[row, col, b, 5:]

                boxes.append(BoundBox(x - w / 2, y - h / 2,
                                      x + w / 2, y + h / 2,
                                      objectness, classes))

    return boxes

Beispiel #9

0

Datei anzeigen

    def __init__(
            self,
            instances,
            anchors,
            labels,
            downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
            max_box_per_image=30,
            batch_size=1,
            min_net_size=320,
            max_net_size=608,
            shuffle=True,
            jitter=True,
            norm=None):
        """Store the generator settings, shuffle, and patch sample filenames.

        Args:
            instances: training samples (dicts with a ``'filename'`` entry).
                The list is shuffled in place when ``shuffle`` is truthy and
                each filename gets a ``'.jpg'`` suffix if it lacks one.
            anchors: flat sequence of anchor sizes, consumed two values at a
                time to build origin-anchored ``BoundBox`` objects.
            labels: class labels, stored as given.
            downsample: ratio between network input and output size.
            max_box_per_image: maximum number of boxes per image.
            batch_size: batch size.
            min_net_size: lower size limit, rounded down to a multiple of
                ``downsample``.
            max_net_size: upper size limit, rounded down to a multiple of
                ``downsample``.
            shuffle: whether to shuffle ``instances`` right away.
            jitter: jitter setting, stored as given.
            norm: normalisation setting, stored as given.
        """
        self.instances = instances
        self.batch_size = batch_size
        self.labels = labels
        self.downsample = downsample
        self.max_box_per_image = max_box_per_image
        self.min_net_size = (min_net_size // self.downsample) * self.downsample
        self.max_net_size = (max_net_size // self.downsample) * self.downsample
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm
        self.anchors = [
            BoundBox(0, 0, anchors[2 * i], anchors[2 * i + 1])
            for i in range(len(anchors) // 2)
        ]
        self.net_h = 416
        self.net_w = 416

        if shuffle:
            np.random.shuffle(self.instances)

        # HACK (temporary): the XML annotations store filenames without an
        # extension; append it here instead of rewriting every annotation.
        # The dicts belong to the caller, so only patch names that are still
        # missing the suffix -- otherwise a second generator built over the
        # same instances would produce '<name>.jpg.jpg'.
        for instance in self.instances:
            if not instance['filename'].endswith('.jpg'):
                instance['filename'] = instance['filename'] + '.jpg'

Beispiel #10

0

Datei anzeigen

Datei: generator.py Projekt: icerove/keras-yolo3

    def __getitem__(self, idx):
        """Build batch number ``idx``: input images plus YOLO training targets.

        For each object returned by ``self._aug_image`` the anchor with the
        highest ``bbox_iou`` against the object's origin-shifted box is
        selected. That anchor's position in ``self.anchors`` decides which of
        the three target tensors, and which anchor slot in it, receives the
        encoded box.

        Args:
            idx: zero-based batch index.

        Returns:
            A pair ``(inputs, dummies)``. ``inputs`` is
            ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]``; ``dummies`` is
            three all-zero arrays of shape ``(batch, 1)``.
        """
        # network input size for this batch, as chosen by _get_net_size
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx*self.batch_size
        r_bound = (idx+1)*self.batch_size

        if r_bound > len(self.instances):
            # Last batch: slide the window back so it ends on the final
            # sample. Clamp at 0 so a dataset smaller than batch_size does
            # not yield a negative start, which would slice from the end of
            # the list and allocate more rows than there are samples.
            r_bound = len(self.instances)
            l_bound = max(0, r_bound - self.batch_size)

        batch_len = r_bound - l_bound
        # The anchors are split evenly over the three output scales.
        anchors_per_scale = len(self.anchors)//3
        cell_depth = 4+1+len(self.labels)

        x_batch = np.zeros((batch_len, net_h, net_w, 3))                       # input images
        t_batch = np.zeros((batch_len, 1, 1, 1, self.max_box_per_image, 4))    # list of groundtruth boxes

        # initialize the inputs and the outputs
        yolo_1 = np.zeros((batch_len, 1*base_grid_h, 1*base_grid_w, anchors_per_scale, cell_depth))  # desired network output 1
        yolo_2 = np.zeros((batch_len, 2*base_grid_h, 2*base_grid_w, anchors_per_scale, cell_depth))  # desired network output 2
        yolo_3 = np.zeros((batch_len, 4*base_grid_h, 4*base_grid_w, anchors_per_scale, cell_depth))  # desired network output 3
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((batch_len, 1))
        dummy_yolo_2 = np.zeros((batch_len, 1))
        dummy_yolo_3 = np.zeros((batch_len, 1))

        instance_count = 0
        # Running slot in t_batch; it is not reset between images and wraps
        # at max_box_per_image.
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None
                max_index = -1
                max_iou = -1

                shifted_box = BoundBox(0,
                                       0,
                                       obj['xmax']-obj['xmin'],
                                       obj['ymax']-obj['ymin'])

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou

                # Determine the yolo tensor and the anchor slot responsible
                # for this bounding box. The split uses anchors_per_scale so
                # it always agrees with the tensors' anchor axis (identical
                # to //3 and %3 for the usual 9 anchors).
                scale_index, anchor_slot = divmod(max_index, anchors_per_scale)
                yolo = yolos[scale_index]
                grid_h, grid_w = yolo.shape[1:3]

                # determine the position of the bounding box on the grid
                center_x = .5*(obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
                center_y = .5*(obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

                # determine the sizes of the bounding box
                w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))  # t_w
                h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))  # t_h

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                yolo[instance_count, grid_y, grid_x, anchor_slot] = 0
                yolo[instance_count, grid_y, grid_x, anchor_slot, 0:4] = box
                yolo[instance_count, grid_y, grid_x, anchor_slot, 4] = 1.
                yolo[instance_count, grid_y, grid_x, anchor_slot, 5+obj_indx] = 1

                # assign the true box to t_batch
                true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index = true_box_index % self.max_box_per_image

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                    cv2.putText(img, obj['name'],
                                (obj['xmin']+2, obj['ymin']+12),
                                0, 1.2e-3 * img.shape[0],
                                (0, 255, 0), 2)

                x_batch[instance_count] = img

            # increase instance counter in the current batch
            instance_count += 1

        return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]

Beispiel #11

0

Datei anzeigen

    def get_x_y(self, indices: List[int], batch_no: int = 0):
        """
        Build the network inputs and YOLO targets for the selected samples.

        For each object returned by ``self._aug_image`` the anchor with the
        highest ``bbox_iou`` against the object's origin-shifted box is
        selected; its position in ``self.anchors`` decides which of the three
        target tensors, and which anchor slot in it, receives the encoded box.

        :param indices: List of indices into ``self.instances`` to include
        :param batch_no: Not used by this method
        :return: Pair ``(inputs, dummies)``: ``inputs`` is
            ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]`` and ``dummies`` is
            three all-zero arrays of shape ``(len(indices), 1)``
        """
        num_items = len(indices)
        # The network input size is read from the class attributes.
        net_h, net_w = Yolo_3Dataset.net_h, Yolo_3Dataset.net_w
        base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

        # The anchors are split evenly over the three output scales.
        anchors_per_scale = len(self.anchors) // 3
        cell_depth = 4 + 1 + len(self.labels)

        x_batch = np.zeros((num_items, net_h, net_w, 3))  # input images
        t_batch = np.zeros((num_items, 1, 1, 1, self.max_box_per_image,
                            4))  # list of groundtruth boxes

        # initialize the inputs and the outputs
        yolo_1 = np.zeros(
            (num_items, 1 * base_grid_h, 1 * base_grid_w,
             anchors_per_scale, cell_depth))  # desired network output 1
        yolo_2 = np.zeros(
            (num_items, 2 * base_grid_h, 2 * base_grid_w,
             anchors_per_scale, cell_depth))  # desired network output 2
        yolo_3 = np.zeros(
            (num_items, 4 * base_grid_h, 4 * base_grid_w,
             anchors_per_scale, cell_depth))  # desired network output 3
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((num_items, 1))
        dummy_yolo_2 = np.zeros((num_items, 1))
        dummy_yolo_3 = np.zeros((num_items, 1))

        instance_count = 0
        # Running slot in t_batch; it is not reset between images and wraps
        # at max_box_per_image.
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for train_instance in [self.instances[i] for i in indices]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None
                max_index = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, obj['xmax'] - obj['xmin'],
                                       obj['ymax'] - obj['ymin'])

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou

                # Determine the yolo tensor and the anchor slot responsible
                # for this bounding box. The split uses anchors_per_scale so
                # it always agrees with the tensors' anchor axis (identical
                # to //3 and %3 for the usual 9 anchors).
                scale_index, anchor_slot = divmod(max_index, anchors_per_scale)
                yolo = yolos[scale_index]
                grid_h, grid_w = yolo.shape[1:3]

                # determine the position of the bounding box on the grid
                center_x = .5 * (obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
                center_y = .5 * (obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

                # determine the sizes of the bounding box
                w = np.log((obj['xmax'] - obj['xmin']) /
                           float(max_anchor.xmax))  # t_w
                h = np.log((obj['ymax'] - obj['ymin']) /
                           float(max_anchor.ymax))  # t_h

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                yolo[instance_count, grid_y, grid_x, anchor_slot] = 0
                yolo[instance_count, grid_y, grid_x, anchor_slot, 0:4] = box
                yolo[instance_count, grid_y, grid_x, anchor_slot, 4] = 1.
                yolo[instance_count, grid_y, grid_x, anchor_slot,
                     5 + obj_indx] = 1

                # assign the true box to t_batch
                true_box = [
                    center_x, center_y, obj['xmax'] - obj['xmin'],
                    obj['ymax'] - obj['ymin']
                ]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index = true_box_index % self.max_box_per_image

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'], obj['ymin']),
                                  (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                    cv2.putText(img, obj['name'],
                                (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                                1.2e-3 * img.shape[0], (0, 255, 0), 2)

                x_batch[instance_count] = img

            # increase instance counter in the current batch
            instance_count += 1

        return [x_batch, t_batch, yolo_1, yolo_2,
                yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]

Beispiel #12

0

Datei anzeigen

    def __getitem__(self, idx):
        """Build batch number ``idx``: input images plus YOLO training targets.

        For each object returned by ``self._aug_image`` the anchor with the
        highest ``bbox_iou`` against the object's origin-shifted box is
        selected. That anchor's position in ``self.anchors`` decides which of
        the three target tensors, and which anchor slot in it, receives the
        encoded box; each object is assigned to exactly one anchor.

        Args:
            idx: zero-based batch index.

        Returns:
            A pair ``(inputs, dummies)``. ``inputs`` is
            ``[x_batch, t_batch, yolo_1, yolo_2, yolo_3]``; ``dummies`` is
            three all-zero arrays of shape ``(batch, 1)``.
        """
        # net_h, net_w are the height and width of the input images for this
        # batch, as chosen by _get_net_size
        net_h, net_w = self._get_net_size(idx)
        # height and width of the feature map at the base downsample ratio
        base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx*self.batch_size
        r_bound = (idx+1)*self.batch_size

        if r_bound > len(self.instances):
            # Last batch: slide the window back so it ends on the final
            # sample. Clamp at 0 so a dataset smaller than batch_size does
            # not yield a negative start, which would slice from the end of
            # the list and allocate more rows than there are samples.
            r_bound = len(self.instances)
            l_bound = max(0, r_bound - self.batch_size)

        batch_len = r_bound - l_bound
        # The anchors are split evenly over the three output scales.
        anchors_per_scale = len(self.anchors)//3
        cell_depth = 4+1+len(self.labels)

        # the input images of one batch
        x_batch = np.zeros((batch_len, net_h, net_w, 3))                       # input images
        # all object boxes of each image,
        # shape = (batch, 1, 1, 1, max objects per image, 4 coordinates)
        t_batch = np.zeros((batch_len, 1, 1, 1, self.max_box_per_image, 4))    # list of groundtruth boxes

        # initialize the inputs and the outputs: target feature maps at 1x,
        # 2x and 4x the base grid size, each shaped
        # [batch, grid height, grid width, anchors per scale, 4 box coords + 1 confidence + number of classes]
        yolo_1 = np.zeros((batch_len, 1*base_grid_h, 1*base_grid_w, anchors_per_scale, cell_depth))  # desired network output 1
        yolo_2 = np.zeros((batch_len, 2*base_grid_h, 2*base_grid_w, anchors_per_scale, cell_depth))  # desired network output 2
        yolo_3 = np.zeros((batch_len, 4*base_grid_h, 4*base_grid_w, anchors_per_scale, cell_depth))  # desired network output 3

        # finest grid first, matching consecutive groups of anchors, e.g.
        # [55,69, 75,234, 133,240,   136,129, 142,363, 203,290,   228,184, 285,359, 341,260]
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((batch_len, 1))
        dummy_yolo_2 = np.zeros((batch_len, 1))
        dummy_yolo_3 = np.zeros((batch_len, 1))

        instance_count = 0  # index of the current image within the batch
        # Running slot in t_batch. NOTE(review): instance_count already
        # separates the images, so resetting this per image would suffice;
        # it is accumulated over the whole batch (wrapping at
        # max_box_per_image) and kept that way here.
        true_box_index = 0

        # do the logic to fill in the inputs and the output
        for train_instance in self.instances[l_bound:r_bound]:
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                # find the best anchor box for this object
                max_anchor = None   # anchor with the highest IOU
                max_index = -1      # index of that anchor
                max_iou = -1

                shifted_box = BoundBox(0,
                                       0,
                                       obj['xmax']-obj['xmin'],
                                       obj['ymax']-obj['ymin'])

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou

                # Determine the yolo tensor and the anchor slot responsible
                # for this bounding box: the tensor of the scale that the
                # best-matching anchor belongs to. The split uses
                # anchors_per_scale so it always agrees with the tensors'
                # anchor axis (identical to //3 and %3 for the usual 9 anchors).
                scale_index, anchor_slot = divmod(max_index, anchors_per_scale)
                yolo = yolos[scale_index]
                grid_h, grid_w = yolo.shape[1:3]

                # determine the position of the bounding box on the grid:
                # the box centre is mapped onto the feature-map grid, i.e.
                # the expected predictions sigma(t_x) + c_x, sigma(t_y) + c_y
                center_x = .5*(obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
                center_y = .5*(obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

                # determine the sizes of the bounding box
                w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))  # t_w, where truth_w = anchor_w * exp(t_w)
                h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))  # t_h, where truth_h = anchor_h * exp(t_h)

                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                yolo[instance_count, grid_y, grid_x, anchor_slot] = 0
                yolo[instance_count, grid_y, grid_x, anchor_slot, 0:4] = box       # box coordinates
                yolo[instance_count, grid_y, grid_x, anchor_slot, 4] = 1.          # box confidence
                yolo[instance_count, grid_y, grid_x, anchor_slot, 5+obj_indx] = 1  # object class

                # assign the true box to t_batch; x and y are grid
                # coordinates, width and height are in input-image pixels
                true_box = [center_x, center_y, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin']]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index = true_box_index % self.max_box_per_image

            # assign input image to x_batch
            if self.norm is not None:
                x_batch[instance_count] = self.norm(img)
            else:
                # plot image and bounding boxes for sanity check
                for obj in all_objs:
                    cv2.rectangle(img, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                    cv2.putText(img, obj['name'],
                                (obj['xmin']+2, obj['ymin']+12),
                                0, 1.2e-3 * img.shape[0],
                                (0, 255, 0), 2)

                x_batch[instance_count] = img

            # increase instance counter in the current batch
            instance_count += 1

        return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]

Beispiel #13

0

Datei anzeigen

Datei: sample_inference.py Projekt: zzszmyf/cortex

def main(img_url_src, yolov3_endpoint, crnn_endpoint, output):
    """Detect license plates in an image, read them and save an annotated copy.

    The image obtained from ``get_url_image(img_url_src)`` is posted to
    ``yolov3_endpoint``. Every returned box with at least one class score
    above 0.8 is cropped out and the crops are posted to ``crnn_endpoint``.
    The image with boxes and recognised words drawn on it is written to
    ``output`` with ``cv2.imwrite``.
    """
    score_threshold = 0.8
    json_headers = {"content-type": "application/json"}

    # Load the source image and serialise it as base64-encoded JPEG bytes.
    image = get_url_image(img_url_src)
    jpeg_bytes = image_to_jpeg_bytes(image)
    detection_payload = json.dumps(
        {"img": base64.b64encode(jpeg_bytes).decode("utf-8")})

    # Ask the YOLOv3 API for bounding boxes.
    detection_resp = requests.post(yolov3_endpoint,
                                   data=detection_payload,
                                   headers=json_headers)
    candidates = [BoundBox(*raw) for raw in detection_resp.json()["boxes"]]

    # Keep only boxes where some class score exceeds the threshold.
    boxes = [
        candidate for candidate in candidates
        if any(score > score_threshold for score in candidate.classes)
    ]

    dec_words = []
    if boxes:
        # Crop each detected plate and convert the crop to a JPEG array.
        plate_jpegs = [
            image_to_jpeg_nparray(image[box.ymin:box.ymax, box.xmin:box.xmax])
            for box in boxes
        ]

        # The crops travel as a base64-encoded pickle inside a JSON document.
        pickled = pickle.dumps(plate_jpegs, protocol=0)
        recognition_payload = json.dumps(
            {"imgs": base64.b64encode(pickled).decode("utf-8")})

        # Ask the CRNN API to read the plates.
        recognition_resp = requests.post(crnn_endpoint,
                                         data=recognition_payload,
                                         headers=json_headers)
        recognised = reorder_recognized_words(
            recognition_resp.json()["license-plates"])
        dec_words.extend([word[0] for word in plate] for plate in recognised)

    if not dec_words:
        # No text available: give every box an empty word list.
        dec_words = [[] for _ in boxes]

    # Draw the predictions as overlays on the source image and save it.
    annotated = draw_boxes(image,
                           boxes,
                           overlay_text=dec_words,
                           labels=["LP"],
                           obj_thresh=score_threshold)
    cv2.imwrite(output, annotated)

Beispiel #14

0

Datei anzeigen

    def __init__(
            self,
            instances,
            anchors,  # for Feature Pyramid Networks we need 9 anchors, 3 for each scale
            labels,
            downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv1-3
            max_box_per_image=30,
            batch_size=1,
            shuffle=True,
            jitter=True,
            norm=None):
        """Store the generator settings and build the imgaug pipeline.

        ``anchors`` is read as a flat sequence of (width, height) pairs; each
        pair becomes ``BoundBox(0, 0, width, height)``. The network input
        size is fixed at 224x224. When ``shuffle`` is truthy, ``instances``
        is shuffled in place.
        """

        def sometimes(aug):
            # Wrap an augmenter in iaa.Sometimes with p=0.5.
            return iaa.Sometimes(0.5, aug)

        self.instances = instances
        self.labels = labels
        self.batch_size = batch_size
        self.max_box_per_image = max_box_per_image
        self.downsample = downsample
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm
        self.net_h = 224
        self.net_w = 224

        # Consecutive (width, height) pairs; a trailing unpaired value is ignored.
        self.anchors = [
            BoundBox(0, 0, anchors[k], anchors[k + 1])
            for k in range(0, len(anchors) - 1, 2)
        ]

        # Augmenters by https://github.com/aleju/imgaug.
        # Augmenters with per_channel=0.5 sample one value per image in 50%
        # of all cases and one value per channel otherwise.
        affine_step = sometimes(iaa.Affine())

        # Exactly one of the three blur variants.
        blur = iaa.OneOf([
            iaa.GaussianBlur((0, 3.0)),   # sigma between 0 and 3.0
            iaa.AverageBlur(k=(2, 7)),    # local means, kernel size 2 to 7
            iaa.MedianBlur(k=(3, 11)),    # local medians, kernel size 3 to 11
        ])
        sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
        gaussian_noise = iaa.AdditiveGaussianNoise(
            loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
        # Randomly remove up to 10% of the pixels.
        dropout = iaa.OneOf([
            iaa.Dropout((0.01, 0.1), per_channel=0.5),
        ])
        # Brightness: shift by -10..10, then scale to 50-150%.
        brightness_shift = iaa.Add((-10, 10), per_channel=0.5)
        brightness_scale = iaa.Multiply((0.5, 1.5), per_channel=0.5)
        # Improve or worsen the contrast.
        contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

        # Run 0 to 5 of the less important augmenters per image; running all
        # of them would often be far too strong.
        optional_steps = iaa.SomeOf(
            (0, 5),
            [
                blur,
                sharpen,
                gaussian_noise,
                dropout,
                brightness_shift,
                brightness_scale,
                contrast,
            ],
            random_order=True)

        self.aug_pipe = iaa.Sequential([affine_step, optional_steps],
                                       random_order=True)

        if shuffle:
            np.random.shuffle(self.instances)

Beispiel #15

0

Datei anzeigen

Datei: dataloader.py Projekt: maveltoz/YOLOv3

    def __getitem__(self, idx):
        """Assemble the ``idx``-th batch of network inputs and dummy targets.

        Args:
            idx: Batch index; the batch covers
                ``train_list[idx * batch_size:(idx + 1) * batch_size]``,
                shifted back when it would run past the end of the list.

        Returns:
            ``([x_batch, t_batch, yolo_1, yolo_2, yolo_3],
            [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])``. All arrays have
            ``batch_size`` rows; rows without an instance stay zero.
        """
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size

        if r_bound > len(self.train_list):
            # Last batch: slide the window back so it ends on the final
            # instance. Clamp at 0, because when the list is shorter than one
            # batch a negative start would be interpreted as an offset from
            # the END of the list and most instances would be dropped.
            r_bound = len(self.train_list)
            l_bound = max(0, r_bound - self.batch_size)

        x_batch = np.zeros((self.batch_size, net_h, net_w, 3))
        t_batch = np.zeros(
            (self.batch_size, 1, 1, 1, self.max_box_per_image, 4))

        # One target tensor per output scale: 1x, 2x and 4x the base grid.
        anchors_per_scale = len(self.anchors) // 3
        channels = 4 + 1 + len(self.label_list)
        yolo_1 = np.zeros(
            (self.batch_size, 1 * base_grid_h, 1 * base_grid_w,
             anchors_per_scale, channels))
        yolo_2 = np.zeros(
            (self.batch_size, 2 * base_grid_h, 2 * base_grid_w,
             anchors_per_scale, channels))
        yolo_3 = np.zeros(
            (self.batch_size, 4 * base_grid_h, 4 * base_grid_w,
             anchors_per_scale, channels))

        # Anchor indices 0-2 map to yolo_3, 3-5 to yolo_2, 6-8 to yolo_1.
        yolos = [yolo_3, yolo_2, yolo_1]

        dummy_yolo_1 = np.zeros((self.batch_size, 1))
        dummy_yolo_2 = np.zeros((self.batch_size, 1))
        dummy_yolo_3 = np.zeros((self.batch_size, 1))

        # NOTE: this slot counter is shared by the whole batch (it is not
        # reset per image) and wraps around at max_box_per_image.
        true_box_index = 0

        for instance_count, train_instance in enumerate(
                self.train_list[l_bound:r_bound]):
            aug_img, aug_objs = self.augmentation(train_instance, net_h, net_w)

            for obj in aug_objs:
                # Pick the anchor with the highest IoU against the object's
                # box moved to the origin (first one wins on ties).
                max_anchor = None
                max_index = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, obj['xmax'] - obj['xmin'],
                                       obj['ymax'] - obj['ymin'])

                for i, anchor in enumerate(self.anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_anchor = anchor
                        max_index = i
                        max_iou = iou

                # The anchor index selects the output scale.
                yolo = yolos[max_index // 3]
                grid_h, grid_w = yolo.shape[1:3]

                # Box centre expressed in grid-cell units of that scale.
                center_x = .5 * (obj['xmin'] + obj['xmax'])
                center_x = center_x / float(net_w) * grid_w
                center_y = .5 * (obj['ymin'] + obj['ymax'])
                center_y = center_y / float(net_h) * grid_h

                # Width/height as the log ratio to the chosen anchor's size.
                w = np.log(
                    (obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))
                h = np.log(
                    (obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))

                box = [center_x, center_y, w, h]

                obj_indx = self.label_list.index(obj['name'])

                # Grid cell that contains the box centre.
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # Clear the anchor slot, then write box, objectness and the
                # one-hot class flag.
                yolo[instance_count, grid_y, grid_x, max_index % 3] = 0
                yolo[instance_count, grid_y, grid_x, max_index % 3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.
                yolo[instance_count, grid_y, grid_x, max_index % 3,
                     5 + obj_indx] = 1

                # True box: centre in grid units, width/height in the units
                # of obj's coordinates.
                true_box = [
                    center_x, center_y, obj['xmax'] - obj['xmin'],
                    obj['ymax'] - obj['ymin']
                ]
                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

                true_box_index += 1
                true_box_index = true_box_index % self.max_box_per_image

            x_batch[instance_count] = normalize(aug_img)

        return [x_batch, t_batch, yolo_1, yolo_2,
                yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]

Beispiel #16

0

Datei anzeigen

Datei: import_dataset.py Projekt: Yatinanand99/No_NumberPlate_will_be_left

            baby5 = SubElement(child, 'bndbox')
            baby6 = SubElement(baby5, 'xmin')
            baby6.text = str(objecty[1])
            baby7 = SubElement(baby5, 'ymin')
            baby7.text = str(objecty[2])
            baby8 = SubElement(baby5, 'xmax')
            baby8.text = str(objecty[3])
            baby9 = SubElement(baby5, 'ymax')
            baby9.text = str(objecty[4])
        tree.write('{}{}.xml'.format(image_path_1,key[:-5]), pretty_print=True)


# For every JSON record: download the image to a numbered .jpeg file and
# write its annotation. Only the first entry of "annotation" is used.
i = 0  # stays 0 when loaded_json_file is empty
for i, x in enumerate(loaded_json_file, start=1):
    y = json.loads(x)
    img_loc_url = y["content"]
    save_loc = "C:/Games/Projects/TCS_Project/train_imgs/" + str(i) + ".jpeg"
    urllib.request.urlretrieve(img_loc_url, save_loc)

    first_annotation = y["annotation"][0]
    image_height = first_annotation["imageHeight"]
    image_width = first_annotation["imageWidth"]
    first_point = first_annotation["points"][0]
    xmin, ymin = first_point["x"], first_point["y"]
    second_point = first_annotation["points"][1]
    xmax, ymax = second_point["x"], second_point["y"]
    labels = first_annotation["label"]

    # Point coordinates are multiplied by the image size before truncation
    # (presumably they are normalised to 0..1 -- verify against the dataset).
    box = [
        BoundBox(int(xmin * image_width),
                 int(ymin * image_height),
                 int(xmax * image_width),
                 int(ymax * image_height),
                 None,
                 [1])
    ]
    write_annotations(save_loc, box, labels, 0.5,
                      int(image_height), int(image_width))

Beispiel #17

0

Datei anzeigen

    def cloud_infer(self):
        """
        Main method that runs in the loop.

        Takes one frame from ``self.in_queue`` (returns immediately when the
        queue is empty), requests bounding boxes through
        ``self.yolov3_api_request`` and, when boxes were found and a CRNN
        endpoint is configured, sends the cropped boxes through
        ``self.rcnn_api_request``. The annotated frame is pushed to
        ``self.bc_queue`` and the recognised words to ``self.predicts_queue``.
        """
        try:
            data = self.in_queue.get_nowait()
        except queue.Empty:
            # nothing to process right now
            return

        #############################

        # extract frame
        frame_num = data["frame_num"]
        img = data["jpeg"]
        # preprocess/compress the image
        image = image_from_bytes(img)
        reduced = compress_image(image)
        byte_im = image_to_jpeg_bytes(reduced)
        # encode image
        img_enc = base64.b64encode(byte_im).decode("utf-8")
        img_dump = json.dumps({"img": img_enc})

        # make inference request
        resp = self.yolov3_api_request(img_dump)
        if not resp:
            return

        #############################

        # parse response
        r_dict = resp.json()
        boxes = [BoundBox(*b) for b in r_dict["boxes"]]

        # purge bounding boxes with a low confidence score: keep a box only
        # when at least one class score exceeds the threshold
        boxes = [
            b for b in boxes
            if any(score > self.yolov3_obj_thresh for score in b.classes)
        ]

        # also scale the boxes for later uses
        camera_source_width = image.shape[1]
        boxes640 = self.scale_bbox(boxes, self.yolov3_input_size_px,
                                   self.bounding_boxes_upscale_px)
        boxes_source = self.scale_bbox(boxes, self.yolov3_input_size_px,
                                       camera_source_width)

        #############################

        # recognize the license plates in case
        # any bounding boxes have been detected
        dec_words = []
        if len(boxes) > 0 and len(self.api_endpoint_crnn) > 0:
            # create set of images of the detected license plates
            lps = []
            try:
                for b in boxes_source:
                    lp = image[b.ymin:b.ymax, b.xmin:b.xmax]
                    jpeg = image_to_jpeg_nparray(
                        lp, [int(cv2.IMWRITE_JPEG_QUALITY), self.crnn_quality])
                    lps.append(jpeg)
            except Exception:
                # Best effort: the crops converted so far are still sent.
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit are not swallowed.
                logger.warning("encountered error while converting to jpeg")

            lps = pickle.dumps(lps, protocol=0)
            lps_enc = base64.b64encode(lps).decode("utf-8")
            lps_dump = json.dumps({"imgs": lps_enc})

            # make request to rcnn API
            dec_lps = self.rcnn_api_request(lps_dump)
            dec_lps = self.reorder_recognized_words(dec_lps)
            for dec_lp in dec_lps:
                dec_words.append([word[0] for word in dec_lp])

        if len(dec_words) > 0:
            logger.info("Detected the following words: {}".format(dec_words))
        else:
            # no text available: one empty word list per box
            dec_words = [[] for _ in boxes]

        #############################

        # draw detections
        upscaled = resize_image(image, self.bounding_boxes_upscale_px)
        draw_image = draw_boxes(
            upscaled,
            boxes640,
            overlay_text=dec_words,
            labels=["LP"],
            obj_thresh=self.yolov3_obj_thresh,
        )
        draw_byte_im = image_to_jpeg_bytes(
            draw_image,
            [int(cv2.IMWRITE_JPEG_QUALITY), self.broadcast_quality])

        #############################

        # push data for further processing in the queue
        output = {
            "boxes": boxes,
            "frame_num": frame_num,
            "avg_yolo3_rtt": self.rtt_yolo3_ms,
            "avg_crnn_rtt": self.rtt_crnn_ms,
            "image": draw_byte_im,
        }
        self.bc_queue.put(output)

        # push predictions to write to disk
        # NOTE: dec_words is non-empty whenever at least one box survived the
        # purge (it is padded with empty lists above), so this also fires for
        # frames where no text was recognised.
        if len(dec_words) > 0:
            timestamp = time.time()
            literal_time = time.ctime(timestamp)
            predicts = {"predicts": dec_words, "date": literal_time}
            self.predicts_queue.put(predicts)

        logger.info(
            "Frame Count: {} - Avg YOLO3 RTT: {}ms - Avg CRNN RTT: {}ms - Detected: {}"
            .format(frame_num, int(self.rtt_yolo3_ms), int(self.rtt_crnn_ms),
                    len(boxes)))

Beispiel #18

0

Datei anzeigen

Datei: generator.py Projekt: chibuta/malaria-parasite-quantification

    def __init__(
            self,
            instances,
            anchors,
            labels,
            downsample,  # ratio between network input's size and network output's size, 32 for YOLOv3
            max_box_per_image,
            batch_size,
            min_net_size,
            max_net_size,
            net_size,
            shuffle,
            jitter,
            norm):
        """Store the generator settings and build the imgaug pipeline.

        ``min_net_size`` and ``max_net_size`` are rounded down to a multiple
        of ``downsample``. ``anchors`` is read as a flat sequence of
        (width, height) pairs; each pair becomes
        ``BoundBox(0, 0, width, height)``. ``net_size`` is used for both the
        network height and width. When ``shuffle`` is truthy, ``instances``
        is shuffled in place.
        """

        def sometimes(aug):
            # Wrap an augmenter in iaa.Sometimes with p=0.5.
            return iaa.Sometimes(0.5, aug)

        self.instances = instances
        self.labels = labels
        self.batch_size = batch_size
        self.max_box_per_image = max_box_per_image
        self.downsample = downsample
        self.min_net_size = (min_net_size // self.downsample) * self.downsample
        self.max_net_size = (max_net_size // self.downsample) * self.downsample
        self.shuffle = shuffle
        self.jitter = jitter
        self.norm = norm
        self.net_h = net_size
        self.net_w = net_size
        self.aug = True

        # Consecutive (width, height) pairs; a trailing unpaired value is ignored.
        self.anchors = [
            BoundBox(0, 0, anchors[k], anchors[k + 1])
            for k in range(0, len(anchors) - 1, 2)
        ]

        # Augment using the imgaug pipeline https://github.com/aleju/imgaug.
        # Augmenters with per_channel=0.5 sample one value per image in 50%
        # of all cases and one value per channel otherwise.

        # --- steps applied to most images ---
        flip_horizontal = iaa.Fliplr(0.5)  # horizontally flip 50% of all images
        flip_vertical = iaa.Flipud(0.5)    # vertically flip 50% of all images
        # crop images by -5% to 10% of their height/width
        crop_and_pad = sometimes(
            iaa.CropAndPad(percent=(-0.05, 0.1),
                           pad_mode=ia.ALL,
                           pad_cval=(0, 255)))
        affine = sometimes(
            iaa.Affine(
                # scale to 80-120% of the size, individually per axis
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                # translate by -20 to +20 percent (per axis)
                translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                rotate=(-40, 40),  # rotate by -40 to +40 degrees
                shear=(-10, 10),   # shear by -10 to +10 degrees
                order=[0, 1],      # nearest neighbour or bilinear interpolation (fast)
                cval=(0, 255),     # if mode is constant, use a cval between 0 and 255
                mode=ia.ALL        # use any of scikit-image's warping modes
            ))

        # --- less important steps, 0 to 5 of them run per image ---
        # convert images into their superpixel representation
        superpixels = sometimes(
            iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200)))
        blur = iaa.OneOf([
            iaa.GaussianBlur((0, 3.0)),   # sigma between 0 and 3.0
            iaa.AverageBlur(k=(2, 7)),    # local means, kernel size 2 to 7
            iaa.MedianBlur(k=(3, 11)),    # local medians, kernel size 3 to 11
        ])
        sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
        emboss = iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0))
        # search either for all edges or for directed edges and blend the
        # result with the original image using a blobby mask
        edges = iaa.SimplexNoiseAlpha(
            iaa.OneOf([
                iaa.EdgeDetect(alpha=(0.5, 1.0)),
                iaa.DirectedEdgeDetect(alpha=(0.5, 1.0),
                                       direction=(0.0, 1.0)),
            ]))
        gaussian_noise = iaa.AdditiveGaussianNoise(
            loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
        dropout = iaa.OneOf([
            # randomly remove up to 10% of the pixels
            iaa.Dropout((0.01, 0.1), per_channel=0.5),
            iaa.CoarseDropout((0.03, 0.15),
                              size_percent=(0.02, 0.05),
                              per_channel=0.2),
        ])
        invert = iaa.Invert(0.05, per_channel=True)  # invert color channels
        # change brightness of images (by -10 to 10 of original value)
        brightness_shift = iaa.Add((-10, 10), per_channel=0.5)
        hue_saturation = iaa.AddToHueAndSaturation((-20, 20))
        # either change the brightness of the whole image (sometimes per
        # channel) or change the brightness of subareas
        brightness_scale = iaa.OneOf([
            iaa.Multiply((0.5, 1.5), per_channel=0.5),
            iaa.FrequencyNoiseAlpha(
                exponent=(-4, 0),
                first=iaa.Multiply((0.5, 1.5), per_channel=True),
                second=iaa.ContrastNormalization((0.5, 2.0)))
        ])
        # improve or worsen the contrast
        contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)
        grayscale = iaa.Grayscale(alpha=(0.0, 1.0))

        # don't execute all of them, as that would often be way too strong
        optional_steps = iaa.SomeOf(
            (0, 5),
            [
                superpixels,
                blur,
                sharpen,
                emboss,
                edges,
                gaussian_noise,
                dropout,
                invert,
                brightness_shift,
                hue_saturation,
                brightness_scale,
                contrast,
                grayscale,
            ],
            random_order=True)

        self.aug_pipe = iaa.Sequential(
            [
                flip_horizontal,
                flip_vertical,
                crop_and_pad,
                affine,
                optional_steps,
            ],
            random_order=True)

        if shuffle:
            np.random.shuffle(self.instances)

Beispiel #19

0

Datei anzeigen

from utils.bbox import draw_boxes, BoundBox
from keras.models import model_from_json
import cv2
import numpy as np

# A single hard-coded box to draw on the test image.
box = [BoundBox(582, 274, 700, 321, None, [.7])]

# Rebuild the model from model.json and load its weights from model.h5.
# The context manager closes the file even if reading raises.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("model.h5")
# label_map.npy stores a pickled object inside a 0-d array; .item() unwraps it.
label_map = np.load('label_map.npy', allow_pickle=True).item()

im = cv2.imread("1.jpeg")
labels = ["number_plate"]
draw_boxes(im, box, loaded_model, label_map, labels, 0.5)

# Show the image and wait for a key press.
cv2.imshow("See here", im)
cv2.waitKey()

Beispiel #20

0

Datei anzeigen

Datei: generator.py Projekt: ctorney/wildFront

    def __getitem__(self, idx):
        """Build the ``idx``-th batch of images and YOLO target tensors.

        Args:
            idx: Batch index; the batch covers
                ``instances[idx * batch_size:(idx + 1) * batch_size]``,
                shifted back when it would run past the end of the list.

        Returns:
            ``(x_batch, [yolo_1, yolo_2, yolo_3])`` where ``x_batch`` holds
            the augmented images divided by 255 and the three target tensors
            use 1x, 2x and 4x the base grid size.
        """
        # get image input size (per the original note it changes every
        # 10 batches; see _get_net_size)
        net_h, net_w = self._get_net_size(idx)
        base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample

        # determine the first and the last indices of the batch
        l_bound = idx*self.batch_size
        r_bound = (idx+1)*self.batch_size

        if r_bound > len(self.instances):
            # Last batch: slide the window back so it ends on the final
            # instance. Clamp at 0, because when there are fewer instances
            # than one batch a negative start would be interpreted as an
            # offset from the END of the list and instances would be dropped.
            r_bound = len(self.instances)
            l_bound = max(0, r_bound - self.batch_size)

        batch_len = r_bound - l_bound
        channels = 4+1+self.objects

        # initialize the inputs and the outputs
        x_batch = np.zeros((batch_len, net_h, net_w, 3))                          # input images
        yolo_1 = np.zeros((batch_len, 1*base_grid_h, 1*base_grid_w, 3, channels))  # desired network output 1
        yolo_2 = np.zeros((batch_len, 2*base_grid_h, 2*base_grid_w, 3, channels))  # desired network output 2
        yolo_3 = np.zeros((batch_len, 4*base_grid_h, 4*base_grid_w, 3, channels))  # desired network output 3
        yolos = [yolo_1, yolo_2, yolo_3]

        # The anchor boxes do not depend on the object, so build them once
        # per batch instead of once per object.
        anchors = [BoundBox(0, 0, anc[0], anc[1]) for anc in ANC_VALS]

        # do the logic to fill in the inputs and the output
        for instance_count, train_instance in enumerate(self.instances[l_bound:r_bound]):
            # augment input image and fix object's position and size
            img, all_objs = self._aug_image(train_instance, net_h, net_w)

            for obj in all_objs:
                # find the best anchor box for this object: highest IoU
                # against the object's box moved to the origin (first one
                # wins on ties)
                max_index = -1
                max_iou = -1

                shifted_box = BoundBox(0, 0, obj['xmax']-obj['xmin'], obj['ymax']-obj['ymin'])

                for i, anchor in enumerate(anchors):
                    iou = bbox_iou(shifted_box, anchor)

                    if max_iou < iou:
                        max_index = i
                        max_iou = iou

                # determine the yolo to be responsible for this bounding box
                yolo = yolos[max_index//3]
                grid_h, grid_w = yolo.shape[1:3]

                # determine the position of the bounding box on the grid
                center_x = .5*(obj['xmin'] + obj['xmax'])
                g_center_x = center_x / float(net_w) * grid_w
                center_y = .5*(obj['ymin'] + obj['ymax'])
                g_center_y = center_y / float(net_h) * grid_h

                # determine the sizes of the bounding box
                w = obj['xmax'] - obj['xmin']
                h = obj['ymax'] - obj['ymin']

                # The stored box uses the un-scaled centre (center_x/center_y),
                # not the grid-unit centre computed above.
                box = [center_x, center_y, w, h]

                # determine the index of the label
                obj_indx = self.labels.index(obj['name'])

                # determine the location of the cell responsible for this object
                grid_x = int(np.floor(g_center_x))
                grid_y = int(np.floor(g_center_y))

                # assign ground truth x, y, w, h, confidence and class flag.
                # The slot is not cleared first, so class flags written by an
                # earlier object in the same cell/anchor persist.
                yolo[instance_count, grid_y, grid_x, max_index%3, 0:4] = box
                yolo[instance_count, grid_y, grid_x, max_index%3, 4  ] = 1.
                yolo[instance_count, grid_y, grid_x, max_index%3, 5+obj_indx] = 1

            # assign input image to x_batch, scaled by 1/255
            x_batch[instance_count] = img/255.

        return x_batch, [yolo_1, yolo_2, yolo_3]