def __init__(
    self,
    instances,
    anchors,
    labels,
    downsample=32,
    max_box_per_image=30,
    batch_size=1,
    min_net_size=320,
    max_net_size=608,
    shuffle=True,
    jitter=True,
    norm=None,
):
    """Store the generator configuration and pre-build the anchor boxes.

    Args:
        instances: Training instances; shuffled in place when ``shuffle`` is
            truthy.
        anchors: Flat sequence of numbers. Each consecutive pair ``(a, b)``
            becomes ``BoundBox(0, 0, a, b)``; a trailing unpaired value is
            ignored.
        labels: Class labels (stored as given).
        downsample: Ratio between the network input size and the network
            output size, 32 for YOLOv3.
        max_box_per_image: Maximum number of boxes per image (default 30).
        batch_size: Number of instances per batch (default 1).
        min_net_size: Lower bound of the network input size; rounded down to
            a multiple of ``downsample``.
        max_net_size: Upper bound of the network input size; rounded down to
            a multiple of ``downsample``.
        shuffle: Whether to shuffle ``instances`` once at construction time.
        jitter: Jitter setting (stored as given; consumed elsewhere).
        norm: Normalisation setting (stored as given; consumed elsewhere).
    """
    self.instances = instances
    self.labels = labels
    self.batch_size = batch_size
    self.max_box_per_image = max_box_per_image
    self.downsample = downsample

    # Snap both size limits down to the nearest multiple of the stride.
    stride = self.downsample
    self.min_net_size = (min_net_size // stride) * stride
    self.max_net_size = (max_net_size // stride) * stride

    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # Pair up the flat list (even index, odd index) into origin-anchored boxes.
    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchors[0::2], anchors[1::2])
    ]

    # Initial network input resolution.
    self.net_h = 416
    self.net_w = 416

    if shuffle:
        np.random.shuffle(self.instances)
def compare(self, data1, data2, thresh_iou):
    """Tell whether two boxes should be treated as overlapping.

    Args:
        data1: Mapping with ``'xmin'``, ``'ymin'``, ``'xmax'``, ``'ymax'``.
        data2: Mapping with the same four keys.
        thresh_iou: IoU value that must be strictly exceeded for a match.

    Returns:
        ``True`` when one box lies entirely inside the other (touching edges
        count), or when ``bbox_iou`` of the two boxes is greater than
        ``thresh_iou``; ``False`` otherwise.
    """

    def _is_inside(inner, outer):
        # The chained comparison also requires inner's min <= inner's max.
        return (
            outer['xmin'] <= inner['xmin'] <= inner['xmax'] <= outer['xmax']
            and outer['ymin'] <= inner['ymin'] <= inner['ymax'] <= outer['ymax']
        )

    if _is_inside(data1, data2) or _is_inside(data2, data1):
        return True

    first = BoundBox(data1['xmin'], data1['ymin'], data1['xmax'], data1['ymax'])
    second = BoundBox(data2['xmin'], data2['ymin'], data2['xmax'], data2['ymax'])
    return bool(bbox_iou(first, second) > thresh_iou)
def __init__(
    self,
    instances,
    anchors,
    labels,
    downsample=32,
    max_box_per_image=30,
    batch_size=1,
    min_net_size=320,
    max_net_size=608,
    shuffle=True,
    jitter=True,
    norm=None,
):
    """Set up the batch generator.

    Args:
        instances: Training samples; for their structure see
            ``create_training_instances()`` in train.py.
        anchors: Prior boxes as a flat list, e.g.
            ``[55,69, 75,234, 133,240, 136,129, 142,363, 203,290, 228,184,
            285,359, 341,260]``.
        labels: Usually ``config['model']['labels']``, e.g. ``["raccoon"]``;
            if none were specified, all objects found in the sample images.
        downsample: Ratio between network input's size and network output's
            size, 32 for YOLOv3.
        max_box_per_image: Maximum number of objects in a single image,
            derived from the annotation statistics of the samples.
        batch_size: Number of samples per batch.
        min_net_size: ``config['model']['min_input_size']``, the minimum
            input image size (width and height).
        max_net_size: ``config['model']['max_input_size']``, the maximum
            input image size (width and height).
        shuffle: Shuffle ``instances`` in place at construction time.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.instances = instances
    self.batch_size = batch_size
    self.labels = labels
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image

    # Both limits are floored to a multiple of the downsampling factor.
    factor = self.downsample
    self.min_net_size = factor * (min_net_size // factor)
    self.max_net_size = factor * (max_net_size // factor)

    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # One origin-anchored BoundBox per value pair (9 boxes for the example
    # anchor list above); an unpaired trailing value is skipped.
    anchor_boxes = []
    for k in range(0, len(anchors) - 1, 2):
        anchor_boxes.append(BoundBox(0, 0, anchors[k], anchors[k + 1]))
    self.anchors = anchor_boxes

    self.net_h = 416
    self.net_w = 416

    if shuffle:
        np.random.shuffle(self.instances)
def __init__(self, dataset_path: str, simplify_classes: bool = False, batch_size: int = 1,
             max_image_side_length: int = 512, augmentation: Augmenter = None,
             center_color_to_imagenet: bool = False, image_scale_mode: str = 'just',
             pre_image_scale=0.5):
    """Initialise the dataset and attach the attributes read by the YOLOv3 batch code.

    :param dataset_path: Forwarded to the base-class constructor.
    :param simplify_classes: Forwarded to the base-class constructor.
    :param batch_size: Forwarded to the base-class constructor.
    :param max_image_side_length: Forwarded to the base-class constructor and
        also stored as both ``min_net_size`` and ``max_net_size``.
    :param augmentation: Forwarded to the base-class constructor.
    :param center_color_to_imagenet: Accepted but not used here; ``False`` is
        always passed to the base class instead.
    :param image_scale_mode: Accepted but not used here; ``'squash'`` is always
        passed to the base class instead.
    :param pre_image_scale: Forwarded to the base-class constructor.
    """
    # NOTE: the 6th and 7th positional arguments are hard-coded (False,
    # 'squash') rather than taken from this constructor's parameters.
    super(Yolo_3Dataset, self).__init__(dataset_path, simplify_classes, batch_size,
                                        max_image_side_length, augmentation, False, 'squash',
                                        pre_image_scale)
    # self.anchors is read before it is assigned here, so it must already
    # exist at this point - presumably set by the base class or as a class
    # attribute (TODO confirm). Consecutive value pairs are turned into
    # origin-anchored BoundBox objects.
    self.anchors = [
        BoundBox(0, 0, self.anchors[2 * i], self.anchors[2 * i + 1])
        for i in range(len(self.anchors) // 2)
    ]
    # Bind BatchGenerator.__getitem__ to this instance so it can be called as
    # self.get_item(...) without inheriting from BatchGenerator.
    self.get_item = BatchGenerator.__getitem__.__get__(self, Yolo_3Dataset)
    self.instances = self.get_instances()
    # Fixed configuration for this dataset.
    self.labels = ('Sharp Force', 'Blunt Force')
    self.downsample = 32
    self.max_box_per_image = 30
    # Min and max are identical, i.e. a single network input size.
    self.min_net_size = max_image_side_length
    self.max_net_size = max_image_side_length
    self.shuffle = False
    self.jitter = 0.0
    # `normalize` is a module-level name defined outside this block.
    self.norm = normalize
def __init__(
    self,
    instances,
    anchors,
    labels,
    downsample=32,
    max_box_per_image=30,
    batch_size=1,
    min_net_size=320,
    max_net_size=608,
    shuffle=True,
    jitter=True,
    norm=None,
):
    """Record the generator settings and derive the anchor boxes.

    Args:
        instances: Training instances; shuffled in place if ``shuffle``.
        anchors: Flat sequence of numbers; each consecutive pair ``(a, b)``
            is wrapped as ``BoundBox(0, 0, a, b)``.
        labels: Class labels.
        downsample: Ratio between network input's size and network output's
            size, 32 for YOLOv3.
        max_box_per_image: Maximum number of boxes per image.
        batch_size: Instances per batch.
        min_net_size: Smallest network input size, floored to a multiple of
            ``downsample``.
        max_net_size: Largest network input size, floored to a multiple of
            ``downsample``.
        shuffle: Shuffle ``instances`` once on construction.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.instances, self.labels = instances, labels
    self.batch_size = batch_size
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image

    def _floor_to_stride(size):
        # Largest multiple of the downsampling factor not exceeding `size`.
        return (size // self.downsample) * self.downsample

    self.min_net_size = _floor_to_stride(min_net_size)
    self.max_net_size = _floor_to_stride(max_net_size)

    self.shuffle, self.jitter, self.norm = shuffle, jitter, norm

    pair_count = len(anchors) // 2
    self.anchors = [
        BoundBox(0, 0, anchors[2 * n], anchors[2 * n + 1])
        for n in range(pair_count)
    ]

    # Starting network input resolution.
    self.net_h = self.net_w = 416

    if shuffle:
        np.random.shuffle(self.instances)
def __init__(
    self,
    train_list,
    label_list,
    anchors,
    max_box_per_image=42,
    batch_size=1,
):
    """Configure the generator with fixed network-size settings.

    Args:
        train_list: Training instances; shuffled in place at the end of
            construction.
        label_list: Class labels.
        anchors: Flat sequence of numbers; each consecutive pair ``(a, b)``
            becomes ``BoundBox(0, 0, a, b)``.
        max_box_per_image: Maximum number of boxes per image.
        batch_size: Instances per batch.
    """
    self.train_list = train_list
    self.label_list = label_list
    self.max_box_per_image = max_box_per_image
    self.batch_size = batch_size

    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchors[0::2], anchors[1::2])
    ]

    # Hard-coded network geometry.
    self.net_h = 416
    self.net_w = 416
    self.downsample = 32
    self.min_input_size = 224
    self.max_input_size = 480

    # Floor the input-size limits to multiples of the downsampling factor.
    stride = self.downsample
    self.min_net_size = stride * (self.min_input_size // stride)
    self.max_net_size = stride * (self.max_input_size // stride)

    self.jitter = 0.3

    # Run the epoch-end hook once, then shuffle the training list in place.
    self.on_epoch_end()
    np.random.shuffle(self.train_list)
def __init__(
    self,
    instances,
    anchors,
    labels,
    downsample=32,
    max_box_per_image=30,
    batch_size=1,
    min_net_size=320,
    max_net_size=608,
    shuffle=True,
    norm=None,
    explicit_net_size=None,
    num_scales=3,
    aug_jitter=0.3,
    aug_scale=(0.25, 2.0),
    aug_hue=18,
    aug_saturation=1.5,
    aug_exposure=1.5,
    aug_gray=False,
    aug_flip=True,
    aug_pad=True,
):
    """Store generator and augmentation settings and build the anchor boxes.

    Args:
        instances: Training instances; shuffled in place if ``shuffle``.
        anchors: Flat sequence of numbers; each consecutive pair ``(a, b)``
            becomes ``BoundBox(0, 0, a, b)``.
        labels: Class labels.
        downsample: Ratio between network input's size and network output's
            size, 32 for YOLOv3.
        max_box_per_image: Maximum number of boxes per image.
        batch_size: Instances per batch.
        min_net_size: Smallest network input size, floored to a multiple of
            ``downsample``.
        max_net_size: Largest network input size, floored to a multiple of
            ``downsample``.
        shuffle: Shuffle ``instances`` once on construction.
        norm: Stored as given.
        explicit_net_size: Stored as given.
        num_scales: Stored as given.
        aug_jitter: Falsy values are replaced by ``0.0``.
        aug_scale: Falsy values are replaced by ``(1.0, 1.0)``.
        aug_hue: Falsy values are replaced by ``0.0``.
        aug_saturation: Falsy values are replaced by ``1.0``.
        aug_exposure: Falsy values are replaced by ``1.0``.
        aug_gray: Stored as given.
        aug_flip: Stored as given.
        aug_pad: Stored as given.
    """
    self.instances = instances
    self.labels = labels
    self.batch_size = batch_size
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image

    stride = self.downsample
    self.min_net_size = stride * (min_net_size // stride)
    self.max_net_size = stride * (max_net_size // stride)

    self.shuffle = shuffle
    self.norm = norm

    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchors[0::2], anchors[1::2])
    ]

    self.net_h = 416
    self.net_w = 416
    self.explicit_net_size = explicit_net_size
    self.num_scales = num_scales

    # Any falsy setting (None, 0, empty tuple, ...) falls back to the neutral
    # value listed for that parameter.
    self.aug_jitter = aug_jitter if aug_jitter else 0.0
    self.aug_scale = aug_scale if aug_scale else (1.0, 1.0)
    self.aug_hue = aug_hue if aug_hue else 0.0
    self.aug_saturation = aug_saturation if aug_saturation else 1.0
    self.aug_exposure = aug_exposure if aug_exposure else 1.0

    self.aug_gray = aug_gray
    self.aug_flip = aug_flip
    self.aug_pad = aug_pad

    if shuffle:
        np.random.shuffle(self.instances)
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
    """Turn one raw output tensor into a list of candidate ``BoundBox`` objects.

    The tensor is viewed as ``(grid_h, grid_w, 3, -1)``. For every grid cell
    and each of the 3 boxes, entries ``0:4`` are x, y, w, h, entry ``4`` is
    the objectness score and the remaining entries are class scores.

    Args:
        netout: Array whose first two dimensions are the grid height and
            width. It is NOT modified; decoding works on a private copy.
        anchors: Flat sequence; entries ``2*b`` and ``2*b + 1`` are used as
            the anchor width and height for box ``b``.
        obj_thresh: Boxes whose objectness is ``<= obj_thresh`` are skipped,
            and class scores not above it are zeroed.
        net_h: Network input height, used to scale box heights.
        net_w: Network input width, used to scale box widths.

    Returns:
        List of ``BoundBox(xmin, ymin, xmax, ymax, objectness, classes)`` in
        row-major grid order, with coordinates expressed as fractions of the
        image width/height.
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = 3

    # reshape() returns a view whenever it can, so the in-place activations
    # below would otherwise overwrite the caller's array (and a second call
    # on the same array would apply the sigmoid twice). Work on a copy.
    netout = netout.reshape((grid_h, grid_w, nb_box, -1)).copy()

    netout[..., :2] = _sigmoid(netout[..., :2])
    netout[..., 4] = _sigmoid(netout[..., 4])
    # Class scores are weighted by objectness, then thresholded to zero.
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    boxes = []
    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(nb_box):
                # element 4 is the objectness score
                objectness = netout[row, col, b, 4]
                if objectness <= obj_thresh:
                    continue

                # first 4 elements are x, y, w, and h
                x, y, w, h = netout[row, col, b, :4]

                x = (col + x) / grid_w  # center position, unit: image width
                y = (row + y) / grid_h  # center position, unit: image height
                w = anchors[2 * b + 0] * np.exp(w) / net_w  # unit: image width
                h = anchors[2 * b + 1] * np.exp(h) / net_h  # unit: image height

                # last elements are class probabilities
                classes = netout[row, col, b, 5:]

                boxes.append(
                    BoundBox(x - w / 2, y - h / 2, x + w / 2, y + h / 2,
                             objectness, classes))

    return boxes
def __init__(
        self,
        instances,
        anchors,
        labels,
        downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image=30,
        batch_size=1,
        min_net_size=320,
        max_net_size=608,
        shuffle=True,
        jitter=True,
        norm=None):
    """Store the generator settings, build anchor boxes and patch file names.

    Args:
        instances: Training instances (dicts with a ``'filename'`` entry).
            The list is shuffled in place if ``shuffle`` is truthy, and each
            ``'filename'`` gets a ``'.jpg'`` suffix if it lacks one.
        anchors: Flat sequence of numbers; each consecutive pair ``(a, b)``
            becomes ``BoundBox(0, 0, a, b)``.
        labels: Class labels.
        downsample: Ratio between network input size and output size.
        max_box_per_image: Maximum number of boxes per image.
        batch_size: Instances per batch.
        min_net_size: Smallest network input size, floored to a multiple of
            ``downsample``.
        max_net_size: Largest network input size, floored to a multiple of
            ``downsample``.
        shuffle: Shuffle ``instances`` once on construction.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.instances = instances
    self.batch_size = batch_size
    self.labels = labels
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image
    self.min_net_size = (min_net_size // self.downsample) * self.downsample
    self.max_net_size = (max_net_size // self.downsample) * self.downsample
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm
    self.anchors = [
        BoundBox(0, 0, anchors[2 * i], anchors[2 * i + 1])
        for i in range(len(anchors) // 2)
    ]
    self.net_h = 416
    self.net_w = 416

    if shuffle:
        np.random.shuffle(self.instances)

    # HACK: temporary workaround so the XML annotations (whose file names
    # lack an extension) do not all have to be edited. The instance dicts are
    # shared with the caller, so only add the suffix when it is missing;
    # otherwise a second generator built from the same list would produce
    # 'name.jpg.jpg'.
    for instance in self.instances:
        if not instance['filename'].endswith('.jpg'):
            instance['filename'] = instance['filename'] + '.jpg'
def __getitem__(self, idx):
    """Assemble batch number ``idx`` of network inputs and training targets.

    Each instance of the batch slice goes through ``self._aug_image``. Every
    object it returns is matched to the anchor with the highest ``bbox_iou``
    against the object's (width, height) box and written into the target
    tensor of the output scale that anchor belongs to.

    Args:
        idx: Zero-based batch index.

    Returns:
        ``([x_batch, t_batch, yolo_1, yolo_2, yolo_3],
        [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])`` where the dummies are
        zero arrays of shape ``(batch, 1)``.
    """
    # network input size for this batch (original note: changes every 10 batches)
    net_h, net_w = self._get_net_size(idx)
    base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

    # determine the first and the last indices of the batch; a batch that
    # would run past the end is shifted back so it ends on the last instance
    l_bound = idx * self.batch_size
    r_bound = (idx + 1) * self.batch_size

    if r_bound > len(self.instances):
        r_bound = len(self.instances)
        l_bound = r_bound - self.batch_size

    n_items = r_bound - l_bound
    anchors_per_scale = len(self.anchors) // 3
    cell_depth = 4 + 1 + len(self.labels)

    x_batch = np.zeros((n_items, net_h, net_w, 3))  # input images
    t_batch = np.zeros((n_items, 1, 1, 1, self.max_box_per_image, 4))  # list of groundtruth boxes

    # initialize the desired network outputs, one per scale
    yolo_1 = np.zeros((n_items, 1 * base_grid_h, 1 * base_grid_w, anchors_per_scale, cell_depth))
    yolo_2 = np.zeros((n_items, 2 * base_grid_h, 2 * base_grid_w, anchors_per_scale, cell_depth))
    yolo_3 = np.zeros((n_items, 4 * base_grid_h, 4 * base_grid_w, anchors_per_scale, cell_depth))
    # anchors 0-2 map to the finest grid, 6-8 to the coarsest
    yolos = [yolo_3, yolo_2, yolo_1]

    dummy_yolo_1 = np.zeros((n_items, 1))
    dummy_yolo_2 = np.zeros((n_items, 1))
    dummy_yolo_3 = np.zeros((n_items, 1))

    # NOTE: this slot counter is not reset between images; it only wraps at
    # max_box_per_image.
    true_box_index = 0

    # do the logic to fill in the inputs and the output
    for instance_count, train_instance in enumerate(self.instances[l_bound:r_bound]):
        # augment input image and fix object's position and size
        img, all_objs = self._aug_image(train_instance, net_h, net_w)

        for obj in all_objs:
            # find the best anchor box for this object
            max_anchor = None
            max_index = -1
            max_iou = -1

            shifted_box = BoundBox(0, 0,
                                   obj['xmax'] - obj['xmin'],
                                   obj['ymax'] - obj['ymin'])

            for i, anchor in enumerate(self.anchors):
                iou = bbox_iou(shifted_box, anchor)
                if max_iou < iou:
                    max_anchor = anchor
                    max_index = i
                    max_iou = iou

            # determine the yolo to be responsible for this bounding box
            yolo = yolos[max_index // 3]
            grid_h, grid_w = yolo.shape[1:3]

            # determine the position of the bounding box on the grid
            center_x = .5 * (obj['xmin'] + obj['xmax'])
            center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
            center_y = .5 * (obj['ymin'] + obj['ymax'])
            center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

            # determine the sizes of the bounding box
            w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))  # t_w
            h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))  # t_h

            box = [center_x, center_y, w, h]

            # determine the index of the label
            obj_indx = self.labels.index(obj['name'])

            # determine the location of the cell responsible for this object
            grid_x = int(np.floor(center_x))
            grid_y = int(np.floor(center_y))

            # assign ground truth x, y, w, h, confidence and class probs
            yolo[instance_count, grid_y, grid_x, max_index % 3] = 0
            yolo[instance_count, grid_y, grid_x, max_index % 3, 0:4] = box
            yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.
            yolo[instance_count, grid_y, grid_x, max_index % 3, 5 + obj_indx] = 1

            # assign the true box to t_batch
            true_box = [center_x, center_y,
                        obj['xmax'] - obj['xmin'],
                        obj['ymax'] - obj['ymin']]
            t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

            true_box_index += 1
            true_box_index = true_box_index % self.max_box_per_image

        # assign input image to x_batch
        if self.norm is not None:
            x_batch[instance_count] = self.norm(img)
        else:
            # plot image and bounding boxes for sanity check
            for obj in all_objs:
                cv2.rectangle(img, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                cv2.putText(img, obj['name'],
                            (obj['xmin'] + 2, obj['ymin'] + 12),
                            0, 1.2e-3 * img.shape[0],
                            (0, 255, 0), 2)

            x_batch[instance_count] = img

    return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
def get_x_y(self, indices: List[int], batch_no: int = 0):
    """
    Build the network inputs and YOLOv3 training targets for the given instances.

    :param indices: Indices into ``self.instances`` selecting the batch members
    :param batch_no: Accepted for interface compatibility; not used here
    :return: Tuple ``([x_batch, t_batch, yolo_1, yolo_2, yolo_3],
             [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])`` - images, ground
             truth boxes, one target tensor per scale, and three zero arrays
             of shape ``(len(indices), 1)``
    """
    num_items = len(indices)

    # the network input size is fixed at class level for this dataset
    net_h, net_w = Yolo_3Dataset.net_h, Yolo_3Dataset.net_w
    base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

    anchors_per_scale = len(self.anchors) // 3
    cell_depth = 4 + 1 + len(self.labels)

    x_batch = np.zeros((num_items, net_h, net_w, 3))  # input images
    t_batch = np.zeros((num_items, 1, 1, 1, self.max_box_per_image, 4))  # list of groundtruth boxes

    # initialize the desired network outputs, one per scale
    yolo_1 = np.zeros((num_items, 1 * base_grid_h, 1 * base_grid_w, anchors_per_scale, cell_depth))
    yolo_2 = np.zeros((num_items, 2 * base_grid_h, 2 * base_grid_w, anchors_per_scale, cell_depth))
    yolo_3 = np.zeros((num_items, 4 * base_grid_h, 4 * base_grid_w, anchors_per_scale, cell_depth))
    # anchors 0-2 map to the finest grid, 6-8 to the coarsest
    yolos = [yolo_3, yolo_2, yolo_1]

    dummy_yolo_1 = np.zeros((num_items, 1))
    dummy_yolo_2 = np.zeros((num_items, 1))
    dummy_yolo_3 = np.zeros((num_items, 1))

    # NOTE: this slot counter is not reset between images; it only wraps at
    # max_box_per_image.
    true_box_index = 0

    # do the logic to fill in the inputs and the output
    for instance_count, index in enumerate(indices):
        train_instance = self.instances[index]

        # augment input image and fix object's position and size
        img, all_objs = self._aug_image(train_instance, net_h, net_w)

        for obj in all_objs:
            # find the best anchor box for this object
            max_anchor = None
            max_index = -1
            max_iou = -1

            shifted_box = BoundBox(0, 0, obj['xmax'] - obj['xmin'], obj['ymax'] - obj['ymin'])

            for i, anchor in enumerate(self.anchors):
                iou = bbox_iou(shifted_box, anchor)
                if max_iou < iou:
                    max_anchor = anchor
                    max_index = i
                    max_iou = iou

            # determine the yolo to be responsible for this bounding box
            yolo = yolos[max_index // 3]
            grid_h, grid_w = yolo.shape[1:3]

            # determine the position of the bounding box on the grid
            center_x = .5 * (obj['xmin'] + obj['xmax'])
            center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
            center_y = .5 * (obj['ymin'] + obj['ymax'])
            center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

            # determine the sizes of the bounding box
            w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))  # t_w
            h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))  # t_h

            box = [center_x, center_y, w, h]

            # determine the index of the label
            obj_indx = self.labels.index(obj['name'])

            # determine the location of the cell responsible for this object
            grid_x = int(np.floor(center_x))
            grid_y = int(np.floor(center_y))

            # assign ground truth x, y, w, h, confidence and class probs
            yolo[instance_count, grid_y, grid_x, max_index % 3] = 0
            yolo[instance_count, grid_y, grid_x, max_index % 3, 0:4] = box
            yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.
            yolo[instance_count, grid_y, grid_x, max_index % 3, 5 + obj_indx] = 1

            # assign the true box to t_batch
            true_box = [
                center_x, center_y, obj['xmax'] - obj['xmin'],
                obj['ymax'] - obj['ymin']
            ]
            t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

            true_box_index += 1
            true_box_index = true_box_index % self.max_box_per_image

        # assign input image to x_batch
        if self.norm is not None:
            x_batch[instance_count] = self.norm(img)
        else:
            # plot image and bounding boxes for sanity check
            for obj in all_objs:
                cv2.rectangle(img, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                cv2.putText(img, obj['name'],
                            (obj['xmin'] + 2, obj['ymin'] + 12), 0,
                            1.2e-3 * img.shape[0], (0, 255, 0), 2)

            x_batch[instance_count] = img

    return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
def __getitem__(self, idx):
    """Build the ``idx``-th batch of input images and YOLOv3 target tensors.

    Args:
        idx: Zero-based batch index.

    Returns:
        ``([x_batch, t_batch, yolo_1, yolo_2, yolo_3],
        [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])``.
    """
    # get image input size, change every 10 batches
    # net_h, net_w are the height and width of the input images; per the
    # original note they are re-drawn at random every 10 batches
    net_h, net_w = self._get_net_size(idx)
    # height and width of the feature map at 32x downsampling
    base_grid_h, base_grid_w = net_h // self.downsample, net_w // self.downsample

    # determine the first and the last indices of the batch
    l_bound = idx * self.batch_size
    r_bound = (idx + 1) * self.batch_size

    # NOTE(original author): this wrap-back does not feel entirely reasonable
    if r_bound > len(self.instances):
        r_bound = len(self.instances)
        l_bound = r_bound - self.batch_size

    n_items = r_bound - l_bound
    anchors_per_scale = len(self.anchors) // 3
    cell_depth = 4 + 1 + len(self.labels)

    # prepare the samples: the input images of one batch
    x_batch = np.zeros((n_items, net_h, net_w, 3))  # input images
    # bounding boxes of all objects in each image,
    # shape = (batch, 1, 1, 1, max objects per image, 4 coordinates)
    t_batch = np.zeros((n_items, 1, 1, 1, self.max_box_per_image, 4))  # list of groundtruth boxes

    # initialize the inputs and the outputs; these correspond to the output
    # feature maps at 32x, 16x and 8x downsampling respectively
    # [batch_size, feature map height, feature map width, 3 anchors,
    #  4 box coordinates + 1 confidence + number of object classes]
    yolo_1 = np.zeros((n_items, 1 * base_grid_h, 1 * base_grid_w, anchors_per_scale, cell_depth))  # desired network output 1
    yolo_2 = np.zeros((n_items, 2 * base_grid_h, 2 * base_grid_w, anchors_per_scale, cell_depth))  # desired network output 2
    yolo_3 = np.zeros((n_items, 4 * base_grid_h, 4 * base_grid_w, anchors_per_scale, cell_depth))  # desired network output 3
    # 8x, 16x and 32x downsampling map onto the prior boxes, e.g.
    # [55,69, 75,234, 133,240, 136,129, 142,363, 203,290, 228,184, 285,359, 341,260]
    yolos = [yolo_3, yolo_2, yolo_1]

    dummy_yolo_1 = np.zeros((n_items, 1))
    dummy_yolo_2 = np.zeros((n_items, 1))
    dummy_yolo_3 = np.zeros((n_items, 1))

    true_box_index = 0  # index of the object slot within t_batch

    # do the logic to fill in the inputs and the output
    # instance_count is the index of the image within the batch
    for instance_count, train_instance in enumerate(self.instances[l_bound:r_bound]):
        # augment input image and fix object's position and size
        img, all_objs = self._aug_image(train_instance, net_h, net_w)

        for obj in all_objs:
            # find the best anchor box for this object
            max_anchor = None  # the anchor with the largest IoU
            max_index = -1     # index of the anchor with the largest IoU
            max_iou = -1

            shifted_box = BoundBox(0, 0,
                                   obj['xmax'] - obj['xmin'],
                                   obj['ymax'] - obj['ymin'])

            for i, anchor in enumerate(self.anchors):
                iou = bbox_iou(shifted_box, anchor)
                if max_iou < iou:
                    max_anchor = anchor
                    max_index = i
                    max_iou = iou

            # determine the yolo to be responsible for this bounding box
            # Of the feature maps at 3 scales, `yolo` is the target tensor of
            # the scale that owns the anchor best matching this object.
            yolo = yolos[max_index // 3]
            grid_h, grid_w = yolo.shape[1:3]

            # determine the position of the bounding box on the grid
            # The box centre is converted to feature-map grid coordinates; its
            # value is the expected prediction sigma(t_x) + c_x, sigma(t_y) + c_y.
            center_x = .5 * (obj['xmin'] + obj['xmax'])
            center_x = center_x / float(net_w) * grid_w  # expected: sigma(t_x) + c_x = center_x
            center_y = .5 * (obj['ymin'] + obj['ymax'])
            center_y = center_y / float(net_h) * grid_h  # expected: sigma(t_y) + c_y = center_y

            # determine the sizes of the bounding box
            w = np.log((obj['xmax'] - obj['xmin']) / float(max_anchor.xmax))  # t_w; note: truth_w = anchor_w * exp(t_w)
            h = np.log((obj['ymax'] - obj['ymin']) / float(max_anchor.ymax))  # t_h; note: truth_h = anchor_h * exp(t_h)

            box = [center_x, center_y, w, h]

            # determine the index of the label
            obj_indx = self.labels.index(obj['name'])

            # determine the location of the cell responsible for this object
            grid_x = int(np.floor(center_x))
            grid_y = int(np.floor(center_y))

            # assign ground truth x, y, w, h, confidence and class probs
            # max_index % 3 selects the best-matching anchor within the scale;
            # exactly one anchor is responsible for detecting an object
            yolo[instance_count, grid_y, grid_x, max_index % 3] = 0
            yolo[instance_count, grid_y, grid_x, max_index % 3, 0:4] = box  # box coordinates
            yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.  # box confidence
            yolo[instance_count, grid_y, grid_x, max_index % 3, 5 + obj_indx] = 1  # object class

            # assign the true box to t_batch. The x and y of true_box are
            # coordinates on the feature map (e.g. a 13*13 map); the width and
            # height are those of the object in the input image.
            true_box = [center_x, center_y,
                        obj['xmax'] - obj['xmin'],
                        obj['ymax'] - obj['ymin']]
            t_batch[instance_count, 0, 0, 0, true_box_index] = true_box

            # NOTE(original author): since instance_count already separates
            # the images, resetting true_box_index to 0 at each new image
            # should suffice. Here it accumulates over the whole batch; it is
            # not clear whether that is intentional.
            true_box_index += 1
            true_box_index = true_box_index % self.max_box_per_image

        # assign input image to x_batch
        if self.norm is not None:
            x_batch[instance_count] = self.norm(img)
        else:
            # plot image and bounding boxes for sanity check
            for obj in all_objs:
                cv2.rectangle(img, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
                cv2.putText(img, obj['name'],
                            (obj['xmin'] + 2, obj['ymin'] + 12),
                            0, 1.2e-3 * img.shape[0],
                            (0, 255, 0), 2)

            x_batch[instance_count] = img

    return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
def main(img_url_src, yolov3_endpoint, crnn_endpoint, output):
    """Detect license plates in an image, read them, and save an annotated copy.

    Args:
        img_url_src: Source of the input image, passed to ``get_url_image``.
        yolov3_endpoint: URL of the detection API; expected to answer with a
            JSON object containing ``"boxes"``.
        crnn_endpoint: URL of the recognition API; expected to answer with a
            JSON object containing ``"license-plates"``.
        output: Path the annotated image is written to.

    Raises:
        requests.HTTPError: If either API answers with an error status.
        OSError: If the annotated image could not be written to ``output``.
    """
    json_headers = {"content-type": "application/json"}

    # get the image in bytes representation
    image = get_url_image(img_url_src)
    image_bytes = image_to_jpeg_bytes(image)

    # encode image
    image_enc = base64.b64encode(image_bytes).decode("utf-8")
    image_dump = json.dumps({"img": image_enc})

    # make yolov3 api request; fail with a clear HTTP error instead of a
    # confusing JSON/KeyError further down when the service rejects the call
    resp = requests.post(yolov3_endpoint, data=image_dump, headers=json_headers)
    resp.raise_for_status()

    # parse response
    boxes = [BoundBox(*raw) for raw in resp.json()["boxes"]]

    # purge bounding boxes with a low confidence score: keep a box only when
    # at least one of its class scores exceeds the threshold
    confidence_score = 0.8
    boxes = [
        box for box in boxes
        if any(score > confidence_score for score in box.classes)
    ]

    dec_words = []
    if boxes:
        # create set of images of the detected license plates
        lps = [
            image_to_jpeg_nparray(image[b.ymin:b.ymax, b.xmin:b.xmax])
            for b in boxes
        ]

        # encode the cropped license plates
        lps = pickle.dumps(lps, protocol=0)
        lps_enc = base64.b64encode(lps).decode("utf-8")
        lps_dump = json.dumps({"imgs": lps_enc})

        # make crnn api request
        resp = requests.post(crnn_endpoint, data=lps_dump, headers=json_headers)
        resp.raise_for_status()

        # parse the response
        dec_lps = reorder_recognized_words(resp.json()["license-plates"])
        dec_words = [[word[0] for word in dec_lp] for dec_lp in dec_lps]

    # no recognised text: still give draw_boxes one (empty) entry per box
    if not dec_words:
        dec_words = [[] for _ in boxes]

    # draw predictions as overlays on the source image
    draw_image = draw_boxes(image,
                            boxes,
                            overlay_text=dec_words,
                            labels=["LP"],
                            obj_thresh=confidence_score)

    # and save it to disk; cv2.imwrite reports failure via its return value
    if not cv2.imwrite(output, draw_image):
        raise OSError("could not write image to {}".format(output))
def __init__(
        self,
        instances,
        anchors,
        labels,
        downsample=32,
        max_box_per_image=30,
        batch_size=1,
        shuffle=True,
        jitter=True,
        norm=None):
    """Store the generator settings and build the imgaug augmentation pipeline.

    The network input size is fixed at 224x224; the ``min_net_size`` /
    ``max_net_size`` options of other variants are disabled here.

    Args:
        instances: Training instances; shuffled in place if ``shuffle``.
        anchors: Flat sequence of numbers; each consecutive pair ``(a, b)``
            becomes ``BoundBox(0, 0, a, b)``. For Feature Pyramid Networks
            9 anchors are needed, 3 for each scale.
        labels: Class labels.
        downsample: Ratio between network input's size and network output's
            size, 32 for YOLOv1-3.
        max_box_per_image: Maximum number of boxes per image.
        batch_size: Instances per batch.
        shuffle: Shuffle ``instances`` once on construction.
        jitter: Stored as given.
        norm: Stored as given.
    """
    self.instances = instances
    self.labels = labels
    self.batch_size = batch_size
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image

    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    self.anchors = [
        BoundBox(0, 0, first, second)
        for first, second in zip(anchors[0::2], anchors[1::2])
    ]

    self.net_h = 224
    self.net_w = 224

    # Augmenters by https://github.com/aleju/imgaug
    # Augmenters with per_channel=0.5 sample one value per image in 50% of
    # all cases and one value per channel otherwise.

    # Default affine transform, applied to half of the images.
    affine_step = iaa.Sometimes(0.5, iaa.Affine())

    # Exactly one of three blur flavours.
    blur_step = iaa.OneOf([
        iaa.GaussianBlur((0, 3.0)),   # sigma between 0 and 3.0
        iaa.AverageBlur(k=(2, 7)),    # local means, kernel size 2 to 7
        iaa.MedianBlur(k=(3, 11)),    # local medians, kernel size 3 to 11
    ])
    sharpen_step = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
    noise_step = iaa.AdditiveGaussianNoise(
        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5)
    # Randomly remove 1% to 10% of the pixels.
    dropout_step = iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),
    ])
    add_step = iaa.Add((-10, 10), per_channel=0.5)           # brightness shift
    multiply_step = iaa.Multiply((0.5, 1.5), per_channel=0.5)  # brightness scale
    contrast_step = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)

    # Execute 0 to 5 of the less important augmenters per image; running all
    # of them would often be way too strong.
    optional_steps = iaa.SomeOf(
        (0, 5),
        [
            blur_step,
            sharpen_step,
            noise_step,
            dropout_step,
            add_step,
            multiply_step,
            contrast_step,
        ],
        random_order=True)

    self.aug_pipe = iaa.Sequential(
        [affine_step, optional_steps],
        random_order=True)

    if shuffle:
        np.random.shuffle(self.instances)
def __getitem__(self, idx):
    """Build the ``idx``-th batch of normalised images and YOLOv3 targets.

    All arrays are allocated with ``self.batch_size`` rows. Each training
    instance goes through ``self.augmentation``; every returned object is
    assigned to the anchor with the highest ``bbox_iou`` against its
    (width, height) box and written to the matching scale's target tensor.

    Args:
        idx: Zero-based batch index.

    Returns:
        ``([x_batch, t_batch, yolo_1, yolo_2, yolo_3],
        [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3])``.
    """
    net_h, net_w = self._get_net_size(idx)
    base_grid_h = net_h // self.downsample
    base_grid_w = net_w // self.downsample

    # Batch slice; a batch running past the end is shifted back so that it
    # ends on the last instance.
    first = idx * self.batch_size
    last = first + self.batch_size
    total = len(self.train_list)
    if last > total:
        last = total
        first = last - self.batch_size

    batch = self.batch_size
    anchors_per_scale = len(self.anchors) // 3
    cell_depth = 4 + 1 + len(self.label_list)

    x_batch = np.zeros((batch, net_h, net_w, 3))
    t_batch = np.zeros((batch, 1, 1, 1, self.max_box_per_image, 4))

    # One target tensor per scale: 1x, 2x and 4x the base grid.
    yolo_1, yolo_2, yolo_3 = (
        np.zeros((batch, scale * base_grid_h, scale * base_grid_w,
                  anchors_per_scale, cell_depth))
        for scale in (1, 2, 4)
    )
    # Anchors 0-2 belong to the finest grid, 6-8 to the coarsest.
    targets_by_anchor_group = [yolo_3, yolo_2, yolo_1]

    dummy_yolo_1, dummy_yolo_2, dummy_yolo_3 = (
        np.zeros((batch, 1)) for _ in range(3)
    )

    # Slot counter into t_batch; carried across images, wraps at the maximum.
    box_slot = 0

    for row, sample in enumerate(self.train_list[first:last]):
        image, objects = self.augmentation(sample, net_h, net_w)

        for obj in objects:
            obj_w = obj['xmax'] - obj['xmin']
            obj_h = obj['ymax'] - obj['ymin']

            # Best anchor = first one with the strictly largest IoU.
            size_box = BoundBox(0, 0, obj_w, obj_h)
            best_anchor, best_index, best_iou = None, -1, -1
            for index, candidate in enumerate(self.anchors):
                overlap = bbox_iou(size_box, candidate)
                if best_iou < overlap:
                    best_anchor, best_index, best_iou = candidate, index, overlap

            target = targets_by_anchor_group[best_index // 3]
            grid_h, grid_w = target.shape[1:3]

            # Box centre expressed in grid units.
            center_x = .5 * (obj['xmin'] + obj['xmax'])
            center_x = center_x / float(net_w) * grid_w
            center_y = .5 * (obj['ymin'] + obj['ymax'])
            center_y = center_y / float(net_h) * grid_h

            # Log-ratio of the object size to the anchor size.
            t_w = np.log(obj_w / float(best_anchor.xmax))
            t_h = np.log(obj_h / float(best_anchor.ymax))

            class_index = self.label_list.index(obj['name'])

            cell_x = int(np.floor(center_x))
            cell_y = int(np.floor(center_y))

            # Basic indexing yields a view, so writes go into `target`.
            cell = target[row, cell_y, cell_x, best_index % 3]
            cell[...] = 0
            cell[0:4] = [center_x, center_y, t_w, t_h]
            cell[4] = 1.
            cell[5 + class_index] = 1

            t_batch[row, 0, 0, 0, box_slot] = [center_x, center_y, obj_w, obj_h]
            box_slot = (box_slot + 1) % self.max_box_per_image

        x_batch[row] = normalize(image)

    inputs = [x_batch, t_batch, yolo_1, yolo_2, yolo_3]
    dummies = [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
    return inputs, dummies
baby5 = SubElement(child, 'bndbox') baby6 = SubElement(baby5, 'xmin') baby6.text = str(objecty[1]) baby7 = SubElement(baby5, 'ymin') baby7.text = str(objecty[2]) baby8 = SubElement(baby5, 'xmax') baby8.text = str(objecty[3]) baby9 = SubElement(baby5, 'ymax') baby9.text = str(objecty[4]) tree.write('{}{}.xml'.format(image_path_1,key[:-5]), pretty_print=True) i = 0 for x in loaded_json_file: y = json.loads(x) i+=1 img_loc_url = y["content"] save_loc = str("C:/Games/Projects/TCS_Project/train_imgs/"+str(i)+".jpeg") urllib.request.urlretrieve(img_loc_url, save_loc) image_height = y["annotation"][0]["imageHeight"] image_width = y["annotation"][0]["imageWidth"] xmin,ymin = y["annotation"][0]["points"][0]["x"],y["annotation"][0]["points"][0]["y"] xmax,ymax = y["annotation"][0]["points"][1]["x"],y["annotation"][0]["points"][1]["y"] labels = y["annotation"][0]["label"] box = [BoundBox(int(xmin*image_width), int(ymin*image_height), int(xmax*image_width), int(ymax*image_height),None,[1])] write_annotations(save_loc,box,labels,0.5,int(image_height),int(image_width))
def cloud_infer(self):
    """
    Main method that runs in the loop.

    Takes one frame from ``self.in_queue`` (returning immediately when the
    queue is empty), sends it to the YOLOv3 API, keeps the boxes that have at
    least one class score above ``self.yolov3_obj_thresh``, optionally sends
    the cropped plates to the CRNN API, draws the detections and pushes the
    results to ``self.bc_queue`` (and ``self.predicts_queue`` when there are
    predictions).
    """
    try:
        data = self.in_queue.get_nowait()
    except queue.Empty:
        # logger.warning("no data available for worker")
        return

    #############################

    # extract frame
    frame_num = data["frame_num"]
    img = data["jpeg"]

    # preprocess/compress the image
    image = image_from_bytes(img)
    reduced = compress_image(image)
    byte_im = image_to_jpeg_bytes(reduced)

    # encode image
    img_enc = base64.b64encode(byte_im).decode("utf-8")
    img_dump = json.dumps({"img": img_enc})

    # make inference request
    resp = self.yolov3_api_request(img_dump)
    if not resp:
        return

    #############################

    # parse response
    r_dict = resp.json()
    boxes = [BoundBox(*raw) for raw in r_dict["boxes"]]

    # purge bounding boxes with a low confidence score: keep a box only when
    # at least one of its class scores exceeds the threshold
    boxes = [
        box for box in boxes
        if any(score > self.yolov3_obj_thresh for score in box.classes)
    ]

    # also scale the boxes for later uses
    camera_source_width = image.shape[1]
    boxes640 = self.scale_bbox(boxes, self.yolov3_input_size_px,
                               self.bounding_boxes_upscale_px)
    boxes_source = self.scale_bbox(boxes, self.yolov3_input_size_px,
                                   camera_source_width)

    #############################

    # recognize the license plates in case
    # any bounding boxes have been detected
    dec_words = []
    if boxes and len(self.api_endpoint_crnn) > 0:
        # create set of images of the detected license plates
        lps = []
        try:
            for b in boxes_source:
                lp = image[b.ymin:b.ymax, b.xmin:b.xmax]
                jpeg = image_to_jpeg_nparray(
                    lp, [int(cv2.IMWRITE_JPEG_QUALITY), self.crnn_quality])
                lps.append(jpeg)
        except Exception:
            # Best effort: continue with the crops converted so far, but do
            # not swallow KeyboardInterrupt/SystemExit and keep the traceback.
            logger.warning("encountered error while converting to jpeg",
                           exc_info=True)

        lps = pickle.dumps(lps, protocol=0)
        lps_enc = base64.b64encode(lps).decode("utf-8")
        lps_dump = json.dumps({"imgs": lps_enc})

        # make request to rcnn API
        dec_lps = self.rcnn_api_request(lps_dump)
        dec_lps = self.reorder_recognized_words(dec_lps)
        for dec_lp in dec_lps:
            dec_words.append([word[0] for word in dec_lp])

    if dec_words:
        logger.info("Detected the following words: {}".format(dec_words))
    else:
        # one (empty) overlay entry per box so draw_boxes gets matching input
        dec_words = [[] for _ in boxes]

    #############################

    # draw detections
    upscaled = resize_image(image, self.bounding_boxes_upscale_px)
    draw_image = draw_boxes(
        upscaled,
        boxes640,
        overlay_text=dec_words,
        labels=["LP"],
        obj_thresh=self.yolov3_obj_thresh,
    )
    draw_byte_im = image_to_jpeg_bytes(
        draw_image,
        [int(cv2.IMWRITE_JPEG_QUALITY), self.broadcast_quality])

    #############################

    # push data for further processing in the queue
    output = {
        "boxes": boxes,
        "frame_num": frame_num,
        "avg_yolo3_rtt": self.rtt_yolo3_ms,
        "avg_crnn_rtt": self.rtt_crnn_ms,
        "image": draw_byte_im,
    }
    self.bc_queue.put(output)

    # push predictions to write to disk
    if dec_words:
        timestamp = time.time()
        literal_time = time.ctime(timestamp)
        predicts = {"predicts": dec_words, "date": literal_time}
        self.predicts_queue.put(predicts)

    logger.info(
        "Frame Count: {} - Avg YOLO3 RTT: {}ms - Avg CRNN RTT: {}ms - Detected: {}"
        .format(frame_num, int(self.rtt_yolo3_ms), int(self.rtt_crnn_ms),
                len(boxes)))
def __init__(
        self,
        instances,
        anchors,
        labels,
        downsample,  # ratio between network input's size and network output's size, 32 for YOLOv3
        max_box_per_image,
        batch_size,
        min_net_size,
        max_net_size,
        net_size,
        shuffle,
        jitter,
        norm):
    """Store the generator configuration and build the imgaug pipeline.

    ``min_net_size`` and ``max_net_size`` are rounded down to a multiple of
    ``downsample``. ``anchors`` is a flat sequence read as consecutive
    (width, height) pairs; a trailing unpaired value is ignored. When
    ``shuffle`` is truthy, ``instances`` is shuffled in place.
    """

    def half_the_time(augmenter):
        # wrap an augmenter so it is applied with probability 0.5
        return iaa.Sometimes(0.5, augmenter)

    # plain configuration
    self.instances = instances
    self.batch_size = batch_size
    self.labels = labels
    self.downsample = downsample
    self.max_box_per_image = max_box_per_image
    self.shuffle = shuffle
    self.jitter = jitter
    self.norm = norm

    # snap the allowed input sizes down to a multiple of the stride
    self.min_net_size = (min_net_size // downsample) * downsample
    self.max_net_size = (max_net_size // downsample) * downsample

    # one origin-anchored box per (width, height) pair
    self.anchors = [
        BoundBox(0, 0, anchors[k], anchors[k + 1])
        for k in range(0, len(anchors) - 1, 2)
    ]

    # fixed square network input
    self.net_h = net_size
    self.net_w = net_size

    self.aug = True

    # Augment using imaug pipeline https://github.com/aleju/imgaug
    # Augmenters given per_channel=p are, per the imgaug docs, sampled
    # per channel for a fraction p of images and once per image otherwise.
    self.aug_pipe = iaa.Sequential(
        [
            # geometric augmenters
            iaa.Fliplr(0.5),  # horizontally flip 50% of all images
            iaa.Flipud(0.5),  # vertically flip 50% of all images
            # crop/pad images by -5% to 10% of their height/width
            half_the_time(
                iaa.CropAndPad(
                    percent=(-0.05, 0.1),
                    pad_mode=ia.ALL,
                    pad_cval=(0, 255))),
            half_the_time(
                iaa.Affine(
                    # scale to 80-120% of the size, individually per axis
                    scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                    # translate by -20 to +20 percent (per axis)
                    translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                    rotate=(-40, 40),  # rotate by -40 to +40 degrees
                    shear=(-10, 10),  # shear by -10 to +10 degrees
                    order=[0, 1],  # nearest neighbour or bilinear interpolation
                    cval=(0, 255),  # fill value used when mode is constant
                    mode=ia.ALL)),  # any of the available warping modes
            # execute 0 to 5 of the following (less important) augmenters
            # per image; running all of them would often be way too strong
            iaa.SomeOf(
                (0, 5),
                [
                    # superpixel representation
                    half_the_time(
                        iaa.Superpixels(
                            p_replace=(0, 1.0), n_segments=(20, 200))),
                    # exactly one kind of blur
                    iaa.OneOf([
                        iaa.GaussianBlur((0, 3.0)),  # sigma between 0 and 3.0
                        iaa.AverageBlur(k=(2, 7)),  # kernel size between 2 and 7
                        iaa.MedianBlur(k=(3, 11)),  # kernel size between 3 and 11
                    ]),
                    # sharpen images
                    iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
                    # emboss images
                    iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),
                    # search either for all edges or for directed edges and
                    # blend the result with the original via a noise mask
                    iaa.SimplexNoiseAlpha(
                        iaa.OneOf([
                            iaa.EdgeDetect(alpha=(0.5, 1.0)),
                            iaa.DirectedEdgeDetect(
                                alpha=(0.5, 1.0), direction=(0.0, 1.0)),
                        ])),
                    # add gaussian noise to images
                    iaa.AdditiveGaussianNoise(
                        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
                    # pixel-wise or coarse (patch-wise) dropout
                    iaa.OneOf([
                        iaa.Dropout((0.01, 0.1), per_channel=0.5),
                        iaa.CoarseDropout(
                            (0.03, 0.15),
                            size_percent=(0.02, 0.05),
                            per_channel=0.2),
                    ]),
                    # invert color channels
                    iaa.Invert(0.05, per_channel=True),
                    # change brightness by -10 to 10 of the original value
                    iaa.Add((-10, 10), per_channel=0.5),
                    # change hue and saturation
                    iaa.AddToHueAndSaturation((-20, 20)),
                    # either change the brightness of the whole image
                    # (sometimes per channel) or of sub-areas
                    iaa.OneOf([
                        iaa.Multiply((0.5, 1.5), per_channel=0.5),
                        iaa.FrequencyNoiseAlpha(
                            exponent=(-4, 0),
                            first=iaa.Multiply((0.5, 1.5), per_channel=True),
                            second=iaa.ContrastNormalization((0.5, 2.0))),
                    ]),
                    # improve or worsen the contrast
                    iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
                    iaa.Grayscale(alpha=(0.0, 1.0)),
                    # ElasticTransformation and PiecewiseAffine are
                    # intentionally not part of this pipeline.
                ],
                random_order=True),
        ],
        random_order=True)

    if shuffle:
        np.random.shuffle(self.instances)
from utils.bbox import draw_boxes, BoundBox
from keras.models import model_from_json
import cv2
import numpy as np

# Demo script: draws one hard-coded bounding box on "1.jpeg" using a Keras
# model restored from "model.json" / "model.h5" and shows the result.

# single detection; positional arguments are passed straight to BoundBox
box = [BoundBox(582, 274, 700, 321, None, [.7])]

# restore the model architecture; the context manager guarantees the file
# handle is released even when reading fails
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("model.h5")

# NOTE(review): allow_pickle=True executes pickled code on load; only use
# this with a label_map.npy file from a trusted source.
label_map = np.load('label_map.npy', allow_pickle=True).item()

im = cv2.imread("1.jpeg")
if im is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later in draw_boxes
    raise FileNotFoundError("could not read image file: 1.jpeg")
# cv2.imshow("input",im)

labels = ["number_plate"]
draw_boxes(im, box, loaded_model, label_map, labels, 0.5)

cv2.imshow("See here", im)
cv2.waitKey()
def __getitem__(self, idx):
    """Build the ``idx``-th training batch.

    Args:
        idx: zero-based batch index.

    Returns:
        A tuple ``(x_batch, [yolo_1, yolo_2, yolo_3])``. ``x_batch`` holds
        the augmented images divided by 255. Each ``yolo_k`` is a target
        array of shape ``(n, grid_h, grid_w, 3, 4 + 1 + self.objects)``
        whose grids are 1x, 2x and 4x the base grid
        (``net size // self.downsample``). ``n`` is ``self.batch_size``
        unless the dataset holds fewer instances than that.

    Raises:
        ValueError: when an object's ``'name'`` is not in ``self.labels``.
    """
    # network input size for this batch (chosen by _get_net_size)
    net_h, net_w = self._get_net_size(idx)
    base_grid_h = net_h // self.downsample
    base_grid_w = net_w // self.downsample

    # determine the first and the last indices of the batch; the final
    # batch is shifted back so it stays full, but never below index 0
    # (a negative start would silently drop instances from the slice)
    l_bound = idx * self.batch_size
    r_bound = (idx + 1) * self.batch_size
    if r_bound > len(self.instances):
        r_bound = len(self.instances)
        l_bound = max(0, r_bound - self.batch_size)
    n_samples = r_bound - l_bound

    # input images
    x_batch = np.zeros((n_samples, net_h, net_w, 3))

    # desired network outputs: 3 anchors per cell, each carrying
    # x, y, w, h, objectness and one slot per class
    depth = 4 + 1 + self.objects
    yolos = [
        np.zeros((n_samples, scale * base_grid_h, scale * base_grid_w, 3, depth))
        for scale in (1, 2, 4)
    ]

    batch_instances = self.instances[l_bound:r_bound]
    for instance_count, train_instance in enumerate(batch_instances):
        # augment input image and fix object's position and size
        img, all_objs = self._aug_image(train_instance, net_h, net_w)

        for obj in all_objs:
            w = obj['xmax'] - obj['xmin']
            h = obj['ymax'] - obj['ymin']

            # find the best anchor box for this object: compare shapes
            # only, with both boxes anchored at the origin; the first
            # anchor wins on ties
            shifted_box = BoundBox(0, 0, w, h)
            max_index = -1
            max_iou = -1
            for i, (anchor_w, anchor_h) in enumerate(ANC_VALS):
                iou = bbox_iou(shifted_box, BoundBox(0, 0, anchor_w, anchor_h))
                if max_iou < iou:
                    max_index = i
                    max_iou = iou

            # determine the yolo to be responsible for this bounding box:
            # anchors are grouped in threes, one group per output scale
            yolo = yolos[max_index // 3]
            grid_h, grid_w = yolo.shape[1:3]

            # determine the position of the bounding box on the grid
            center_x = .5 * (obj['xmin'] + obj['xmax'])
            center_y = .5 * (obj['ymin'] + obj['ymax'])
            g_center_x = center_x / float(net_w) * grid_w  # sigma(t_x) + c_x
            g_center_y = center_y / float(net_h) * grid_h  # sigma(t_y) + c_y

            # determine the index of the label
            obj_indx = self.labels.index(obj['name'])

            # determine the location of the cell responsible for this object
            grid_x = int(np.floor(g_center_x))
            grid_y = int(np.floor(g_center_y))

            # assign ground truth x, y, w, h, confidence and class probs
            anchor_slot = max_index % 3
            yolo[instance_count, grid_y, grid_x, anchor_slot, 0:4] = [center_x, center_y, w, h]
            yolo[instance_count, grid_y, grid_x, anchor_slot, 4] = 1.
            yolo[instance_count, grid_y, grid_x, anchor_slot, 5 + obj_indx] = 1

        # assign input image to x_batch, scaled by 1/255
        x_batch[instance_count] = img / 255.

    return x_batch, yolos