def read_label(self, label_file, names_dict, anchors):
    """Parse ``label_file`` and build the three per-scale target arrays.

    Each box returned by ``tools.parse_voc_xml`` is matched to the anchor
    whose width/height overlaps it best, and written into the grid cell that
    contains the box centre on the matching scale.

    Args:
        label_file: label file handed to ``tools.parse_voc_xml``.
        names_dict: class-name mapping handed to ``tools.parse_voc_xml``.
        anchors: sequence of anchor (w, h) pairs; indices 0-2 go to the
            stride-8 grid, 3-5 to stride-16 and 6-8 to stride-32.

    Returns:
        ``(label_y1, label_y2, label_y3, test_result)`` where the labels are
        float32 arrays of shape ``(H // s, W // s, 3, 5 + class_num)`` for
        strides 32, 16 and 8 respectively, and ``test_result`` is a list of
        ``[x_min, y_min, x_max, y_max]`` boxes derived from the parsed labels.
        When the label file yields no boxes, four ``None`` values are
        returned.

    Side effects:
        Consumes ``self.is_flip`` (reset to ``False``) and mirrors the
        x-centre of the parsed boxes in place.
    """
    contents = tools.parse_voc_xml(label_file, names_dict)
    if not contents:
        # Keep the arity identical to the normal return so callers can
        # always unpack four values.
        return None, None, None, None

    # Flip the labels horizontally; the flag is one-shot.
    if self.is_flip:
        self.is_flip = False
        for content in contents:
            content[1] = 1.0 - content[1]

    depth = 5 + self.class_num
    label_y1 = np.zeros((self.height // 32, self.width // 32, 3, depth), np.float32)
    label_y2 = np.zeros((self.height // 16, self.width // 16, 3, depth), np.float32)
    label_y3 = np.zeros((self.height // 8, self.width // 8, 3, depth), np.float32)

    delta = self.smooth_delta
    # A falsy delta (0, None, False) means "no label smoothing"; computing
    # the smoothing value only when delta is set avoids dividing None.
    smooth_value = delta / self.class_num if delta else 0.0
    label_value = (1 - delta) if delta else 1.0
    if delta:
        # NOTE(review): this writes the smoothing value into channel 4 (the
        # confidence slot) of every cell, not into the class channels —
        # kept as in the original; confirm this is intended.
        label_y1[:, :, :, 4] = smooth_value
        label_y2[:, :, :, 4] = smooth_value
        label_y3[:, :, :, 4] = smooth_value

    y_true = [label_y3, label_y2, label_y1]
    ratio = {0: 8, 1: 16, 2: 32}

    test_result = []
    for label in contents:
        label_id = int(label[0])
        # The values saved in the label are x, y, w, h.
        box = np.asarray(label[1:5]).astype(np.float32)

        test_result.append([
            box[0] - box[2] / 2,
            box[1] - box[3] / 2,
            box[0] + box[2] / 2,
            box[1] + box[3] / 2,
        ])

        # Pick the anchor with the best width/height overlap ratio
        # (both rectangles treated as sharing a corner).
        best_overlap = 0
        best_index = 0
        for i, anchor in enumerate(anchors):
            min_wh = np.minimum(box[2:4], anchor)
            max_wh = np.maximum(box[2:4], anchor)
            overlap = (min_wh[0] * min_wh[1]) / (max_wh[0] * max_wh[1])
            if overlap > best_overlap:
                best_overlap = overlap
                best_index = i

        # Anchor index -> scale: 012 -> 0, 345 -> 1, 678 -> 2.
        scale = best_index // 3
        k = best_index % 3
        target = y_true[scale]
        grid_h, grid_w = target.shape[:2]

        x = int(np.floor(box[0] * self.width / ratio[scale]))
        y = int(np.floor(box[1] * self.height / ratio[scale]))
        # A centre sitting exactly on the right/bottom edge (e.g. after the
        # flip turns 0.0 into 1.0) would index one past the grid, and a
        # negative centre would silently wrap around; clamp to the grid.
        x = min(max(x, 0), grid_w - 1)
        y = min(max(y, 0), grid_h - 1)

        target[y, x, k, 0:4] = box
        # Label smoothing: spread delta over the classes, then mark the
        # confidence and the true class.
        target[y, x, k, 5:] = smooth_value
        target[y, x, k, 4:5] = label_value
        target[y, x, k, 5 + label_id] = label_value

    return label_y1, label_y2, label_y3, test_result
def parse_voc_xml(self, xml_file):
    """Parse ``xml_file`` and split every record into its id and its box.

    Delegates to ``tools.parse_voc_xml`` with ``get_ori=True`` and
    ``self.names_dict``.

    Returns:
        ``(ids, boxes)``: two parallel lists, where ``ids[i]`` is the first
        element of record ``i`` and ``boxes[i]`` is the remainder of that
        record.
    """
    # Per the original note, each record here is
    # [name_id, xmin, ymin, xmax, ymax] with integer values.
    records = tools.parse_voc_xml(
        xml_file, names_dict=self.names_dict, get_ori=True)
    ids = [record[0] for record in records]
    boxes = [record[1:] for record in records]
    return ids, boxes
def read_label(self, label_file, names_dict):
    """Read ``label_file`` and generate label_y1, label_y2, label_y3.

    Every parsed box is assigned to the entry of ``self.anchors`` whose
    width/height overlaps it best, then written into the grid cell holding
    the box centre on the scale that anchor belongs to.

    Returns:
        ``(label_y1, label_y2, label_y3)``: float32 arrays of shape
        ``(H // s, W // s, 3, 5 + class_num)`` for strides 32, 16 and 8,
        or ``(None, None, None)`` when the label file yields no boxes.
    """
    contents = tools.parse_voc_xml(label_file, names_dict)
    if not contents:
        return None, None, None

    def _empty_grid(stride):
        # One zero-filled target array for the given down-sampling stride.
        return np.zeros(
            (self.height // stride, self.width // stride, 3, 5 + self.class_num),
            np.float32)

    label_y1 = _empty_grid(32)
    label_y2 = _empty_grid(16)
    label_y3 = _empty_grid(8)

    targets_by_scale = [label_y3, label_y2, label_y1]
    stride_of_scale = {0: 8, 1: 16, 2: 32}

    for record in contents:
        class_index = int(record[0])
        # The label stores x, y, w, h.
        xywh = np.asarray(record[1:5]).astype(np.float32)

        # Find the anchor with the highest width/height overlap ratio.
        top_score = 0
        anchor_index = 0
        for idx, anchor in enumerate(self.anchors):
            inner = np.minimum(xywh[2:4], anchor)
            outer = np.maximum(xywh[2:4], anchor)
            score = (inner[0] * inner[1]) / (outer[0] * outer[1])
            if score > top_score:
                top_score, anchor_index = score, idx

        # Anchor index -> scale: 012 -> 0, 345 -> 1, 678 -> 2.
        scale, slot = divmod(anchor_index, 3)
        stride = stride_of_scale[scale]
        col = int(np.floor(xywh[0] * self.width / stride))
        row = int(np.floor(xywh[1] * self.height / stride))

        # Integer indexing yields a view, so writes land in the target array.
        cell = targets_by_scale[scale][row, col, slot]
        cell[0:4] = xywh
        cell[4] = 1.0
        cell[5 + class_index] = 1.0

    return label_y1, label_y2, label_y3
def read_label(self, label_file, names_dict, anchors, new_w, new_h):
    """Read ``label_file`` and generate label_y1, label_y2, label_y3.

    Each parsed box is optionally mirrored (``self.flip_img``) and remapped
    onto the padded canvas (``self.keep_img_shape``), matched to the anchor
    whose width/height overlaps it best, and written into the grid cell
    containing the box centre on the matching scale.

    Args:
        label_file: label file handed to ``tools.parse_voc_xml``.
        names_dict: class-name mapping handed to ``tools.parse_voc_xml``.
        anchors: sequence of anchor (w, h) pairs; indices 0-2 go to the
            stride-8 grid, 3-5 to stride-16 and 6-8 to stride-32.
        new_w: actual width of the image after scaling.
        new_h: actual height of the image after scaling.

    Returns:
        ``(label_y1, label_y2, label_y3, test_result)`` where the labels are
        float32 arrays of shape ``(H // s, W // s, 3, 5 + class_num)`` for
        strides 32, 16 and 8 respectively, and ``test_result`` is a list of
        ``[x_min, y_min, x_max, y_max]`` boxes. When the label file yields
        no boxes, four ``None`` values are returned.
    """
    contents = tools.parse_voc_xml(label_file, names_dict)
    if not contents:
        # Keep the arity identical to the normal return so callers can
        # always unpack four values.
        return None, None, None, None

    # Flip the labels horizontally (mirrors the x-centre in place).
    if self.flip_img:
        for content in contents:
            content[1] = 1.0 - content[1]

    x_pad = y_pad = 0
    if self.keep_img_shape:
        # Width/height of the black border on each side of the scaled image.
        x_pad = (self.width - new_w) // 2
        y_pad = (self.height - new_h) // 2

    depth = 5 + self.class_num
    label_y1 = np.zeros((self.height // 32, self.width // 32, 3, depth), np.float32)
    label_y2 = np.zeros((self.height // 16, self.width // 16, 3, depth), np.float32)
    label_y3 = np.zeros((self.height // 8, self.width // 8, 3, depth), np.float32)

    y_true = [label_y3, label_y2, label_y1]
    ratio = {0: 8, 1: 16, 2: 32}

    # Label smoothing values are loop-invariant.
    if self.label_smooth:
        class_fill = self.smooth_delta / self.class_num
        label_value = (1 - self.smooth_delta) + self.smooth_delta * 1 / self.class_num
    else:
        class_fill = 0.0
        label_value = 1.0

    test_result = []
    for label in contents:
        label_id = int(label[0])
        # The label stores x, y, w, h.
        box = np.asarray(label[1:5]).astype(np.float32)

        if self.keep_img_shape:
            # Account for the padded black borders and re-express the box
            # relative to the full (self.width, self.height) canvas.
            box[0:2] = (box[0:2] * [new_w, new_h] + [x_pad, y_pad]) / [self.width, self.height]
            box[2:4] = (box[2:4] * [new_w, new_h]) / [self.width, self.height]

        test_result.append([
            box[0] - box[2] / 2,
            box[1] - box[3] / 2,
            box[0] + box[2] / 2,
            box[1] + box[3] / 2,
        ])

        # Pick the anchor with the best width/height overlap ratio
        # (both rectangles treated as sharing a corner).
        best_overlap = 0
        best_index = 0
        for i, anchor in enumerate(anchors):
            min_wh = np.minimum(box[2:4], anchor)
            max_wh = np.maximum(box[2:4], anchor)
            overlap = (min_wh[0] * min_wh[1]) / (max_wh[0] * max_wh[1])
            if overlap > best_overlap:
                best_overlap = overlap
                best_index = i

        # Anchor index -> scale: 012 -> 0, 345 -> 1, 678 -> 2.
        scale = best_index // 3
        k = best_index % 3
        target = y_true[scale]
        grid_h, grid_w = target.shape[:2]

        x = int(np.floor(box[0] * self.width / ratio[scale]))
        y = int(np.floor(box[1] * self.height / ratio[scale]))
        # A centre sitting exactly on the right/bottom edge (e.g. after the
        # flip turns 0.0 into 1.0) would index one past the grid, and a
        # negative centre would silently wrap around; clamp to the grid.
        x = min(max(x, 0), grid_w - 1)
        y = min(max(y, 0), grid_h - 1)

        target[y, x, k, 0:4] = box
        target[y, x, k, 4:5] = label_value
        # Fill *all* class channels; a ``5:-1`` slice would skip the last
        # class, leaving it without the smoothing value (or with a stale
        # one-hot from an earlier box that landed in the same cell).
        target[y, x, k, 5:] = class_fill
        target[y, x, k, 5 + label_id] = label_value

    return label_y1, label_y2, label_y3, test_result