def text_detect(img, prob=0.05):
    """Run the text detector on ``img`` and return the boxes scoring above ``prob``.

    Args:
        img: Image array; it is handed to ``Image.fromarray``.
        prob: Score threshold. Only rows with ``score > prob`` are returned.

    Returns:
        ``(box, scores)`` restricted to the kept rows. Before filtering, the
        first four columns of ``box`` are clamped against the original image
        size ``(w, h)``: negative values become 0, columns 0 and 2 that are
        ``>= w`` become ``w - 1``, and columns 1 and 3 that are ``>= h``
        become ``h - 1``.
    """
    pil_img = Image.fromarray(img)
    w, h = pil_img.size

    # Network input size.
    # NOTE(review): the original comment says the short side is fixed and the
    # long side is capped by max_scale -- confirm against resize_im.
    w_, h_ = resize_im(w, h, scale=IMGSIZE[0], max_scale=2048)

    resized = pil_img.resize((w_, h_), Image.BICUBIC)
    image_data = np.array(resized, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

    # Evaluate inside the module-level graph; the original comment says this
    # works around an error seen when serving through web.py.
    with graph.as_default():
        feeds = {
            textModel.input: image_data,
            input_shape: [h_, w_],
            image_shape: [h, w],
            K.learning_phase(): 0,
        }
        box, scores = sess.run([box_score], feed_dict=feeds)[0]

        keep = np.where(scores > prob)

        # Clamp the four box coordinates into the original image, in place.
        coords = box[:, 0:4]
        coords[coords < 0] = 0
        for col, limit in ((0, w), (1, h), (2, w), (3, h)):
            column = box[:, col]
            column[column >= limit] = limit - 1

        box = box[keep[0]]
        scores = scores[keep[0]]
    return box, scores
def show(p, scale=608):
    """Plot an annotated image before and after box splitting.

    The VOC annotation is read from the file that shares the image's path
    but has an ``.xml`` extension. The parsed boxes are printed.

    Args:
        p: Path to the image file.
        scale: Target scale forwarded to ``resize_im`` (with
            ``max_scale=4096``) to choose the size used for splitting.

    Returns:
        A 3-tuple: the result of ``plot_boxes`` on the original image and
        boxes, the result of ``plot_box`` on the image and boxes returned by
        ``get_box_spilt``, and those split boxes themselves.

    Note:
        ``splitW`` is not a parameter; it is read from the enclosing module
        scope.
    """
    # Local import keeps this block self-contained.
    import os

    im = Image.open(p)
    # Swap only the final extension, so other dots in the path are untouched
    # and any image extension maps to the matching .xml file.
    xmlP = os.path.splitext(p)[0] + '.xml'
    boxes = read_voc_xml(xmlP)
    print(boxes)

    w, h = resize_im(im.size[0], im.size[1], scale=scale, max_scale=4096)
    # Rotation is requested but with a fixed angle of 0 degrees.
    newBoxes, newIm = get_box_spilt(
        boxes,
        im,
        sizeW=w,
        SizeH=h,
        splitW=splitW,
        isRoate=True,
        rorateDegree=0,
    )
    return (
        plot_boxes(im, 0, boxes, color=(0, 0, 0)),
        plot_box(newIm, newBoxes),
        newBoxes,
    )
def get_xml_box_wh(self, filename):
    """Return the ``[width, height]`` of every split box for one image.

    The VOC annotation is read from the file that shares the image's path
    but has an ``.xml`` extension. A scale is drawn at random from
    ``self.scales``, the image size is passed through ``resize_im`` (with
    ``max_scale=2048``), and the boxes are split by ``get_box_spilt`` using
    ``self.splitW`` with rotation disabled.

    Args:
        filename: Path to the image file.

    Returns:
        A list of ``[w, h]`` integer pairs, one per split box, computed as
        ``int(x2 - x1)`` and ``int(y2 - y1)`` from each box's first four
        values.
    """
    # Local import keeps this block self-contained.
    import os

    # Swap only the final extension, so other dots in the path are untouched
    # and any image extension maps to the matching .xml file.
    xmlP = os.path.splitext(filename)[0] + '.xml'
    boxes = read_voc_xml(xmlP)

    # The image returned by get_box_spilt is discarded, so the file can be
    # closed as soon as the boxes have been computed.
    with Image.open(filename) as im:
        scale = np.random.choice(self.scales, 1)[0]
        w, h = resize_im(im.size[0], im.size[1], scale=scale, max_scale=2048)
        newBoxes, _ = get_box_spilt(
            boxes,
            im,
            sizeW=w,
            SizeH=h,
            splitW=self.splitW,
            isRoate=False,
            rorateDegree=0,
        )

    return [[int(bx[2] - bx[0]), int(bx[3] - bx[1])] for bx in newBoxes]
def data_generator(roots, anchors, num_classes, splitW):
    """Yield single-image training samples forever, for ``fit_generator``.

    Images are visited in a shuffled order that wraps around, while the
    target scale cycles through ``[416, 608, 608, 608]``. Images whose
    split produces no boxes are skipped. About 30% of samples are flipped,
    horizontally or vertically.

    Args:
        roots: List of image paths (jpg/png). It is shuffled IN PLACE. The
            annotation for each image is read from the same path with an
            ``.xml`` extension.
        anchors: Forwarded to ``preprocess_true_boxes``.
        num_classes: Forwarded to ``preprocess_true_boxes``.
        splitW: Forwarded to ``get_box_spilt``.

    Yields:
        ``([image_data, *y_true], [np.zeros(1)] * 4)`` where ``image_data``
        and the box data each hold one sample and ``y_true`` is whatever
        ``preprocess_true_boxes`` returns.
    """
    n = len(roots)
    np.random.shuffle(roots)
    scales = [416, 608, 608, 608]  # multi-scale training
    m = len(scales)
    maxN = 128  # at most 128 boxes are used per training sample
    i = 0
    j = 0
    while True:
        root = roots[i]
        i = (i + 1) % n
        scale = scales[j]
        j = (j + 1) % m

        xmlP = os.path.splitext(root)[0] + '.xml'
        boxes = read_voc_xml(xmlP)
        im = Image.open(root)

        w, h = resize_im(im.size[0], im.size[1], scale=scale, max_scale=None)
        if max(w, h) > 2048:
            # Too large without a cap: recompute with the long side limited.
            w, h = resize_im(im.size[0], im.size[1], scale=scale, max_scale=2048)
        input_shape = (h, w)

        # Rotation is requested but with a fixed angle of 0 degrees.
        newBoxes, newIm = get_box_spilt(
            boxes, im, w, h, splitW=splitW, isRoate=True, rorateDegree=0
        )
        newBoxes = np.array(newBoxes)
        if len(newBoxes) == 0:
            continue

        if np.random.randint(0, 100) > 70:
            if np.random.randint(0, 100) > 50:
                # Horizontal flip: mirror x1/x2 about the width.
                newBoxes[:, [0, 2]] = w - newBoxes[:, [2, 0]]
                flip_code = 1
            else:
                # Vertical flip: mirror y1/y2 about the height.
                newBoxes[:, [1, 3]] = h - newBoxes[:, [3, 1]]
                flip_code = 0
            # Flip the image that is actually fed to get_random_data, so the
            # pixels stay aligned with the mirrored boxes. (Flipping the
            # source image here would have no effect: it is not used again.)
            flipped = cv2.flip(np.array(newIm), flip_code)
            # NOTE(review): get_box_spilt's return type is not visible here;
            # keep whatever image type it produced.
            if isinstance(newIm, Image.Image):
                newIm = Image.fromarray(flipped)
            else:
                newIm = flipped

        image, box = get_random_data(newIm, newBoxes, input_shape, max_boxes=maxN)
        image_data = np.array([image])
        box_data = np.array([box])
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], [np.zeros(1)] * 4