def OverlapTextBBox(self, ipTextComp, ip_list):
    """Merge an incoming text component into the first overlapping list entry.

    Args:
        ipTextComp: Tuple ``((x1, y1, x2, y2), text, prob, text_id)``
            describing the incoming text component.
        ip_list: List of tuples with the same layout as ``ipTextComp``.

    Returns:
        ``(True, new_list)`` when an entry of ``ip_list`` overlaps the
        incoming component (as decided by
        ``Merger._is_rectangles_overlapped``). ``new_list`` is a shallow copy
        of ``ip_list`` with the overlapping entry removed and the merged
        entry appended at the end. Only the first overlapping entry is
        merged. ``(False, None)`` when no entry overlaps. ``ip_list`` itself
        is not modified.
    """
    merge = Merger()
    # The incoming component's text ID is intentionally unused: the merged
    # entry keeps the ID of the entry that was already in the list.
    (SX, SY, EX, EY), ipop, ipprob, _ = ipTextComp
    incoming_words = ipop.split()
    known_words = set(incoming_words)

    for entry in ip_list:
        (startX, startY, endX, endY), op, prob, textID = entry
        if not merge._is_rectangles_overlapped(
                Rectangle.from_2_pos(startX, startY, endX, endY),
                Rectangle.from_2_pos(SX, SY, EX, EY)):
            continue

        # Merge the existing box with the *incoming* box. Merging the
        # existing box with itself would silently drop the incoming extent.
        rec = merge._merge_2_rectangles(
            Rectangle.from_2_pos(startX, startY, endX, endY),
            Rectangle.from_2_pos(SX, SY, EX, EY))

        # Incoming words first, then the existing entry's words that are not
        # already present. dict.fromkeys de-duplicates while keeping the
        # original word order, so the result is deterministic.
        extra_words = [
            word for word in dict.fromkeys(op.split())
            if word not in known_words
        ]
        mergeOp = " ".join(incoming_words + extra_words)
        mergeProb = (ipprob + prob) / 2

        final_list = ip_list.copy()
        final_list.remove(entry)
        final_list.append(
            ((rec.x1, rec.y1, rec.x2, rec.y2), mergeOp, mergeProb, textID))
        return True, final_list

    return False, None
def mergeOrAppend(self, c, shape, component, conf, imcpy, merge=0):
    """Merge contour ``c`` into existing components or append it as a new one.

    The contour's solidity (contour area divided by convex-hull area) is
    used both as an acceptance threshold and as the stored confidence.

    Args:
        c: Contour to process (passed to ``cv2.convexHull``,
            ``cv2.contourArea`` and ``cv2.boundingRect``).
        shape: Shape label of the contour. Only ``'line'``, ``'rectangle'``,
            ``'square'``, ``'pentagon'``, ``'hexagon'``, ``'circle'`` and
            ``'triangle'`` are appended; any other label is ignored.
        component: List of accepted contours; modified in place.
        conf: List of confidences parallel to ``component``; modified in
            place.
        imcpy: Image on which accepted contours are drawn (modified in
            place).
        merge: When ``1``, ``self.getMergeIndex`` is consulted and ``c`` is
            merged into every component it reports instead of being
            appended.

    Returns:
        The ``(component, conf)`` pair (the same list objects that were
        passed in).
    """
    hull_area = cv2.contourArea(cv2.convexHull(c))
    if hull_area == 0:
        # Degenerate contour (e.g. collinear points): solidity is undefined.
        # Treat it as rejected instead of raising ZeroDivisionError.
        return (component, conf)

    solidity = float(cv2.contourArea(c)) / float(hull_area)
    if solidity <= 0.4:
        return (component, conf)

    mergable_index = self.getMergeIndex(component, c) if merge == 1 else []

    if mergable_index:
        merger = Merger()
        # The bounding box of ``c`` does not change between iterations.
        x1, y1, w1, h1 = cv2.boundingRect(c)
        for ind in mergable_index:
            x2, y2, w2, h2 = cv2.boundingRect(component[ind])
            merge_rec = merger._merge_2_rectangles(
                Rectangle.from_2_pos(x1, y1, x1 + w1, y1 + h1),
                Rectangle.from_2_pos(x2, y2, x2 + w2, y2 + h2))
            # Replace the stored contour with the four corners of the merged
            # rectangle and keep the better of the two confidences.
            component[ind] = np.array(
                [[[merge_rec.x1, merge_rec.y1]],
                 [[merge_rec.x2, merge_rec.y1]],
                 [[merge_rec.x2, merge_rec.y2]],
                 [[merge_rec.x1, merge_rec.y2]]],
                dtype=np.int32)
            conf[ind] = max(conf[ind], solidity)
        return (component, conf)

    # Colour used to draw each recognised shape on ``imcpy``.
    shape_colors = {
        'line': (255, 255, 0),
        'rectangle': (0, 255, 255),
        'square': (0, 255, 255),
        'pentagon': (0, 0, 255),
        'hexagon': (0, 0, 255),
        'circle': (0, 255, 0),
        'triangle': (255, 0, 255),
    }
    color = shape_colors.get(shape)
    if color is not None:
        cv2.drawContours(imcpy, [c], 0, color, 2)
        component.append(c)
        conf.append(solidity)

    return (component, conf)
def getMergeIndex(self, component, c):
    """Return the indices of components that contour ``c`` can be merged with.

    ``component`` is converted to rectangles with
    ``self.convertContour2Rect``; an index is reported when
    ``Merger._is_rectangles_overlapped_horizontally`` accepts that rectangle
    together with the bounding rectangle of ``c``.

    Args:
        component: Collection of contours to test against.
        c: Contour whose bounding rectangle is compared.

    Returns:
        List of matching indices, in ascending order.
    """
    overlap_checker = Merger()
    candidate_rects = self.convertContour2Rect(component)
    left, top, box_w, box_h = cv2.boundingRect(c)
    return [
        position
        for position, candidate in enumerate(candidate_rects)
        if overlap_checker._is_rectangles_overlapped_horizontally(
            candidate,
            Rectangle.from_2_pos(left, top, left + box_w, top + box_h))
    ]
def getText(self, fileName, image, fig_text):
    """Detect text regions with the EAST model and OCR each one with Tesseract.

    Reads ``self.size`` (square detector input size), ``self.overlapTh``
    (non-maximum-suppression threshold), ``self.padding`` (fraction of the
    box size added around each box) and ``self.tess_config``. The EAST model
    file ``frozen_east_text_detection.pb`` is loaded from the directory of
    this source file on every call.

    Args:
        fileName: Unused by this method.
        image: Input image array; it is not modified.
        fig_text: Passed through to the spell corrector's ``correctWord``.

    Returns:
        List of ``((startX, startY, endX, endY), text, prob)`` tuples ordered
        by ``startY``. ``text`` is the spell-corrected words of the region,
        each followed by a single space; ``prob`` is the mean Tesseract
        confidence of those words. Regions without any usable word are
        omitted.
    """
    spellcorr = sc()
    imcpy = image.copy()
    (height, width) = image.shape[:2]
    (new_height, new_width) = (self.size, self.size)
    im_resize = cv2.resize(image, (new_width, new_height))
    # Scale factors mapping detector coordinates back onto the input image.
    rW = width / float(new_width)
    rH = height / float(new_height)

    layerNames = [
        "feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"
    ]
    net = cv2.dnn.readNet(
        os.path.dirname(os.path.abspath(__file__)) +
        "/frozen_east_text_detection.pb")
    blob = cv2.dnn.blobFromImage(im_resize,
                                 1.0, (new_width, new_height),
                                 (123.68, 116.78, 103.94),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)
    (scores, geometry) = net.forward(layerNames)
    (rects, confidences) = self.decodePredictions(scores, geometry)

    # Non-maxima suppression drops weak and overlapping bounding boxes.
    boxes = non_max_suppression(np.array(rects),
                                probs=confidences,
                                overlapThresh=self.overlapTh)
    rectangle_list = Merger().merge_rectangle_list(self.boxToRec(boxes))

    results = []
    for rec in rectangle_list:
        startX = int(rec.x1 * rW)
        startY = int(rec.y1 * rH)
        endX = int(rec.x2 * rW)
        endY = int(rec.y2 * rH)

        # Pad the box (one unit before the start, two units after the end)
        # and clamp it to the image bounds.
        dX = int((endX - startX) * self.padding)
        dY = int((endY - startY) * self.padding)
        startX = max(0, startX - dX)
        startY = max(0, startY - dY)
        endX = min(width, endX + (dX * 2))
        endY = min(height, endY + (dY * 2))

        roi = imcpy[startY:endY, startX:endX]
        if not isinstance(roi, np.ndarray) or roi.size == 0:
            # Nothing to OCR for a degenerate (empty) region.
            continue

        # NOTE(review): OCR runs on the raw ROI. A grayscale / upscale /
        # sharpen / contrast pipeline was previously computed here but its
        # result was never handed to Tesseract, so it has been dropped.
        # Decide whether the enhanced image should be OCR'd instead.
        output_dict = pytesseract.image_to_data(roi,
                                                config=self.tess_config,
                                                output_type='dict')
        results.append(((startX, startY, endX, endY), output_dict))

    # Order regions from top to bottom.
    results.sort(key=lambda r: r[0][1])

    # Patterns are compiled once instead of once per word.
    leading_junk = re.compile(r'^[^()+*-<>#=&{}[\]/\0-9a-zA-Z]*')
    trailing_junk = re.compile(r'[^()+*-<>#=&{}[\]/\0-9a-zA-Z]*$')
    non_word = re.compile(r'\W+')

    final_results = []
    for box, output_dict in results:
        words = []
        prob_total = 0
        for raw_text, word_conf in zip(output_dict.get('text'),
                                       output_dict.get('conf')):
            op = leading_junk.sub('', trailing_junk.sub('', raw_text))
            # Keep ASCII characters only, then drop every non-word character.
            op = "".join(ch for ch in op if ord(ch) < 128).strip()
            op = non_word.sub('', op)
            if op != "":
                words.append(spellcorr.correctWord(op, fig_text))
                prob_total = prob_total + word_conf

        if words:
            # Every word is followed by a single space, including the last.
            text = "".join(word + " " for word in words)
            final_results.append((box, text, prob_total / len(words)))

    return final_results
def combinedTextDetect(self, fileName, image, components, fig_text):
    """Combine the text found by the three detectors into one de-duplicated list.

    Text is collected from ``self.getTextFullImage``, ``self.getTextEast``
    and ``self.getTextComponent``. Each entry is a
    ``((startX, startY, endX, endY), text, prob)`` tuple.

    Steps:
        1. For each component entry, the first full-image entry accepted by
           ``Merger._is_rectangles_inside(component_rect, full_rect)`` is
           removed from the full-image list.
        2. Full-image and EAST entries are merged with
           ``self.mergeOverlappingTextROI``.
        3. Pairs of merged entries that overlap horizontally are fused into
           a single entry.
        4. Entries from each detector that overlap none of the step-2
           results are added unchanged.
        5. Entries with an identical box and text are collapsed to the first
           occurrence.

    Args:
        fileName: Forwarded to the three detector methods.
        image: Input image, forwarded to the three detector methods.
        components: Forwarded to ``self.getTextComponent``.
        fig_text: Forwarded to the detectors and to
            ``self.mergeOverlappingTextROI``.

    Returns:
        List of ``((startX, startY, endX, endY), text, prob)`` tuples sorted
        by ``startY``.
    """
    merge = Merger()

    def to_rect(box):
        # Build a Rectangle from an (x1, y1, x2, y2) sequence.
        return Rectangle.from_2_pos(box[0], box[1], box[2], box[3])

    def top_edge(entry):
        # Sort key: the startY coordinate of an entry's box.
        return entry[0][1]

    def drop_adjacent_duplicates(entries):
        # Remove an entry when it equals the one directly before it.
        return [
            entry for i, entry in enumerate(entries)
            if i == 0 or entry != entries[i - 1]
        ]

    def is_contained(inner, outer):
        # True when ``inner`` (lower-cased, spaces removed) is purely
        # alphanumeric and occurs inside ``outer`` (same normalisation).
        inner = inner.lower().replace(" ", "")
        outer = outer.lower().replace(" ", "")
        return (re.match('^[0-9a-zA-Z]*$', inner) is not None
                and re.search(inner, outer, flags=0) is not None)

    text_fullIm = self.getTextFullImage(fileName, image, fig_text)
    text_ROI = self.getTextEast(fileName, image, fig_text)
    text_componentIm = self.getTextComponent(fileName, components, image,
                                             fig_text)

    # Step 1: at most one full-image entry is removed per component entry.
    for component_box, _, _ in text_componentIm:
        for position, (full_box, _, _) in enumerate(text_fullIm):
            if merge._is_rectangles_inside(to_rect(component_box),
                                           to_rect(full_box)):
                del text_fullIm[position]
                break

    # Order every detector's output from top to bottom.
    text_fullIm = sorted(text_fullIm, key=top_edge)
    text_ROI = sorted(text_ROI, key=top_edge)
    text_componentIm = sorted(text_componentIm, key=top_edge)

    # Step 2: where several detectors found the same text keep the best one.
    temp_combine = self.mergeOverlappingTextROI(text_fullIm, text_ROI,
                                                fig_text)
    combined_results = sorted(drop_adjacent_duplicates(temp_combine),
                              key=top_edge)

    # Step 3: fuse horizontally overlapping boxes.
    # NOTE(review): the pairs are a snapshot taken before any merge, so an
    # entry that was already fused can take part in a later pair as well.
    for first, second in list(combinations(combined_results, 2)):
        box_a, text_a, conf_a = first[0], first[1], first[2]
        box_b, text_b, conf_b = second[0], second[1], second[2]
        if not merge._is_rectangles_overlapped_horizontally(
                to_rect(box_a), to_rect(box_b)):
            continue

        merged_rec_pos = merge._merge_2_rectangles(to_rect(box_a),
                                                   to_rect(box_b))
        if is_contained(text_a, text_b):
            merged_text, merged_conf = text_b, conf_b
        elif is_contained(text_b, text_a):
            merged_text, merged_conf = text_a, conf_a
        else:
            merged_text = self.getMergedText(box_a, text_a, box_b, text_b)
            merged_conf = int((conf_a + conf_b) / 2)

        for stale in (first, second):
            if stale in combined_results:
                combined_results.remove(stale)
        combined_results.append(
            ((merged_rec_pos.x1, merged_rec_pos.y1, merged_rec_pos.x2,
              merged_rec_pos.y2), merged_text, merged_conf))

    # Step 4: add text seen by only one detector (EAST, then full image,
    # then rotated components), i.e. overlapping none of the step-2 results.
    for source in (text_ROI, text_fullIm, text_componentIm):
        for (startX, startY, endX, endY), op, prob in source:
            already_covered = any(
                merge._is_rectangles_overlapped_horizontally(
                    to_rect(merged_box),
                    Rectangle.from_2_pos(startX, startY, endX, endY))
                for merged_box, _, _ in temp_combine)
            if not already_covered:
                combined_results.append(
                    ((startX, startY, endX, endY), op, prob))

    combined_results = sorted(drop_adjacent_duplicates(combined_results),
                              key=top_edge)

    # Step 5: keep the first entry for every distinct (box, text) pair.
    clean_combined_results = []
    for (startX, startY, endX, endY), op, prob in combined_results:
        box = (startX, startY, endX, endY)
        is_duplicate = any(
            kept_box == box and kept_op == op
            for kept_box, kept_op, _ in clean_combined_results)
        if not is_duplicate:
            clean_combined_results.append((box, op, prob))

    return sorted(clean_combined_results, key=top_edge)
def mergeOverlappingTextROI(self, text_list1, text_list2, fig_text):
    """Merge entries of two text lists whose boxes overlap horizontally.

    Every entry of ``text_list1`` is compared with every entry of
    ``text_list2``. For each pair accepted by
    ``Merger._is_rectangles_overlapped_horizontally`` the boxes are merged
    and one text/confidence is chosen:

    * equal texts (after ``strip``): that text with the larger confidence;
    * one text purely alphanumeric and contained in the other (case and
      spaces ignored): the containing text with its own confidence;
    * longest common substring longer than one character: the text with the
      higher confidence, followed by the spell-corrected remainder of the
      other text when that remainder is longer than one character;
    * otherwise, by confidence: both texts joined when both are >= 50, the
      single text that is >= 50, or nothing when neither is.

    Args:
        text_list1: List of ``((x1, y1, x2, y2), text, prob)`` tuples.
        text_list2: List with the same layout.
        fig_text: Passed to the spell corrector's ``correctWord``.

    Returns:
        List of ``((x1, y1, x2, y2), text, prob)`` tuples, one per merged
        pair, in comparison order.
    """
    rect_tool = Merger()
    merged_entries = []
    corrector = sc()

    def squash(text):
        # Lower-case and remove spaces for containment checks.
        return text.lower().replace(" ", "")

    def embedded(inner, outer):
        # ``inner`` is alphanumeric-only and occurs somewhere in ``outer``.
        return bool(re.match('^[0-9a-zA-Z]*$', squash(inner))) and (
            re.search(squash(inner), squash(outer), flags=0) is not None)

    def with_leftover(kept, other, start, size):
        # ``kept`` plus what remains of ``other`` once the shared part
        # ``other[start:start + size]`` has been cut out.
        leftover = other[0:start] + other[start + size:]
        if len(leftover) > 1:
            return kept + " " + corrector.correctWord(leftover, fig_text)
        return kept

    for (ax1, ay1, ax2, ay2), raw1, prob1 in text_list1:
        for (bx1, by1, bx2, by2), raw2, prob2 in text_list2:
            if not rect_tool._is_rectangles_overlapped_horizontally(
                    Rectangle.from_2_pos(ax1, ay1, ax2, ay2),
                    Rectangle.from_2_pos(bx1, by1, bx2, by2)):
                continue

            union = rect_tool._merge_2_rectangles(
                Rectangle.from_2_pos(ax1, ay1, ax2, ay2),
                Rectangle.from_2_pos(bx1, by1, bx2, by2))
            first = raw1.strip()
            second = raw2.strip()

            # ``picked`` is (text, prob), or None when the pair is dropped.
            if first == second:
                picked = (first, prob1 if prob1 > prob2 else prob2)
            elif embedded(first, second):
                picked = (second, prob2)
            elif embedded(second, first):
                picked = (first, prob1)
            else:
                common = difflib.SequenceMatcher(
                    None, first, second).find_longest_match(
                        0, len(first), 0, len(second))
                if common.size > 1:
                    if prob1 > prob2:
                        picked = (with_leftover(first, second, common.b,
                                                common.size), prob1)
                    else:
                        picked = (with_leftover(second, first, common.a,
                                                common.size), prob2)
                elif prob1 >= 50 and prob2 >= 50:
                    if prob1 > prob2:
                        picked = (first + " " + second, prob1)
                    else:
                        picked = (second + " " + first, prob2)
                elif prob1 >= 50 and prob2 < 50:
                    picked = (first, prob1)
                elif prob2 >= 50 and prob1 < 50:
                    picked = (second, prob2)
                else:
                    # Neither text is trustworthy enough to keep.
                    picked = None

            if picked is not None:
                merged_entries.append(
                    ((union.x1, union.y1, union.x2, union.y2), picked[0],
                     picked[1]))

    return merged_entries