def main():
    """Run the crop -> zoom -> threshold -> OCR sequence on the input image.

    Parses the command line into the module-level ``args``, lets the user
    crop the image with ``PolygonDrawer``, then calls ``zoom``, ``threshold``
    and ``img_to_str`` in turn.  Every stage is handed a path under
    ``./input/`` built from a stage prefix plus the file name of
    ``args.input_img`` (presumably each stage writes the file the next one
    reads -- confirm against those helpers).
    """
    global args
    args = parser.parse_args()

    def staged(prefix):
        # "./input/<prefix><last '/'-separated component of the input path>"
        return "./input/" + prefix + args.input_img.split('/')[-1]

    ## crop
    PolygonDrawer("Crop", args.input_img).run()

    ## preprocessing
    zoom(staged("crop_"), args.zoom_parameter, args.show_image)
    threshold(staged("zoom_crop_"), "global")

    ## tesseract
    img_to_str(staged("threshold_zoom_crop_"))
# NOTE(review): an identical ``splitDataset`` is defined again right after
# this one in the file; the later definition is the one Python keeps.
def splitDataset(filename):
    """Cut the letters out of one image and save them under ``./dataset/``.

    The image is read as grayscale, thresholded and split by
    ``preprocess.bounding_letter``; every piece taller and wider than
    5 pixels is eroded twice and written as
    ``./dataset/<name><index>.bmp``, where ``<name>`` is the part of the
    file name before its first dot and ``<index>`` counts the saved pieces.

    Args:
        filename: Path of the image holding samples of a single letter.

    Raises:
        IOError: If the image cannot be read.
    """
    # cv2.CV_LOAD_IMAGE_GRAYSCALE was removed in OpenCV 3; IMREAD_GRAYSCALE is
    # the supported spelling of the same flag.
    image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # imread signals failure by returning None instead of raising.
        raise IOError("Could not read image '{}'".format(filename))

    img, _ = preprocess.bounding_letter(preprocess.threshold(image))
    let = path.basename(filename).split(".")[0]
    print("Filing dataset with letter '{}'".format(let))

    i = 0
    for letter in img:
        # skip fragments too small to be a letter
        if letter.shape[0] > 5 and letter.shape[1] > 5:
            cv2.imwrite("./dataset/" + let + str(i) + ".bmp",
                        preprocess.erode(preprocess.erode(letter)))
            i += 1
def splitDataset(filename):
    """Cut the letters out of one image and save them under ``./dataset/``.

    The image is read as grayscale, thresholded and split by
    ``preprocess.bounding_letter``; every piece taller and wider than
    5 pixels is eroded twice and written as
    ``./dataset/<name><index>.bmp``, where ``<name>`` is the part of the
    file name before its first dot and ``<index>`` counts the saved pieces.

    Args:
        filename: Path of the image holding samples of a single letter.

    Raises:
        IOError: If the image cannot be read.
    """
    # cv2.CV_LOAD_IMAGE_GRAYSCALE was removed in OpenCV 3; IMREAD_GRAYSCALE is
    # the supported spelling of the same flag.
    image = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # imread signals failure by returning None instead of raising.
        raise IOError("Could not read image '{}'".format(filename))

    img, _ = preprocess.bounding_letter(preprocess.threshold(image))
    let = path.basename(filename).split(".")[0]
    print("Filing dataset with letter '{}'".format(let))

    i = 0
    for letter in img:
        # skip fragments too small to be a letter
        if letter.shape[0] > 5 and letter.shape[1] > 5:
            cv2.imwrite("./dataset/" + let + str(i) + ".bmp",
                        preprocess.erode(preprocess.erode(letter)))
            i += 1
def extractWords(image, lines, average_letter_size):
    """Find word-like components on every text line and the mean gap between them.

    Each line's strip of the thresholded image is reduced to its vertical
    projection, which is then scanned for runs of zero columns (blank
    space) and runs of positive columns (a word/component).

    Args:
        image: Page image; it is bilateral-filtered and thresholded here.
        lines: Iterable of ``[y1, y2]`` row bounds, one per text line.
            Bounds may be floats; they are truncated with ``int`` for slicing.
        average_letter_size: Letter height in pixels.  Gaps no wider than
            half of it, and components with no more than half of it in
            inked rows (full stops, commas), are ignored.

    Returns:
        ``(average_word_spacing, words)``: the mean width in columns of the
        kept gaps (0.0 when there are none) and a list of
        ``[y1, y2, x1, x2]`` entries, ``y1``/``y2`` being copied unchanged
        from ``lines``.
    """
    # apply bilateral filter
    filtered = preprocess.bilateralFilter(image, 5)
    # binarize; the scan below treats a column sum of 0 as blank space
    thresh = preprocess.threshold(filtered, 180)

    width = thresh.shape[1]
    min_extent = int(average_letter_size / 2)

    space_zero = []  # widths of the kept gaps between words, over all lines
    words = []  # a 2D list storing the coordinates of each word: y1, y2, x1, x2

    for line in lines:
        top, bottom = int(line[0]), int(line[1])
        vp = preprocess.verticalProjection(thresh[top:bottom, 0:width])
        last = len(vp) - 1

        column = 0          # running column index
        space_start = None  # start column of the open blank run, if any
        word_start = None   # start column of the open component run, if any
        spaces = []         # kept gap widths of this line

        for j, col_sum in enumerate(vp):
            if col_sum == 0:
                # blank column
                if space_start is None:
                    space_start = column
                column += 1
                if j < last and vp[j + 1] == 0:
                    continue  # the blank run goes on
                gap = column - space_start
                # gaps up to half a letter are spacing inside a word
                if gap > min_extent:
                    spaces.append(gap)
                space_start = None
            elif col_sum > 0:
                # column with ink
                if word_start is None:
                    word_start = column
                column += 1
                if j < last and vp[j + 1] > 0:
                    continue  # the component goes on
                word_end = column
                # number of rows of the line that carry ink inside this
                # component (thresh is indexed as rows x columns here)
                region = thresh[top:bottom, word_start:word_end]
                ink_rows = int(np.sum(np.sum(region, axis=1) != 0))
                # too few inked rows: punctuation, not a word
                if ink_rows > min_extent:
                    words.append([line[0], line[1], word_start, word_end])
                word_start = None

        # the first and last kept gap of a line are left out of the average
        # (presumably they are the page margins -- confirm)
        space_zero.extend(spaces[1:-1])

    space_count = len(space_zero) or 1  # avoid dividing by zero
    average_word_spacing = float(sum(space_zero)) / space_count
    return average_word_spacing, words
def letterSize(image):
    """Estimate letter height and line spacing from the horizontal projection.

    Works in three passes over ``preprocess.horizontalProjection`` of the
    thresholded image:

    1. split the rows into blank gaps (projection == 0) and contours
       (projection > 0), folding contours thinner than 20 rows into the
       neighbouring gap;
    2. cut each contour into individual lines wherever the projection
       crosses ``ANCHOR_POINT``;
    3. inside each line, count rows below ``MIDZONE_THRESHOLD`` as spacing
       and the remaining rows as letter mid-zone.

    Args:
        image: Page image; it is bilateral-filtered and thresholded here.

    Returns:
        ``(average_letter_size, relative_line_spacing, fineLines)`` where
        ``average_letter_size`` is the mean mid-zone height per line
        (replaced by 1 when it would be 0), ``relative_line_spacing`` is the
        mean spacing divided by that size and ``fineLines`` is a list of
        ``[top, bottom]`` row bounds.  Bounds produced by a split come from
        a ``/ 2`` and are therefore floats under true division.
    """
    filtered = preprocess.bilateralFilter(image, 5)
    thresh = preprocess.threshold(filtered, 160)

    # horizontal projection of the image, one value per row
    hpList = preprocess.horizontalProjection(thresh)
    row_total = len(hpList)

    # ---- pass 1: blank gaps and contours ---------------------------------
    contours = []        # [top, bottom] of every accepted contour
    zero_gaps = []       # heights of the blank gaps between contours
    cursor = 0           # running row index
    gap_top = None       # first row of the open blank run, if any
    contour_top = 0      # first row of the current / most recent contour
    contour_open = False
    accept_gap = True    # False right after a contour was rejected as thin

    for i, row_sum in enumerate(hpList):
        has_next = i < row_total - 1

        if row_sum == 0:
            # blank row
            if gap_top is None:
                gap_top = cursor
            cursor += 1
            if has_next and hpList[i + 1] == 0:
                continue  # the gap goes on
            gap_bottom = cursor
            if accept_gap:
                zero_gaps.append(gap_bottom - gap_top)
            else:
                # the contour before this gap was rejected: merge it and
                # this gap into the previous gap
                carried = zero_gaps.pop() if zero_gaps else 0
                zero_gaps.append(carried + gap_bottom - contour_top)
            gap_top = None
        elif row_sum > 0:
            # row with ink
            if not contour_open:
                contour_top = cursor
                contour_open = True
            cursor += 1
            if has_next:
                if hpList[i + 1] > 0:
                    continue  # the contour goes on
                # NOTE(review): nesting reconstructed from whitespace-collapsed
                # source -- the thin-contour test is skipped on the very last
                # row; confirm against the original layout.
                if cursor - contour_top < 20:
                    # thinner than 20 rows: not a line
                    accept_gap = False
                    contour_open = False
                    continue
            accept_gap = True
            contours.append([contour_top, cursor])
            contour_open = False

    # ---- pass 2: split contours into individual lines --------------------
    fineLines = []
    for contour in contours:
        anchor = contour[0]
        anchor_points = []  # rows where the projection crossed ANCHOR_POINT
        climbing = True     # True: looking for a rise, False: for a fall

        for row_sum in hpList[int(contour[0]):int(contour[1])]:
            if climbing:
                if row_sum < ANCHOR_POINT:
                    anchor += 1
                    continue
                anchor_points.append(anchor)
                climbing = False
            # the fall test also runs on the row that just ended the climb
            if row_sum > ANCHOR_POINT:
                anchor += 1
                continue
            anchor_points.append(anchor)
            climbing = True

        top = contour[0]
        for x in range(1, len(anchor_points) - 1, 2):
            # cut halfway between the end of one line and the start of the next
            mid = (anchor_points[x] + anchor_points[x + 1]) / 2
            # pieces shorter than 20 rows are treated as defects
            if mid - top < 20:
                continue
            fineLines.append([top, mid])
            top = mid
        if contour[1] - top < 20:
            continue
        fineLines.append([top, contour[1]])

    # ---- pass 3: spacing rows versus mid-zone rows -----------------------
    sparse_rows = 0         # rows below MIDZONE_THRESHOLD inside lines
    midzone_rows = 0        # rows at or above it
    lines_with_midzone = 0
    for line in fineLines:
        dense_in_line = 0
        for row_sum in hpList[int(line[0]):int(line[1])]:
            if row_sum < MIDZONE_THRESHOLD:
                sparse_rows += 1
            else:
                dense_in_line += 1
        midzone_rows += dense_in_line
        if dense_in_line:
            lines_with_midzone += 1

    # error prevention: never divide by zero
    if lines_with_midzone == 0:
        lines_with_midzone = 1

    # the first and last blank gap are left out of the spacing total
    total_space_rows = sparse_rows + np.sum(zero_gaps[1:-1])
    average_line_spacing = float(total_space_rows) / lines_with_midzone
    # letter size is the height of the letter; width is not considered
    average_letter_size = float(midzone_rows) / lines_with_midzone

    # error prevention: never divide by zero
    if average_letter_size == 0:
        average_letter_size = 1

    # line spacing is reported relative to the letter size
    relative_line_spacing = average_line_spacing / average_letter_size
    return average_letter_size, relative_line_spacing, fineLines
def straighten(image):
    """Rotate each detected text line level and report the mean baseline angle.

    Rows with little ink are blanked in the thresholded copy, the rest is
    dilated so that a line merges into one blob, and the blobs' contours
    are processed one by one: contours lower than 5 pixels are skipped,
    contours narrower than one seventh of the image width are painted
    white in ``image``, and every other contour region is rotated by the
    angle of its minimum-area rectangle and written back.

    Args:
        image: Page image.  It is modified in place.

    Returns:
        ``(mean_angle, image)``: the mean rotation angle of the straightened
        contours (0.0 when none qualified) and the modified image.  The
        angle is also stored in the global ``BASELINE_ANGLE``.
    """
    global BASELINE_ANGLE

    # binarize the image
    thresh = preprocess.threshold(image, 120)

    # mat holds one pixel-value sum per row; dividing by 255 gives the
    # number of set pixels in that row
    mat = preprocess.horizontalProjection(thresh)
    img_w = thresh.shape[1]
    min_set_pixels = img_w / 70.0
    for i, row_sum in enumerate(mat):
        if row_sum / 255.0 < min_set_pixels:
            thresh[i] = 0  # blank the whole row

    # Dilate with a wide kernel and narrow it in steps of 5 until more than
    # three contours appear.  Stop at the last positive width so the search
    # can neither loop forever nor request a non-positive kernel size.
    dil = 50
    while True:
        dilated = preprocess.dilate(thresh, (5, dil))
        # [-2] picks the contour list under every findContours return layout
        ctrs = cv2.findContours(dilated.copy(), cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(ctrs) > 3 or dil <= 5:
            break
        dil -= 5

    angle_sum = 0.0
    contour_count = 0
    for ctr in ctrs:
        x, y, w, h = cv2.boundingRect(ctr)

        # too low to be a text line (5 is arbitrary)
        if h < 5:
            continue

        # Contours narrower than a seventh of the page are erased: such short
        # lines may yield an inaccurate baseline angle.
        if w < image.shape[1] / 7:
            image[y:y + h, x:x + w] = 255
            continue

        # minAreaRect gives the tilt of the contour; fold it into [-45, 45].
        # Older OpenCV reports [-90, 0), newer releases (0, 90].
        angle = cv2.minAreaRect(ctr)[2]
        if angle < -45.0:
            angle += 90.0
        elif angle > 45.0:
            angle -= 90.0

        # The rotation is applied to the cropped region, so its centre must
        # be given in the region's own coordinates, not the page's.
        roi = image[y:y + h, x:x + w]
        rot = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), angle, 1)
        extract = cv2.warpAffine(roi, rot, (w, h),
                                 borderMode=cv2.BORDER_CONSTANT,
                                 borderValue=(255, 255, 255))

        # image is overwritten with the straightened contour
        image[y:y + h, x:x + w] = extract

        angle_sum += angle
        contour_count += 1

    # mean angle of the contours (not lines); 0.0 if nothing qualified
    mean_angle = angle_sum / contour_count if contour_count else 0.0
    BASELINE_ANGLE = mean_angle
    return mean_angle, image