예제 #1
0
def find_content2(d, img, dir):
    """Locate second-level directory entries and record them in ``dir``.

    The bottom 200 rows of ``img`` are cut off, the remainder is eroded and
    then dilated with a wide kernel so neighbouring marks merge, and every
    external contour of the result is treated as one entry.

    Args:
        d: Debug/context object. ``d.n`` is used as the file-name prefix and
            ``d.p2`` receives the intermediate images.
        img: Two-dimensional image (its shape is unpacked into two values).
            Presumably already binarised by the caller -- TODO confirm.
        dir: List that is extended in place with one
            ``(content_path, bottom_y, 0)`` tuple per contour.

    Returns:
        None. Results are delivered through ``dir`` only.
    """
    _, page_height = img.shape[::-1]
    img = img[0:page_height - 200, :]

    d.p2 = eroded = cv2.erode(img, tools.box(2, 10))
    d.p2 = dilated = cv2.dilate(eroded, tools.box(200, 20))

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 2.x/4.x; the contour list is always second
    # from the end.
    contours = cv2.findContours(dilated, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]

    entries = []
    regions = np.zeros_like(dilated)
    for index, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(regions, (x, y), (x + w, y + h), (255, 255, 255), 5)
        print("x = ", x, " ;y = ", y, " ;w = ", w, " ;h = ", h)

        # Only the path is recorded here; no image file is written by this
        # function.
        content_path = ("/image/" + d.n + "_class2_content2_" +
                        str(index + 1) + ".jpg")
        entries.append((content_path, h + y, 0))

    d.p2 = regions
    dir.extend(entries)
예제 #2
0
def contents(d, roi, lines, dir, isleft):
    """Cut out the title and page-number strip for every line and record them.

    For each ``(x, y, w, h)`` box in ``lines`` the strip left of the box is
    taken as the title and the strip right of it as the page number. When the
    title strip is almost empty (mean below 10) the band above the box is used
    instead. Both strips are eroded, written to ``./image/`` and the processed
    rows of ``roi`` are blanked.

    Per the original author's note, ``roi`` has already been adaptively
    binarised in ``find_circle``.

    Args:
        d: Debug/context object. ``d.n`` is the file-name prefix and ``d.p2``
            receives intermediate images.
        roi: Two-dimensional image; it is modified in place (rows are zeroed).
        lines: Iterable of ``(x, y, w, h)`` boxes.
        dir: List extended in place with
            ``(content_path, bottom_y, page_path, 0)`` tuples.
        isleft: String inserted into the file names before the extension.

    Returns:
        The same ``dir`` list that was passed in.
    """
    width, _ = roi.shape[::-1]
    entries = []

    for kk, (x, y, w, h) in enumerate(lines):
        # A negative slice start would wrap around to the bottom of the image
        # and silently select the wrong (usually empty) rows, so clamp at 0.
        band_top = max(0, y - h - 30)

        d.p2 = content_target = roi[y:h + y + 20, 0:x]
        d.p2 = page_target = roi[y:h + y + 20, x + w - 5:width - 50]

        if mean(content_target) < 10:
            # Nothing to the left of the box: take the band above it instead.
            content_target = roi[band_top:y + 10, 60:width]

        content_target = cv2.erode(content_target, tools.box(2, 2))
        page_target = cv2.erode(page_target, tools.box(2, 2))

        suffix = str(kk) + isleft + ".jpg"
        content_path = "./image/" + d.n + "-content-3-" + suffix
        page_path = "./image/" + d.n + "-page-3-" + suffix
        cv2.imwrite(content_path, content_target)
        cv2.imwrite(page_path, page_target)

        entries.append((content_path, h + y + 10, page_path, 0))

        # Blank everything that belongs to this entry so that the later
        # second-level search does not pick it up again.
        roi[y:y + h + 10, 0:width] = 0
        roi[band_top:y + 10, 0:width] = 0
        roi[y:h + y + 20, 0:x] = 0
        roi[max(0, y - h - 20):y, 0:width] = 0

    dir.extend(entries)
    d.p2 = roi

    # find_content2 may report its results by extending ``dir`` itself and
    # returning None; appending that None would corrupt the entry list.
    extra = find_content2(d, roi, dir)
    if extra is not None:
        dir.append(extra)
    return dir
예제 #3
0
def contents(d, roi, lines, dir):
    """Cut out the title and page-number strip for every line and record them.

    For each ``(x, y, w, h)`` box in ``lines`` the strip left of the box is
    taken as the title and the strip right of it as the page number. When the
    title strip is almost empty (mean below 10) the band above the box is used
    instead. Both strips are eroded and written to exactly the paths that are
    recorded in ``dir``; afterwards the processed rows of ``roi`` are blanked.

    Per the original author's note, ``roi`` has already been adaptively
    binarised in ``find_circle``.

    Args:
        d: Debug/context object. ``d.n`` is the file-name prefix and ``d.p2``
            receives intermediate images.
        roi: Two-dimensional image; it is modified in place (rows are zeroed).
        lines: Iterable of ``(x, y, w, h)`` boxes.
        dir: List extended in place with
            ``(content_path, bottom_y, page_path, 0)`` tuples.

    Returns:
        The same ``dir`` list that was passed in.
    """
    width, _ = roi.shape[::-1]
    tops = []
    entries = []

    for kk, (x, y, w, h) in enumerate(lines):
        # A negative slice start would wrap around to the bottom of the image
        # and silently select the wrong (usually empty) rows, so clamp at 0.
        band_top = max(0, y - h - 30)

        d.p2 = content_target = roi[y:h + y + 20, 0:x]
        d.p2 = page_target = roi[y:h + y + 20, x + w - 5:width - 50]
        tops.append(y)

        if mean(content_target) < 10:
            # Nothing to the left of the box: take the band above it instead.
            content_target = roi[band_top:y + 10, 60:width]
        elif kk > 0:
            gap = tops[kk] - tops[kk - 1]
            if 1.65 * h < gap < 2.5 * h:
                # The gap to the previous line falls in the 1.65h-2.5h window:
                # join this strip with the one above it and hand the result to
                # sh3. The joined image is not used for the saved output.
                lower = roi[y:h + y + 20, 0:x]
                upper = roi[max(0, y - h - 20):y, 0:x]
                sh3(img_concatenate(lower, upper))

        content_target = cv2.erode(content_target, tools.box(2, 2))
        page_target = cv2.erode(page_target, tools.box(2, 2))

        # Write to the very paths that are recorded below; previously the
        # files were saved under different names than the ones handed to the
        # caller ("/image<n>..." and "-content-3-").
        content_path = ("./image/" + d.n + "_class3_content_" + str(kk) +
                        ".jpg")
        page_path = "./image/" + d.n + "_class3_pageNUM_" + str(kk) + ".jpg"
        cv2.imwrite(content_path, content_target)
        cv2.imwrite(page_path, page_target)

        entries.append((content_path, h + y + 10, page_path, 0))

        # Blank the rows belonging to this entry.
        roi[y:y + h + 10, 0:width] = 0
        roi[band_top:y + 10, 0:width] = 0

    dir.extend(entries)
    d.p2 = roi
    return dir
예제 #4
0
def _sorted_region_boxes(d, region, kernel, **sort_kwargs):
    """Otsu-threshold ``region``, dilate it and return its sorted contour boxes.

    ``sort_kwargs`` are forwarded unchanged to ``tools.sort_contours``.
    """
    _, thresh = cv2.threshold(region, 0, 255, cv2.THRESH_OTSU)
    d.p2 = mask = cv2.dilate(thresh, kernel)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 2.x/4.x; the contour list is always second
    # from the end.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    _, boxes = tools.sort_contours(contours, **sort_kwargs)
    return boxes


def get_content_roi(d, mat):
    """Estimate the bounds of the content area of a page image.

    The image is adaptively thresholded (inverted) and four border strips,
    each a quarter of the page deep, are examined separately: top, left,
    right and bottom. In every strip the blobs are merged by dilation and the
    first or last sorted bounding box supplies the corresponding bound.

    Args:
        d: Debug/context object. ``d.f`` is set to this function's name and
            ``d.p2`` receives intermediate images.
        mat: Two-dimensional image (its shape is unpacked into two values).

    Returns:
        ``[top, bottom, left, right]``.

    Raises:
        Whatever ``tools.sort_contours`` or the indexing raises when a strip
        contains no contour; this is not handled here.
    """
    d.f = "get_content_roi"
    d.p2 = img = cv2.adaptiveThreshold(mat, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                       cv2.THRESH_BINARY_INV, 55, 25)
    w, h = mat.shape[::-1]
    w4, h4 = w // 4, h // 4

    # Boxes are presumably (x, y, w, h) as produced by cv2.boundingRect --
    # TODO confirm against tools.sort_contours.

    # Top strip: index 3 (height) of the first box.
    d.p2 = top_roi = img[0:h4, w4:3 * w4]
    top = _sorted_region_boxes(d, top_roi, tools.box(150, 80))[0][3]

    # Left strip: index 2 (width) of the first box in left-to-right order.
    d.p2 = left_roi = img[h4:3 * h4, 0:w4]
    left = _sorted_region_boxes(d, left_roi, tools.box(100, 10),
                                method="left-right")[0][2]

    # Right strip: index 0 (x) of the last box, shifted back to page
    # coordinates.
    d.p2 = right_roi = img[h4:3 * h4, 3 * w4:w]
    right = 3 * w4 + _sorted_region_boxes(d, right_roi, tools.box(100, 10),
                                          method="left-right")[-1][0]

    # Bottom strip: index 1 (y) of the last box, shifted back to page
    # coordinates.
    d.p2 = bottom_roi = img[3 * h4:h, w4:3 * w4]
    bottom = 3 * h4 + _sorted_region_boxes(d, bottom_roi,
                                           tools.box(2, 100))[-1][1]

    # The crop itself is only exposed for debugging; callers get the bounds.
    d.p2 = mat[top:bottom, left:right]
    return [top, bottom, left, right]
예제 #5
0
def dp_split_height2(d, roi, dir):
    """Find dotted leader lines by template matching and pass them to ``contents``.

    The dot template ``./dot.7.7.jpg`` is matched against ``roi``; every match
    with a normalised correlation of at least 0.7 is painted into a mask, the
    mask is dilated so neighbouring dots fuse, and the resulting contours are
    merged into line boxes by ``tools.contact_contours``. A debug rendering of
    the boxes is written to ``./dot_match.png``.

    Per the original author's note, ``roi`` has already been adaptively
    binarised in ``find_circle``.

    Args:
        d: Debug/context object; ``d.p2`` receives intermediate images.
        roi: Two-dimensional image to search.
        dir: List that ``contents`` extends with the extracted entries.

    Returns:
        Whatever ``contents(d, roi, lines, dir)`` returns.
    """
    # NOTE: cv2.imread returns None instead of raising when the file cannot
    # be read; the cvtColor call below then fails.
    template_bgr = cv2.imread("./dot.7.7.jpg")
    # imread delivers channels in BGR order, so BGR2GRAY is the matching
    # conversion (RGB2GRAY would swap the red and blue weights).
    template = cv2.cvtColor(template_bgr, cv2.COLOR_BGR2GRAY)

    res = cv2.matchTemplate(roi, template, cv2.TM_CCORR_NORMED)
    threshold = 0.7
    loc = np.where(res >= threshold)

    # Paint a small rectangle at every matched dot position.
    regions = np.zeros_like(roi)
    for pt in zip(*loc[::-1]):
        cv2.rectangle(regions, pt, (pt[0] + 10, pt[1] + 5), (255, 255, 0), 2)
    d.p2 = regions
    regions = cv2.dilate(regions, tools.box(5, 5))

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on 2.x/4.x; the contour list is always second
    # from the end.
    contours = cv2.findContours(regions, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    lines = tools.contact_contours(contours)

    # Widen each line box by the template width (minus 10) and force its
    # height to the template height plus 2. The boxes are mutated in place.
    width, height = template.shape[::-1]
    for contour in lines:
        contour[2] = contour[2] + width - 10
        contour[3] = height + 2

    regions = np.zeros_like(roi)
    for x, y, w, h in lines:
        cv2.rectangle(regions, (x, y), (x + w, y + h), (255, 255, 0), 2)
    d.p2 = regions
    cv2.imwrite("./dot_match.png", regions)

    return contents(d, roi, lines, dir)
예제 #6
0
def find_mid_line(d, img):
    """Return the x position of the line found in the central strip of ``img``.

    Only the columns between 40 % and 60 % of the image width are searched.
    The strip is adaptively thresholded, dilated, and passed to the
    probabilistic Hough transform; the first reported segment is used.

    Args:
        d: Debug/context object; ``d.p2`` receives intermediate images.
        img: Two-dimensional image (its shape is unpacked into two values).
            The detected segment is drawn onto the central strip, which is a
            slice of ``img``, so the caller's image is presumably modified --
            TODO confirm whether that is intended.

    Returns:
        The mean x of the segment's end points, shifted by ``0.4 * width``
        back into full-image coordinates, as an int.

    Raises:
        ValueError: If the Hough transform reports no segment.
    """
    w, h = img.shape[::-1]
    d.p2 = mat = img[:, int(0.4 * w):int(0.6 * w)]

    d.p2 = mat_adaptive = cv2.adaptiveThreshold(mat, 255,
                                                cv2.ADAPTIVE_THRESH_MEAN_C,
                                                cv2.THRESH_BINARY_INV, 101, 15)
    d.p2 = mask = cv2.dilate(mat_adaptive, tools.box(3, 10))

    # minLineLength/maxLineGap must be keywords: the fifth positional
    # parameter of HoughLinesP is the optional output array ``lines``, so
    # passing them positionally shifts both values into the wrong slots.
    lines = cv2.HoughLinesP(mask, 1, np.pi / 180, 30,
                            minLineLength=int(h / 2), maxLineGap=15)
    if lines is None:
        raise ValueError(
            "find_mid_line: no line segment detected in the central strip")

    # The result is (1, N, 4) on OpenCV 2.x and (N, 1, 4) on 3.x+; flatten to
    # (N, 4) so the first segment is picked the same way on every version.
    x1, y1, x2, y2 = (int(v) for v in np.reshape(lines, (-1, 4))[0])
    print("x1=", x1, "; y1=", y1, "; x2=", x2, "; y2=", y2)
    d.p2 = cv2.line(mat, (x1, y1), (x2, y2), (255, 255, 0), 10)

    return int(0.5 * (x1 + x2) + 0.4 * w)