Beispiel #1
0
 def processLabeledImages(self):
     """Cut the first queued image into line crops and hand them over for labelling.

     Takes the first entry of ``self.image_processing_list``, asks
     ``ImageProcessing.GetLineBounds`` for the regions of its ``imageArray``
     and crops each region with the image's own ``cutImage`` method.  The
     resulting list of crops is passed, together with ``self``, to
     ``Main.userSetLabel``.
     """
     first_image = self.image_processing_list[0]
     line_bounds = ImageProcessing.GetLineBounds(first_image.imageArray)
     # Each bound is unpacked as (x, y, w, h); cutImage receives the two
     # opposite corners (x, y) and (x + w, y + h).
     line_crops = [
         first_image.cutImage(first_image.imageArray, x, y, x + w, y + h)
         for x, y, w, h in line_bounds
     ]
     Main.userSetLabel(line_crops, self)
def extract_ines_of_text_from_image_and_match_label(imageArray, text,fontname, folderSave):
    """Crop the text lines of one image and pair each crop with a label line.

    The regions returned by ``ImageProcessing.GetLineBounds(imageArray)`` are
    cropped from ``imageArray`` with a margin of ``NUMPIXELS`` on every side
    (clamped to the image borders).  Crops are matched, in order, with the
    lines of ``text`` that are neither empty nor a single space.  Every
    matched pair is wrapped in an ``ImageProcessing.ImageProcessing`` object
    and the collected objects are passed to ``Insert_to_folder``.

    If there are more detected regions than usable text lines, the surplus
    regions are left unlabelled and are not saved.

    Args:
        imageArray: image array; ``shape[0]`` is used as the height and
            ``shape[1]`` as the width.
        text: label text, split on newline characters into one label per line.
        fontname: forwarded as ``handwrite_ID`` of every created object.
        folderSave: forwarded to ``Insert_to_folder`` as the destination.
    """
    lines_txt = text.split('\n')
    num_lines_txt = len(lines_txt)
    print(num_lines_txt)
    num_line = 0
    newImagesForTrain = []

    height = imageArray.shape[0]
    width = imageArray.shape[1]
    boundries = ImageProcessing.GetLineBounds(imageArray)
    print(len(boundries))

    for x, y, w, h in boundries:
        # min/max put the two edges in order whatever the sign of w / h;
        # the outer max/min clamp the padded box to the image.
        top = max(0, min(y, y + h) - NUMPIXELS)
        bottom = min(max(y, y + h) + NUMPIXELS, height)
        left = max(0, min(x, x + w) - NUMPIXELS)
        right = min(width, max(x, x + w) + NUMPIXELS)
        cutImage = imageArray[top:bottom, left:right]

        # Skip blank label lines.  The bounds check must come first: the
        # original `a or b and c` condition indexed past the end of
        # lines_txt once the text was exhausted.
        while num_line < num_lines_txt and lines_txt[num_line] in ("", " "):
            num_line += 1

        if num_line < num_lines_txt:
            Label = lines_txt[num_line]
            num_line += 1
            newImagesForTrain.append(
                ImageProcessing.ImageProcessing(
                    cutImage, imagePath=None, handwrite_ID=fontname, Label=Label
                )
            )

    Insert_to_folder(newImagesForTrain, folderSave)
def extract_ines_of_text_from_images_and_match_labels(folder, txtfile):
    """Crop the text lines of every page image in *folder* and label them.

    Page images are read from ``<folder>/<font_origin>_<i>.tif`` and the
    regions returned by ``ImageProcessing.GetLineBounds`` are cropped with a
    margin of ``NUMPIXELS`` (clamped to the image borders).  Crops are
    matched, in reading order across all pages, with the lines of *txtfile*
    that are neither empty nor a single space.  Each pair is wrapped in an
    ``ImageProcessing.ImageProcessing`` object and the whole list is passed
    to ``Insert_to_folder`` with a newly created sub-folder of *folder* as
    the destination.

    The function returns early, without creating anything, when *folder*
    already contains an entry with the derived font name.  If there are more
    detected regions than usable text lines, the surplus regions are left
    unlabelled and are not saved.

    Args:
        folder: directory holding the page images; its base name minus the
            last eight characters is used as the font name.
        txtfile: path of the UTF-8 text file holding one label per line.
    """
    # Drop the 8-character suffix of the folder name (the original comment
    # names it as "_images_" -- TODO confirm against the folder layout).
    font_name = os.path.basename(folder)[:-8]
    font_origin = font_name
    if len(font_name) > 5:
        font_name = font_name[0:10]
        # Make the shortened name unique among the names seen so far.
        while font_name in names_of_fonts:
            font_name = font_name + "a"
    names_of_fonts.append(font_name)
    files = os.listdir(folder)

    if font_name in files:
        return

    folderSave = os.path.join(folder, font_name)
    os.mkdir(folderSave)

    # `files` was listed before the sub-folder was created, so it does not
    # include it.
    till_page = len(files)
    print(till_page)

    with open(txtfile, "r", encoding="utf-8") as txt_file:
        text = txt_file.read()

    lines_txt = text.split('\n')
    num_lines_txt = len(lines_txt)
    print(num_lines_txt)

    num_line = 0
    newImagesForTrain = []
    num_lines_images = 0

    # NOTE(review): till_page - 1 presumably assumes exactly one entry in the
    # folder is not a page image -- confirm against the caller.
    for page in range(till_page - 1):
        print(page)
        image = os.path.join(folder, font_origin + "_" + str(page) + ".tif")

        print(image)
        # NOTE(review): cv.imread is not checked for a failed read here;
        # the .shape access below would then fail.
        imageArray = cv.imread(image, 0)
        width = imageArray.shape[1]
        height = imageArray.shape[0]
        boundries = ImageProcessing.GetLineBounds(imageArray)
        print(len(boundries))
        thispageSnum = num_lines_images
        num_lines_images += len(boundries)

        for line_idx, (x, y, w, h) in enumerate(boundries):
            # min/max put the two edges in order whatever the sign of w / h;
            # the outer max/min clamp the padded box to the image.
            top = max(0, min(y, y + h) - NUMPIXELS)
            bottom = min(max(y, y + h) + NUMPIXELS, height)
            left = max(0, min(x, x + w) - NUMPIXELS)
            right = min(width, max(x, x + w) + NUMPIXELS)
            cutImage = imageArray[top:bottom, left:right]

            # Skip blank label lines.  The bounds check must come first: the
            # original `a or b and c` condition indexed past the end of
            # lines_txt once the text was exhausted.
            while num_line < num_lines_txt and lines_txt[num_line] in ("", " "):
                num_line += 1

            if num_line < num_lines_txt:
                Label = lines_txt[num_line]
                print(Label)
                num_line += 1
                print(thispageSnum + line_idx)
                print(num_line)
                newImagesForTrain.append(
                    ImageProcessing.ImageProcessing(
                        cutImage, imagePath=None, handwrite_ID=font_name, Label=Label
                    )
                )
    Insert_to_folder(newImagesForTrain, folderSave)
    print(num_line, num_lines_images)