Exemple #1
0
 def printLine(self, line):
     """Print ``line`` at the insert point, wrapping at spaces if it is too wide.

     The available width is the module-level ``width`` minus ``margin``,
     ``self.indent`` and the current x position.  If the whole line fits it
     is printed as one segment.  Otherwise the longest space-delimited
     prefix that fits is printed, ``self.cr`` is called, and the remainder
     is handled by a recursive call (it may need wrapping as well).

     When no break point works, ``self.cr`` is called and the whole line is
     printed unbroken.

     Args:
         line: The text to print.
     """
     active = style.currentStyle()
     face = active.font.name
     size = active.fontsize
     fullWidth = fitz.getTextlength(line, face, size)
     room = width - margin - self.indent - self.insertPoint.x
     if fullWidth < room:
         self.printSegment(line)
         return

     # Walk the candidate break points from right to left.
     cut = len(line)
     while True:
         cut = line.rfind(" ", 0, cut)
         if cut == -1:
             # No usable space left: give up on splitting and print the
             # whole line after a break.
             # NOTE(review): chr(0xAC) is presumably a continuation marker
             # drawn by cr() -- confirm against cr's definition.
             self.cr(chr(0xAC))
             self.printSegment(line)
             return

         head = line[:cut]
         headWidth = fitz.getTextlength(head,
                                        fontname=face,
                                        fontsize=size)
         if headWidth > room:
             # Still too wide; try the previous space.
             continue

         self.printSegment(head)
         self.cr(chr(0xAC))
         # The remainder (without the space we broke on) may itself be too
         # long, so let printLine deal with it.
         self.printLine(line[cut + 1:])
         return
Exemple #2
0
def createTextbox(textfile, textboxRect, page, path):
    """Draw a filled rectangle on ``page`` and fit the text of a file into it.

    Blank lines of ``textfile`` are dropped.  The font size starts at 12 and
    is reduced in steps of 2 while the estimated number of rendered lines
    exceeds the estimated line capacity of the box.  Shrinking stops at a
    minimum font size so the size can never reach zero or go negative.

    Args:
        textfile: Path of the text file to read.
        textboxRect: Rectangle of the text box; its ``width`` is used for the
            wrap estimate and the rectangle is passed to the drawing calls.
        page: Page object the shape and text box are drawn on.
        path: Unused; kept for interface compatibility.

    Raises:
        ValueError: If ``textboxRect.width`` is not positive (the wrap
            estimate could otherwise loop forever).
    """
    fontName = "Times-Roman"
    fontSize = 12
    fontStep = 2
    minFontSize = 2  # never shrink below this; avoids a zero/negative size

    if textboxRect.width <= 0:
        raise ValueError("textboxRect must have a positive width")

    # Keep only the non-blank lines (each still ends with its newline).
    with open(textfile) as input_txt:
        lineArray = [line for line in input_txt if not line.isspace()]
    lineString = "".join(lineArray)

    def countRenderedLines(size):
        """Estimate how many lines the text occupies at font size ``size``."""
        total = 0
        for line in lineArray:
            total += 1
            remaining = fitz.getTextlength(line, fontName, size)
            # Every full box width of overflow starts one more line.
            while remaining > textboxRect.width:
                total += 1
                remaining -= textboxRect.width
        return total

    linecounter = countRenderedLines(fontSize)

    # At font size 12 the text box is assumed to hold up to 10 lines.
    lineCapacity = 10
    while linecounter > lineCapacity and fontSize - fontStep >= minFontSize:
        # Capacity grows by the relative gain of the font size decrement.
        # NOTE(review): calculatePercentIncrease is defined elsewhere; it
        # presumably returns (old - new) / new, i.e. 0.2 for 12 -> 10 --
        # confirm.
        percentIncrease = calculatePercentIncrease(fontSize,
                                                   fontSize - fontStep)
        lineCapacity = lineCapacity + (lineCapacity * percentIncrease)
        fontSize = fontSize - fontStep

        # A smaller font may need fewer wrapped lines, so recount.
        linecounter = countRenderedLines(fontSize)

    # Draw the background rectangle, then the text on top of it.
    textboxShape = page.new_shape()
    textboxShape.draw_rect(textboxRect)
    textboxShape.finish(color=getColor("dodgerblue4"),
                        fill_opacity=1,
                        closePath=False)
    textboxShape.commit()
    page.insertTextbox(textboxRect,
                       lineString,
                       fontsize=fontSize,
                       fontname=fontName,
                       align=0)

    return
Exemple #3
0
    def insert_fields(self, fields, form_type, PdfProcess):
        """Write each field's upper-cased value next to its anchor text.

        For every field, ``PdfProcess.find_text`` locates the anchor
        rectangle; fields whose anchor is not found are skipped.  The value
        is placed to the right of the anchor by default, underneath it for
        location ``"below"``, or ending at the anchor's left edge for
        location ``"before"``.

        Args:
            fields: Iterable of mappings with keys ``"document_id"``,
                ``"value"``, ``"location"`` and optionally ``"index"``
                (defaults to 0).
            form_type: Form identifier; selects the text box width
                (``"con29r"``/``"llc1"`` -> 200, ``"con29o"`` -> 175,
                anything else -> 0).
            PdfProcess: Object providing ``find_text`` and
                ``insert_textbox``.
        """
        # The box width depends only on the form type, so pick it once.
        if form_type in ("con29r", "llc1"):
            box_width = 200
        elif form_type == "con29o":
            box_width = 175
        else:
            box_width = 0

        for field in fields:
            document_id = field["document_id"]
            value = field["value"].upper()
            location = field["location"]
            index = field["index"] if "index" in field else 0

            anchor = PdfProcess.find_text(document_id, index=index)
            if anchor is None:
                continue

            if location == "below":
                left, top = anchor.x0, anchor.y0 + anchor.height
            elif location == "before":
                # Shift left by the text width so the value ends at the
                # anchor's left edge.
                text_length = fitz.getTextlength(value, fontsize=9)
                left = anchor.x1 - anchor.width - text_length
                top = anchor.y0 - 1
            else:
                left, top = anchor.x1 + 2, anchor.y0

            box = fitz.Rect(left, top, left + box_width, top + 200)
            PdfProcess.insert_textbox(value, box, fontsize=9)
Exemple #4
0
def append_TOC(existingDoc, newDoc, entryname, filename, startPoint, path):
    """Append ``newDoc`` to ``existingDoc`` and add a table-of-contents entry.

    The entry (name, dot leader, page number and a clickable link) is
    written on page 0 of ``existingDoc``; the page number is also printed at
    the bottom of the last page.  The document is then saved to
    ``path + filename``.

    Args:
        existingDoc: Document whose first page holds the table of contents.
        newDoc: Document to append.
        entryname: Text of the table-of-contents entry.
        filename: File name the document is saved under.
        startPoint: Point at which the entry text starts.
        path: Directory prefix, concatenated directly with ``filename``.

    Returns:
        The start point for the next table-of-contents entry (35 units
        further down, at x = 100).
    """
    # Appends the new page(s) to the document that contains the table of contents
    existingDoc.insert_pdf(newDoc)
    TOC_page = existingDoc.load_page(page_id=0)

    # Generates the text entry for the new page
    TOC_page.insertText(startPoint, entryname, fontname="helv", fontsize=16, rotate=0)
    x_distance = fitz.getTextlength(entryname, fontname="helv", fontsize=16) + 105
    targetPageNumber = existingDoc.page_count
    entrynumber = "   %i" % targetPageNumber

    # Pre-set the page number position: for a long entry name the dot-leader
    # loop below does not run at all and would leave dotLocation unbound.
    dotLocation = fitz.Point(x_distance, startPoint.y)
    while x_distance < 475:
        dotLocation = fitz.Point(x_distance, startPoint.y)
        TOC_page.insertText(dotLocation, ".", fontname="helv", fontsize=16, rotate=0)
        x_distance = x_distance + 5
    # The number starts at the last dot; its leading spaces push it clear.
    TOC_page.insertText(dotLocation, entrynumber, fontname="helv", fontsize=16, rotate=0)

    # Creates the hyperlink for the newly appended page
    # When the entry is clicked on in the Table of Contents, user is sent to that particular page
    linkRect = fitz.Rect(100, startPoint.y - 20, x_distance + 25, startPoint.y + 15)
    TOC_page.insert_link({'kind': 1, 'from': linkRect, 'type': 'goto', 'page': targetPageNumber - 1, 'to': fitz.Point(0, 0), 'zoom': 0.0})

    # Inserts the page number on the bottom of the last page of the document
    insertedPage = existingDoc.load_page(page_id=-1)
    pageNumberPoint = fitz.Point(294, 830)
    insertPageNumber = "%i" % targetPageNumber
    insertedPage.insertText(pageNumberPoint, insertPageNumber, fontname="Times-Roman", fontsize=14, rotate=0)

    # Calculates the new start point for the next entry and saves the pdf
    newStartPoint = fitz.Point(100, startPoint.y + 35)
    existingDoc.save(path + filename)

    return newStartPoint
Exemple #5
0
    def printSegment(self, line):
        """Draw ``line`` at the insert point and advance the point past it.

        The text is drawn on ``self.currentPage`` in the current style's
        font and size.  While ``self.linkDestination`` is truthy, the
        rectangle covered by the text (one module-level ``lineheight`` tall,
        sitting on the insert point's y) is appended to ``self.linkRects``.

        Args:
            line: The text to draw; no wrapping is performed here.
        """
        active = style.currentStyle()
        face = active.font.name
        size = active.fontsize
        segmentWidth = fitz.getTextlength(line, fontname=face, fontsize=size)

        log.debug(f"printSegment: {line}")
        self.currentPage.insertText(self.insertPoint,
                                    line,
                                    fontname=face,
                                    fontsize=size)

        if self.linkDestination:
            left = self.insertPoint.x
            bottom = self.insertPoint.y
            covered = fitz.Rect(left, bottom - lineheight,
                                left + segmentWidth, bottom)
            self.linkRects.append(covered)

        # The next segment starts where this one ended.
        self.insertPoint.x += segmentWidth
Exemple #6
0
 def printRight(self, line):
     """Draw ``line`` so that it ends at the right margin.

     The insert point's x is moved to ``width - margin`` minus the measured
     text width (both are module-level values), then the text is drawn on
     ``self.currentPage`` in the current style's font and size.

     Args:
         line: The text to draw right-aligned.
     """
     active = style.currentStyle()
     textWidth = fitz.getTextlength(line,
                                    fontname=active.font.name,
                                    fontsize=active.fontsize)
     rightEdge = width - margin
     self.insertPoint.x = rightEdge - textWidth
     self.currentPage.insertText(self.insertPoint,
                                 line,
                                 fontname=active.font.name,
                                 fontsize=active.fontsize)
Exemple #7
0
 def softbreak(self, node=None, entering=None):
     """Emit a single space for a soft line break, if there is room for it.

     The space is printed only when its width is smaller than the distance
     from the insert point to the right margin; otherwise nothing is
     printed, since a hard break is about to happen anyway.

     Args:
         node: Unused.
         entering: Unused.
     """
     active = style.currentStyle()
     face = active.font.name
     size = active.fontsize
     spaceWidth = fitz.getTextlength(" ", fontname=face, fontsize=size)
     remaining = width - margin - self.insertPoint.x
     if spaceWidth < remaining:
         self.printSegment(" ")
Exemple #8
0
    def apply(original, tokens: List[PDFToken], outfile, highlight=False):
        """Build a corrected PDF from the original's images and the tokens.

        Every page of ``original`` is recreated with the same size and its
        images are copied over; each non-discarded token's text (``gold`` if
        set, otherwise ``original``) is then inserted at the token's
        rectangle.  Both documents are closed when the function finishes,
        including when an error occurs.

        Args:
            original: Path of the source PDF (converted with ``str``).
            tokens: Tokens to insert; processed in ``ordering`` order, where
                ``ordering[0]`` is used as the page index.
            outfile: Path the corrected PDF is saved to.
            highlight: If true, outline each token rectangle: blue when the
                text was inserted, red when insertion reported a failure.
        """
        # Local import: the file's import block is not visible from here.
        from contextlib import closing

        with closing(fitz.open(str(original))) as pdf_original, \
                closing(fitz.open()) as pdf_corrected:
            PDFTokenizer.log.info(
                'Copying images from original to corrected PDF')
            for page in pdf_original:
                PDFTokenizer.log.debug(f'(page {page.number})')
                newpage = pdf_corrected.newPage(-1, page.rect.width,
                                                page.rect.height)
                for image_info in page.getImageList():
                    xref = image_info[0]
                    stream = pdf_original.extractImage(xref)['image']
                    # Every image is stretched over the whole page.
                    # NOTE(review): presumably each page is one full-page
                    # scan -- confirm.
                    newpage.insertImage(page.rect, stream=stream)

            blue = fitz.utils.getColor('blue')
            red = fitz.utils.getColor('red')

            PDFTokenizer.log.info('Inserting tokens in corrected PDF')
            for token in sorted(tokens, key=lambda x: x.ordering):
                if token.is_discarded:
                    continue

                page = pdf_corrected[token.ordering[0]]
                word = token.gold or token.original

                # Adjust rectangle to fit word: widen it to the measured
                # text width if needed and add one token height below.
                fontfactor = 0.70
                size = token.rect.height * fontfactor
                textwidth = fitz.getTextlength(word, fontsize=size)
                rect = fitz.Rect(
                    token.rect.x0, token.rect.y0,
                    max(token.rect.x1, token.rect.x0 + textwidth + 1.0),
                    token.rect.y1 + token.rect.height)

                # NOTE(review): render_mode=3 is presumably PDF's invisible
                # text mode (searchable layer over the image) -- confirm.
                res = page.insertTextbox(rect,
                                         f'{word} ',
                                         fontsize=size,
                                         render_mode=3)
                if res < 0:
                    PDFTokenizer.log.warning(
                        f'Token was not inserted properly: {word}\n'
                        f' -- token.rect: {token.rect}\n'
                        f' -- rect: {rect}\n'
                        f' -- font size: {size}\n'
                        f' -- calc.width: {textwidth} rect.width: {rect.width}\n'
                        f' -- rect.height: {rect.height} result: {res}\n')
                    if highlight:
                        page.drawRect(rect, color=red)
                elif highlight:
                    page.drawRect(rect, color=blue)

            PDFTokenizer.log.info(f'Saving corrected PDF to {outfile}')
            pdf_corrected.save(str(outfile))  #, garbage=4, deflate=True)
Exemple #9
0
def generateTOC(existingDoc, filename, path):
    """Insert a table-of-contents page at the front of ``existingDoc``.

    A new page is created at index 0, a horizontally centred title is drawn
    on it, the page number "1" is printed at the bottom, and the document is
    saved to ``path + filename``.

    Args:
        existingDoc: Document to extend.
        filename: File name the document is saved under.
        path: Directory prefix, concatenated directly with ``filename``.

    Returns:
        The same document object that was passed in.
    """
    generatedPage = existingDoc.newPage(pno=0)

    # Generates the Table of Contents Title
    titleFont = "Times-Roman"
    titleSize = 24
    tableOfContentsText = "-- Table of Contents --"
    # Measure with the same font and size the title is drawn with, and
    # centre by leaving half of the free width on each side.
    TOC_textLength = fitz.getTextlength(tableOfContentsText,
                                        fontname=titleFont,
                                        fontsize=titleSize)
    pageWidth = generatedPage.bound().width
    TOC_startPoint_X = (pageWidth - TOC_textLength) / 2
    TOC_startPoint_Y = 85
    TOC_startPoint = fitz.Point(TOC_startPoint_X, TOC_startPoint_Y)
    generatedPage.insertText(TOC_startPoint, tableOfContentsText, fontname=titleFont, color=(0, 0.35, 0.8),
                             fontsize=titleSize, rotate=0)

    # Inserts the page number at the bottom of the page.
    # Table of Contents will be page 1
    pageNumberPoint = fitz.Point(294, 815)
    generatedPage.insertText(pageNumberPoint, "1", fontname="Times-Roman", fontsize=14, rotate=0)
    existingDoc.save(path + filename)

    return existingDoc
Exemple #10
0
    def apply(original,
              tokens: List[HOCRToken],
              outfile: Path,
              highlight=False):
        """Create a one-page PDF from an image and overlay the token texts.

        The image at ``original`` becomes the background of a page of the
        same pixel size.  Each non-discarded token's text (``gold`` if set,
        otherwise ``original``) is inserted in red at the token's rectangle.
        The result is saved next to ``outfile`` with a ``.pdf`` suffix, and
        the document is closed afterwards, including when an error occurs.

        Args:
            original: Path of the source image (converted with ``str``).
            tokens: Tokens to insert; ``token.page`` selects the page.
            outfile: Target path; its suffix is replaced by ``.pdf``.
            highlight: Unused; kept for interface compatibility.
        """
        # Local import: the file's import block is not visible from here.
        from contextlib import closing

        with closing(fitz.open()) as pdf:
            pix = fitz.Pixmap(str(original))
            page = pdf.newPage(-1, width=pix.width, height=pix.height)
            page.insertImage(page.rect, pixmap=pix)

            for token in progressbar.progressbar(tokens):
                if token.is_discarded:
                    continue

                page = pdf[token.page]
                word = token.gold or token.original
                # Adjust rectangle to fit word: widen it to the measured
                # text width if needed and add two token heights below.
                fontfactor = 0.70
                size = token.rect.height * fontfactor
                textwidth = fitz.getTextlength(word, fontsize=size)
                rect = fitz.Rect(
                    token.rect.x0, token.rect.y0,
                    max(token.rect.x1, token.rect.x0 + textwidth + 1.0),
                    token.rect.y1 + token.rect.height * 2)
                res = page.insertTextbox(rect,
                                         f'{word} ',
                                         fontsize=size,
                                         color=(1, 0, 0))
                if res < 0:
                    HOCRTokenizer.log.warning(
                        f'Token was not inserted properly: {word}\n'
                        f' -- token.rect: {token.rect}\n'
                        f' -- rect: {rect}\n'
                        f' -- font size: {size}\n'
                        f' -- calc.width: {textwidth} rect.width: {rect.width}\n'
                        f' -- rect.height: {rect.height} result: {res}\n')

            pdf.save(str(outfile.with_suffix('.pdf')))
Exemple #11
0
def insert_extra_info(extra, exam, test_mode=False, test_folder=None):
    """Draw the student-details box and the signature box on the first page.

    Two stacked white rectangles with black borders are drawn, centred
    horizontally on the first page.  The upper one receives the student id
    and name (one per line); the lower one receives a light-grey
    "Please sign here" prompt.

    Arguments:
        extra {dict} -- Mapping with the keys "id" and "name".
        exam {fitz.Document} -- The exam; only its first page is modified.

    Keyword Arguments:
        test_mode {bool} -- Not used by this function. (default: {False})
        test_folder {str} -- Not used by this function. (default: {None})

    Raises:
        ValueError: If the id/name text can be encoded neither as Latin-1
            nor as gb2312.
        AssertionError: If either text box reports that its text did not
            fit.

    Returns:
        fitz.Document -- The same exam object that was passed in.
    """
    first_page = exam[0]
    bounds = first_page.bound()
    page_width = bounds.width
    page_height = bounds.height

    student_id = extra["id"]
    student_name = extra["name"]
    YSHIFT = 0.4  # where on page is centre of box 0=top, 1=bottom

    # Id on the first line, name on the second.
    txt = "{}\n{}".format(student_id, student_name)
    sign_here = "Please sign here"

    def half_box_width(measure):
        """Half of the widest of the three texts, plus 10% padding."""
        widest = max(
            measure(student_id, fontsize=36, fontname="Helvetica"),
            measure(student_name, fontsize=36, fontname="Helvetica"),
            measure(sign_here, fontsize=48, fontname="Helvetica"),
        )
        return widest * 1.1 * 0.5

    try:
        student_id_width = half_box_width(fitz.get_text_length)
    except AttributeError:
        # workaround https://github.com/pymupdf/PyMuPDF/issues/1085
        # TODO: drop this code when we dependent on PyMuPDF>=1.18.14
        student_id_width = half_box_width(fitz.getTextlength)
    student_id_height = 36 * 1.3

    # Upper box: centred on (page_width // 2, page_height * YSHIFT).
    centre_x = page_width // 2
    centre_y = page_height * YSHIFT
    student_id_rect_1 = fitz.Rect(
        centre_x - student_id_width,
        centre_y - student_id_height,
        centre_x + student_id_width,
        centre_y + student_id_height,
    )
    # Lower box: same width, directly underneath the upper one.
    student_id_rect_2 = fitz.Rect(
        student_id_rect_1.x0,
        student_id_rect_1.y1,
        student_id_rect_1.x1,
        student_id_rect_1.y1 + 48 * 1.3,
    )
    for box in (student_id_rect_1, student_id_rect_2):
        first_page.draw_rect(box, color=[0, 0, 0], fill=[1, 1, 1], width=2)

    # TODO: This could be put into one function
    # Also VALA doesn't understand the TODO s
    if is_possible_to_encode_as(txt, "Latin-1"):
        fontname = "Helvetica"
    elif is_possible_to_encode_as(txt, "gb2312"):
        # TODO: Double-check encoding name.  Add other CJK (how does Big5
        # vs GB work?).  Check printers can handle these or do we need to
        # embed a font?  (Adobe Acrobat users need to download something)
        fontname = "china-ss"
    else:
        # TODO: or warn use Helvetica, get "?" chars
        raise ValueError(
            "Don't know how to write name {} into PDF".format(txt))

    # Student id and name go into the upper box.
    insertion_confirmed = first_page.insert_textbox(
        student_id_rect_1,
        txt,
        fontsize=36,
        color=[0, 0, 0],
        fontname=fontname,
        fontfile=None,
        align=1,
    )
    # TODO: VALA suggests we do the insertion_confirmed check here as well
    assert (insertion_confirmed >
            0), "Text didn't fit: shortname too long?  or font issue/bug?"

    # The signature prompt goes into the lower box.
    insertion_confirmed = first_page.insert_textbox(
        student_id_rect_2,
        sign_here,
        fontsize=48,
        color=[0.9, 0.9, 0.9],
        fontname="Helvetica",
        fontfile=None,
        align=1,
    )
    # TODO: VALA suggests we do the insertion_confirmed check here as well
    assert (insertion_confirmed >
            0), "Text didn't fit: shortname too long?  or font issue/bug?"

    return exam
Exemple #12
0
# Report the conversion statistics gathered earlier in the script.
print(f"PDF conversion results for file '{ifn}':")
print(f"{out_ctr} lines read, {total_ctr} lines written, "
      f"{nlines} lines per page.")
print(f"{ofn} contains {len(doc)} pages.")

# Now add some header and footer to each created page

hdr_fontsz = 16                             # header fontsize
ftr_fontsz = 8                              # footer fontsize
blue = (0, 0, 1)                            # header / footer color
pspace = 500                                # available line width

page_total = len(doc)                       # the loop does not add pages
rule_left = 50                              # x where text and rules start
rule_right = 50 + pspace                    # x where the rules end
hdr_rule_y = 60                             # y of the line below the header
ftr_rule_y = height - 33                    # y of the line above the footer

for page in doc:
    # Footer text: "<page number> (<page count>)", right-aligned.
    footer = f"{page.number + 1} ({page_total})"
    plen_ftr = fitz.getTextlength(footer,
                                  fontname="Helvetica",
                                  fontsize=ftr_fontsz)

    # Header: the input filename.
    page.insertText(fitz.Point(rule_left, 50), ifn,
                    color=blue,
                    fontsize=hdr_fontsz)

    # Rule below the header.
    page.drawLine(fitz.Point(rule_left, hdr_rule_y),
                  fitz.Point(rule_right, hdr_rule_y),
                  color=blue, width=0.5)

    # Rule above the footer.
    page.drawLine(fitz.Point(rule_left, ftr_rule_y),
                  fitz.Point(rule_right, ftr_rule_y),
                  color=blue, width=0.5)

    # Footer text, ending at the right end of the rule.
    page.insertText(fitz.Point(rule_right - plen_ftr,
                               ftr_rule_y + ftr_fontsz * 1.2),
                    footer, fontsize=ftr_fontsz, color=blue)
Exemple #13
0
def generateDoc(title, data, dataStart, dataEnd, bio1, bio2, bio3, bio4,
                vid_filename, textfile, filename, pageNum, path):
    """Create a one-page PDF report with title, heatmaps, plots and text.

    The page contains a centred title, three EEG heatmaps (alpha, beta,
    theta) with captions centred under them, four biometric plots, a text
    box filled from ``textfile`` and the image ``vid_filename``.  The plot
    and heatmap images are generated first and written under ``path``.

    Args:
        title: Title text drawn at the top of the page.
        data, dataStart, dataEnd: Passed through to the plotting helpers.
        bio1, bio2, bio3, bio4: Biometric names; each is plotted and also
            used as part of its image file name.
        vid_filename: Image file inserted in the video-frame slot.
        textfile: Text file whose content fills the text box.
        filename: Unused (the document is no longer saved here).
        pageNum: Page number used as a prefix of the generated image files.
        path: Directory prefix, concatenated directly with file names.

    Returns:
        The newly created document (not saved).
    """
    # Creates a new blank PDF
    doc = fitz.open()
    generatedPage = doc.newPage()

    font = "Times-Roman"
    titleSize = 24
    captionSize = 14
    titleLength = fitz.getTextlength(title, font, titleSize)

    # Ensures that the title will always be centered, despite text length
    pageRect = generatedPage.bound()
    pageMidpoint_X = (pageRect.x1 - pageRect.x0) / 2
    titleStartPoint = fitz.Point(pageMidpoint_X - (titleLength / 2),
                                 titleSize + 11)
    generatedPage.insertText(titleStartPoint,
                             title,
                             fontname=font,
                             fontsize=titleSize,
                             rotate=0)

    # Autogenerates the biometric plots (in the order bio1..bio4)
    pagePrefix = path + "page%i_" % pageNum
    plotFiles = []
    for bio in (bio1, bio2, bio3, bio4):
        plotFile = pagePrefix + bio + ".png"
        bioPlotter.plotBiometric(data, dataStart, dataEnd, bio, plotFile)
        plotFiles.append(plotFile)

    # Autogenerates the EEG heatmaps
    eeg.eeg_viz(data, dataStart, dataEnd, path + "page%i_eeg_" % pageNum)

    # Inserts heatmap visualizations, each with a caption centred under it.
    # Each caption is measured with its OWN text (the beta and theta
    # captions used to be centred with the width of the alpha caption).
    heatmaps = (
        ("alpha", "Alpha Band", fitz.Rect(10, 50, 198, 238)),
        ("beta", "Beta Band", fitz.Rect(203, 50, 391, 238)),
        ("theta", "Theta Band", fitz.Rect(396, 50, 585, 238)),
    )
    for band, caption, location in heatmaps:
        generatedPage.insertImage(
            location,
            filename=path + "page%i_eeg_%s.png" % (pageNum, band),
            keep_proportion=False)
        textLength = fitz.getTextlength(caption, font, captionSize)
        # 94 is half of the nominal heatmap width.
        startPoint = fitz.Point((location.x0 + 94) - textLength / 2, 240)
        generatedPage.insertText(startPoint,
                                 caption,
                                 fontname=font,
                                 fontsize=captionSize,
                                 rotate=0)

    # Inserts biometric plots.  The lower row (bio3, bio4) goes first so
    # that the upper row (bio1, bio2), which overlaps it by 20 units, is
    # drawn on top -- the same order as before.
    bioPlacements = (
        (plotFiles[2], fitz.Rect(10, 443, 300, 653)),
        (plotFiles[3], fitz.Rect(305, 443, 595, 653)),
        (plotFiles[0], fitz.Rect(10, 253, 300, 463)),
        (plotFiles[1], fitz.Rect(305, 253, 595, 463)),
    )
    for plotFile, location in bioPlacements:
        generatedPage.insertImage(location,
                                  filename=plotFile,
                                  keep_proportion=False)

    # Generates textbox
    textboxBack_Location = fitz.Rect(250, 650, 585, 815)
    textHandler.createTextbox(textfile, textboxBack_Location, generatedPage,
                              path)

    # Inserts video frame
    vidFrame_Location = fitz.Rect(25, 675, 245, 799)
    generatedPage.insertImage(vidFrame_Location,
                              filename=vid_filename,
                              keep_proportion=False)

    # The PDF is not saved here; the caller receives the open document.
    return doc
Exemple #14
0
    for block in image_blocks:
        outpage.insertImage(block["bbox"], stream=block["image"])
        print("Inserted an image on page", page.number)

    for block in text_blocks:  # read text blocks
        shape.drawRect(block["bbox"])  # draw all text on white background,
        # because images may cover same area

        for line in block["lines"]:  # for each line in the block ...
            for span in line["spans"]:  # for each span in the line ...
                fontname = get_font(span["font"])  # get replacing fontname
                fontsize = span["size"]
                text = span["text"]
                bbox = fitz.Rect(span["bbox"])  # text rectangle on input
                text_size = fitz.getTextlength(  # measure text length on output
                    text, fontname=fontname, fontsize=fontsize
                )

                # adjust fontsize if text is too long with new the font
                if text_size > bbox.width:
                    fontsize *= bbox.width / text_size
                try:
                    shape.insertText(  # copy text to output page
                        bbox.bl,  # insertion point on output page
                        text,  # the text to insert
                        fontsize=fontsize,  # fontsize
                        # decide on output font here: the place for sophistication!
                        fontname=fontname,
                        color=pdf_color(span["color"]),
                    )
                except ValueError: