Esempio n. 1
0
def test_font1():
    """Check text metrics of the "helv" font and a round trip through its buffer.

    The total text length must match the sum of the per-character lengths,
    each character length must equal the glyph advance scaled by the font
    size, and a font rebuilt from the buffer must report the same valid
    code points.
    """
    sample = "PyMuPDF"
    size = 20
    helv = fitz.Font("helv")
    assert helv.name == "Helvetica"

    total = helv.text_length(sample, fontsize=size)
    widths = helv.char_lengths(sample, fontsize=size)
    assert len(sample) == len(widths)
    assert abs(sum(widths) - total) < fitz.EPSILON

    # Lengths were asserted equal above, so pairing them up loses nothing.
    for width, char in zip(widths, sample):
        assert width == helv.glyph_advance(ord(char)) * size

    clone = fitz.Font(fontbuffer=helv.buffer)
    assert clone.valid_codepoints() == helv.valid_codepoints()
Esempio n. 2
0
def generate_content_page(header_to_pagenumber, headers_and_subheaders, page_height, page_width):
    """Build a new document holding a "Table of Contents".

    Args:
        header_to_pagenumber: Mapping indexed by subheader; the looked-up
            value is written (via ``str``) as that subheader's page number.
        headers_and_subheaders: Mapping whose ``items()`` yield a header
            together with an iterable of its subheaders.
        page_height: Height used for every page that is created.
        page_width: Width used for every page that is created.

    Returns:
        The newly created document.
    """
    left_margin = 40
    top_margin = 60
    line_height = 15
    indent = 30

    def baseline(line_no):
        # Vertical coordinate of the given line counter value.
        return top_margin + line_no * line_height

    doc = fitz.open()
    page = doc.newPage(height=page_height, width=page_width)

    # Centered title occupying the band between line 1 and line 5.
    title_top_left = fitz.Point(0, baseline(1))
    title_bottom_right = fitz.Point(page_width, baseline(5))
    title_rect = fitz.Rect(title_top_left, title_bottom_right)
    page.insertTextbox(title_rect, "Table of Contents", fontsize=32, align=fitz.TEXT_ALIGN_CENTER)
    current_line = 7  # the entries start two lines below the title band

    # One TextWriter per page.
    writer = fitz.TextWriter(page.rect)
    for heading, subheadings in headers_and_subheaders.items():
        heading_pos = fitz.Point(left_margin, baseline(current_line))
        writer.append(heading_pos, heading, fontsize=24, font=fitz.Font("Arial"))
        current_line += 2

        for subheading in subheadings:
            y = baseline(current_line)

            # Indented subheader text.
            writer.append(fitz.Point(indent + left_margin, y), subheading, fontsize=16)

            # Connector from the end of the subheader text to the page number column.
            number_pos = fitz.Point(indent + left_margin + 500, y)
            add_dot_connector(writer, writer.lastPoint, number_pos)

            # Page number of the subheader.
            writer.append(number_pos, str(header_to_pagenumber[subheading]), fontsize=16)
            current_line += 1

            # Flush and continue on a fresh page once the line counter reaches 45.
            if current_line >= 45:
                writer.writeText(page)
                page = doc.newPage(height=page_height, width=page_width)
                writer = fitz.TextWriter(page.rect)
                current_line = 0
        current_line += 2

    writer.writeText(page)
    return doc
Esempio n. 3
0
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the JSON font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames'.

    Each entry of the JSON list must provide the keys "oldfont" (an iterable
    of old font names) and "newfont". Entries whose "newfont" is "keep" are
    skipped. A "newfont" containing ".", "/" or a backslash is passed to
    ``fitz.Font`` as a font file, anything else as a font name.

    Args:
        doc: Not used by this function.
        fname: Path of the JSON replacement file.

    Raises:
        SystemExit: If the replacement font cannot be created.
    """
    # The context manager closes the file even if the JSON is malformed.
    with open(fname) as fd:
        fontdicts = json.load(fd)

    for fontdict in fontdicts:
        oldfont = fontdict["oldfont"]
        newfont = fontdict["newfont"].strip()

        if newfont == "keep":  # ignore if not replaced
            continue

        is_fontfile = "." in newfont or "/" in newfont or "\\" in newfont
        try:
            if is_fontfile:
                font = fitz.Font(fontfile=newfont)
            else:
                font = fitz.Font(newfont)
        except Exception:
            # 'Exception' rather than a bare except, so that Ctrl-C is not
            # reported as a font creation problem.
            sys.exit("Could not create font '%s'." % newfont)

        new_fontname = font.name
        font_subsets[new_fontname] = set()
        font_buffers[new_fontname] = font.buffer
        for item in oldfont:
            new_fontnames[item] = new_fontname
        del font
Esempio n. 4
0
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames'.

    The file is read line by line. Lines that are empty after stripping
    whitespace and lines starting with "#" are skipped. The first two
    ";"-separated fields of every other line are the old and the new font.
    A new font of "keep" means no replacement. A new font containing ".",
    "/" or a backslash is passed to ``fitz.Font`` as a font file, anything
    else as a font name.

    Args:
        doc: Not used by this function.
        fname: Path of the replacement file.
    """
    # The context manager closes the file even if reading fails.
    with open(fname, "rb") as fd:
        raw_lines = fd.read().splitlines()

    for raw_line in raw_lines:
        # Strip before testing for emptiness, so that whitespace-only lines
        # are skipped instead of failing in the unpacking below.
        line = raw_line.decode().strip()
        if not line or line.startswith("#"):
            continue
        oldfont, newfont = line.split(";")[:2]

        if newfont == "keep":  # ignore if not replaced
            continue

        is_fontfile = "." in newfont or "/" in newfont or "\\" in newfont
        if is_fontfile:
            font = fitz.Font(fontfile=newfont)
        else:
            font = fitz.Font(newfont)

        new_fontname = font.name
        font_subsets[new_fontname] = set()
        font_buffers[new_fontname] = font.buffer
        new_fontnames[oldfont] = new_fontname
        del font
Esempio n. 5
0
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames'.

    Reading stops at the end of the file or at the first completely empty
    line. Lines starting with "#" (after stripping whitespace) are skipped.
    The first three ";"-separated fields of every other line are the xref,
    the old font and the new font; the xref is not used here. A new font of
    "keep" means no replacement. A new font containing ".", "/" or a
    backslash is passed to ``fitz.Font`` as a font file, anything else as a
    font name.

    Args:
        doc: Not used by this function.
        fname: Path of the replacement file.
    """
    # The context manager makes sure the file gets closed, also on errors.
    with open(fname) as repl_file:
        for line in repl_file:
            if line == "\n":  # an empty line ends the data
                break
            line = line.strip()
            if line.startswith("#"):
                continue
            _xref, oldfont, newfont = line.split(";")[:3]
            if newfont == "keep":
                continue

            is_fontfile = "." in newfont or "/" in newfont or "\\" in newfont
            if is_fontfile:
                font = fitz.Font(fontfile=newfont)
            else:
                font = fitz.Font(newfont)

            new_fontname = font.name
            font_subsets[new_fontname] = set()
            font_buffers[new_fontname] = font.buffer
            new_fontnames[oldfont] = new_fontname
            del font
Esempio n. 6
0
def build_repl_table(doc, fname):
    """Populate font replacement information.

    Read the JSON font replacement file and store its information in the
    dictionaries 'font_subsets', 'font_buffers' and 'new_fontnames'.

    Each entry of the JSON list must provide the string keys "oldfont" and
    "newfont"; both values are stripped of surrounding whitespace. Entries
    whose "newfont" is "keep" are skipped. A "newfont" containing ".", "/"
    or a backslash is passed to ``fitz.Font`` as a font file, anything else
    as a font name.

    Args:
        doc: Not used by this function.
        fname: Path of the JSON replacement file.
    """
    # The context manager closes the file even if the JSON is malformed.
    with open(fname) as fd:
        fontdicts = json.load(fd)

    for fontdict in fontdicts:
        oldfont = fontdict["oldfont"].strip()
        newfont = fontdict["newfont"].strip()
        if newfont == "keep":  # ignore if not replaced
            continue

        is_fontfile = "." in newfont or "/" in newfont or "\\" in newfont
        if is_fontfile:
            font = fitz.Font(fontfile=newfont)
        else:
            font = fitz.Font(newfont)

        new_fontname = font.name
        font_subsets[new_fontname] = set()
        font_buffers[new_fontname] = font.buffer
        new_fontnames[oldfont] = new_fontname
        del font
Esempio n. 7
0
    def _change_font_and_update_bbox(self, font_name: str):
        '''Switch to a new font and adapt font size and span/char bbox to it.

        Generally used for spans with unnamed fonts, see this
        `issue <https://github.com/pymupdf/PyMuPDF/issues/642>`_.

        In the corner case of a PDF file containing unnamed and not embedded fonts, the span
        bbox extracted by ``PyMuPDF`` is not correct. ``PyMuPDF`` can replace such fonts with
        specified ones and extract a correct bbox from the updated PDF. Since the layout matters
        here rather than the PDF itself, a default font is set for these spans instead, and the
        updated bbox is estimated with the help of ``fitz.TextWriter``.

        Args:
            font_name (str): Font name.
        '''
        self.font = font_name

        # Shrink the font size if the text would be wider than the bbox under the new font.
        new_font = fitz.Font(font_name)
        width_with_new_font = new_font.text_length(self.text, fontsize=self.size)
        if width_with_new_font > self.bbox.width:
            self.size *= self.bbox.width / width_with_new_font

        # Estimate the rectangle the text occupies when appended to a TextWriter,
        # starting at the origin of the first character.
        left, _, right, bottom = self.bbox
        writer = fitz.TextWriter((0, 0, right, bottom))
        text_rect, _ = writer.append(
            self.chars[0].origin,
            self.text,
            font=new_font,
            fontsize=self.size)

        # New span bbox:
        # - horizontally: the original range is kept
        # - vertically: centered on the estimated rect, with the font size as height
        margin = (text_rect.height - self.size) / 2.0
        top = text_rect.y0 + margin
        bottom = text_rect.y1 - margin
        self.update_bbox((left, top, right, bottom))

        # Every char keeps its horizontal range and takes over the vertical one.
        for char in self.chars:
            char_left, _, char_right, _ = char.bbox
            char.update_bbox((char_left, top, char_right, bottom))
Esempio n. 8
0
def test_textbox4():
    """Fill a text box via TextWriter and write it under an optional content group.

    Afterwards all text of the page must be located inside the text box.
    """
    doc = fitz.open()
    ocg = doc.add_ocg("ocg1")
    page = doc.new_page()
    box = fitz.Rect(50, 50, 400, 600)
    writer = fitz.TextWriter(page.rect, color=(0, 0, 1))  # blue text
    writer.fill_textbox(
        box,
        text,
        align=fitz.TEXT_ALIGN_LEFT,
        fontsize=12,
        font=fitz.Font("cour"),
        right_to_left=True,
    )
    morph = (box.tl, fitz.Matrix(1, 1))
    writer.write_text(page, oc=ocg, morph=morph)

    # check text containment
    all_text = page.get_text()
    clipped_text = page.get_text(clip=box)
    assert all_text == clipped_text
Esempio n. 9
0
def test_textbox3():
    """Fill a text box via TextWriter using the "cjk" font.

    Afterwards all text of the page must be located inside the text box;
    finally the document is scrubbed and its fonts are subsetted.
    """
    doc = fitz.open()
    page = doc.new_page()
    cjk_font = fitz.Font("cjk")
    box = fitz.Rect(50, 50, 400, 400)
    writer = fitz.TextWriter(page.rect, color=(0, 0, 1))  # blue text
    writer.fill_textbox(
        box,
        text,
        align=fitz.TEXT_ALIGN_LEFT,
        font=cjk_font,
        fontsize=12,
        right_to_left=True,
    )
    morph = (box.tl, fitz.Matrix(1, 1))
    writer.write_text(page, morph=morph)

    # check text containment
    all_text = page.get_text()
    clipped_text = page.get_text(clip=box)
    assert all_text == clipped_text

    doc.scrub()
    doc.subset_fonts()
Esempio n. 10
0
def test_font2():
    """``Font.text_length`` and ``fitz.get_text_length`` must agree."""
    sample = "PyMuPDF"
    helv = fitz.Font("helv")
    length_via_font = helv.text_length(sample)
    length_via_module = fitz.get_text_length(sample)
    assert length_via_font == length_via_module

# Collect the fonts referenced by the pages of the PDF named on the command
# line and write them, sorted by font name, to "<input name>-fontnames.csv".
infilename = sys.argv[1]
font_list = set()  # a set, so a font used on several pages is listed once
doc = fitz.open(infilename)
for i in range(len(doc)):
    for f in doc.getPageFontList(i, full=True):
        if f[-1] == 0:
            continue  # no support for text in XObjects
        xref = f[0]
        fontname = f[3]
        if f[1] == "n/a":
            msg = "not embedded"
        else:
            extr = doc.extractFont(xref)
            font = fitz.Font(fontbuffer=extr[-1])
            msg = make_msg(font)
        # drop everything up to and including the first "+", if there is one
        fontname = fontname[fontname.find("+") + 1:]
        font_list.add((xref, fontname, msg))

font_list = sorted(font_list, key=lambda x: x[1])
outname = infilename + "-fontnames.csv"
msg1 = "keep"  # third field of every output line
# The context manager closes the output file even if a write fails.
with open(outname, "w") as out:
    for xref, fontname, msg in font_list:
        out.write("%i;%s;%s; %s\n" % (xref, fontname, msg1, msg))
Esempio n. 12
0
    textwriters = {}  # contains one text writer per detected text color

    for block in blocks:
        for line in block["lines"]:
            wmode = line["wmode"]  # writing mode (horizontal, vertical)
            wdir = list(line["dir"])  # writing direction
            markup_dir = 0
            bidi_level = 0  # not used
            if wdir == [0, 1]:
                markup_dir = 4
            for span in line["spans"]:
                new_fontname = get_new_fontname(span["font"])
                if new_fontname is None:  # do not replace this font
                    continue

                font = fitz.Font(fontbuffer=font_buffers[new_fontname])
                text = span["text"].replace(chr(0xFFFD), chr(0xB6))
                # guard against non-utf8 characters
                textb = text.encode("utf8", errors="backslashreplace")
                text = textb.decode("utf8", errors="backslashreplace")
                span["text"] = text
                if wdir != [1, 0]:  # special treatment for tilted text
                    tilted_span(page, wdir, span, font)
                    continue
                color = span["color"]  # make or reuse textwriter for the color
                if color in textwriters.keys():  # already have a textwriter?
                    tw = textwriters[color]  # re-use it
                else:  # make new
                    tw = fitz.TextWriter(page.rect)  # make text writer
                    textwriters[color] = tw  # store it for later use
                try:
Esempio n. 13
0
def fitzfont(name):
    """Return a ``fitz.Font`` created from the font buffer ``myfont(name)``.

    PyMuPDF is imported only when this function is called.

    Raises:
        ImportError: If the ``fitz`` module cannot be imported.
    """
    try:
        import fitz as pymupdf
    except ImportError:
        raise ImportError("Install PyMuPDF to use this method.")
    else:
        return pymupdf.Font(fontbuffer=myfont(name))
Esempio n. 14
0
# Print the docstring of the fitz module.
print(fitz.__doc__)

# Sample strings.
highlight = "this text is highlighted"
underline = "this text is underlined"
strikeout = "this text is striked out"
squiggled = "this text is zigzag-underlined"

# Color triples.
red, blue, gold, green = (1, 0, 0), (0, 0, 1), (1, 1, 0), (0, 1, 0)

# Rectangles and a text with non-ASCII characters.
displ = fitz.Rect(0, 50, 0, 50)
r = fitz.Rect(72, 72, 220, 100)
t1 = "têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!"

font = fitz.Font("helv")  # used by the TextWriter class

# New document with one page, rotation set to 0.
doc = fitz.open()
page = doc.newPage()
page.setRotation(0)

# Make sure that TextWriter references the **unrotated** page rect,
# as everything else does.
page_rect = page.rect * page.derotationMatrix


def print_descr(annot):
    """Print a short description to the right of the annot rect."""
    rect = annot.rect
    page = annot.parent
Esempio n. 15
0
import fitz, os

def thisdir(f):
    """Return the path of *f* inside the directory of this script."""
    return os.path.join(os.path.dirname(__file__), f)


thisfile = os.path.abspath(__file__)
# Exchange the file extension only: str.replace() would rewrite every ".py"
# in the absolute path, including one inside a directory name.
outfile = os.path.splitext(thisfile)[0] + ".pdf"

font1 = fitz.Font("helv")
font2 = fitz.Font("tiro")
doc = fitz.open()
page = doc.newPage()
point = fitz.Point(50, 72)
matrix = fitz.Matrix(-20)

# one text writer per text color
wrt1 = fitz.TextWriter(page.rect, color=(0, 0, 1))
wrt2 = fitz.TextWriter(page.rect, color=(1, 0, 0))

# each piece of text starts where the previous one ended
_, last = wrt1.append(point, "This text changes color,", font1, 11)
_, last = wrt2.append(last, " font and fontsize", font2, 18)
_, last = wrt1.append(last, " several", font1, 11)
_, last = wrt2.append(last, " times!", font2, 24)

# output both text writers on current page in arbitrary sequence
wrt1.writeText(page, morph=(point, matrix))  # using the same morph parameter
wrt2.writeText(page, morph=(point, matrix))  # also preserves the joint text.

# make a new page
page = doc.newPage()
rect = wrt1.textRect | wrt2.textRect  # join rect of blue and red text
# make new rectangle from it, rotated by 90 degrees
nrect = fitz.Rect(
    rect.tl,  # same top-left, but width and height exchanged
import fitz

# Output file: this script's path with the extension exchanged for ".pdf".
# str.replace() would rewrite every ".py" in the absolute path, including
# one inside a directory name.
outfile = os.path.splitext(os.path.abspath(__file__))[0] + ".pdf"

doc = fitz.open()
page = doc.newPage()

page_rect = page.rect

blue = (0, 0, 1)  # color 1
red = (1, 0, 0)

# This font will be used for Latin, Greek, Russian characters only.
# CJK characters are always looked up in 'Droid Sans Fallback Regular'.
font = fitz.Font(ordering=0)  # results in fallback font for everything
fsize = 11  # fontsize

"""
-------------------------------------------------------------------------------
Our text lines. We split them into words such that the first word of each
line starts with a line break. Multiple spaces between words will be kept.

Disclaimer:
Non-English text pieces are arbitrary copies out of Wikipedia pages. I have no
idea what they mean nor am I responsible for that content.
-------------------------------------------------------------------------------
"""
# Our text: a language mix. Font above (if different from fallback) will be
# used for non-CJK characters. For CJK, the fallback is always used.
text = """This is a text of mixed languages to demonstrate MuPDF's text output capabilities.
Esempio n. 17
0
    raise ValueError("Need PyMuPDF v.1.17.4 at least.")
# The start year is the single command line argument if exactly one is given.
if len(sys.argv) == 2:
    startyear = sys.argv[1]
else:
    startyear = fitz.getPDFnow()[2:6]  # take current year

# isdecimal() accepts exactly the digit characters int() can convert, whereas
# isnumeric() also lets through e.g. superscript digits, which int() rejects.
if len(startyear) != 4 or not startyear.isdecimal():
    raise ValueError("Start year must be 4 digits")

suffix = "-%s.pdf" % startyear
# Exchange the last ".py" only: str.replace() would rewrite every occurrence,
# including one inside a directory name.
outfile = suffix.join(__file__.rsplit(".py", 1))
startyear = int(startyear)

doc = fitz.open()  # new empty PDF
# font = fitz.Font("cour")  # use the built-in font Courier
font = fitz.Font("spacemo")  # use Space Mono - a nicer mono-spaced font
cal = calendar.LocaleTextCalendar(locale="de")  # use your locale
# cal = calendar.TextCalendar()  # or stick with English


page_rect = fitz.PaperRect("a4-l")  # A4 landscape paper
w = page_rect.width
h = page_rect.height
print_rect = page_rect + (36, 72, -36, -36)  # fill this rectangle

# one line in calendar output is at most 98 characters, so we calculate
# the maximum possible fontsize cum grano salis as:
char_width = font.glyph_advance(32)  # character width of the font
fontsize = print_rect.width / (char_width * 100)