def make_oval(f):
    """Render one frame: an oval inscribed in a diagonally distorted quad.

    A temporary 400 x 400 PDF page is created. Its rectangle, shrunk by 4
    units on every side, is taken as a quad. The upper-left and lower-right
    corners of that quad are moved along their common diagonal depending on
    ``f``: with ``f == 1`` both sit at their normal position, with ``f == 0``
    they have exchanged places. The upper-right and lower-left corners never
    move. The oval is drawn with a thin blue border and an ``f``-dependent
    fill color.

    The temporary PDF is closed before returning, so the run time of this
    function largely determines how many frames per second can be shown.

    Args:
        f: interpolation factor for the two moving corners and the fill.

    Returns:
        The page picture as delivered by ``getImageData("pgm")``.
    """
    pdf = fitz.open()  # empty in-memory PDF, only used as a drawing surface
    canvas = pdf.newPage(width=400, height=400)

    # page rectangle with a 4 unit margin, seen as a quad
    frame = (canvas.rect + (4, 4, -4, -4)).quad
    moved_ul = frame.lr + (frame.ul - frame.lr) * f
    moved_lr = frame.ul + (frame.lr - frame.ul) * f
    distorted = fitz.Quad(moved_ul, frame.ur, frame.ll, moved_lr)

    # red rises with f while blue falls, which simulates a rotation around
    # the fixed diagonal
    red_part = min(1, f)
    blue_part = min(1, max(1 - f, 0))

    canvas.drawOval(
        distorted,
        color=(0, 0, 1),  # blue border
        fill=(red_part, 0, blue_part),  # variable fill color
        width=0.3,  # border width
    )

    # the pixmap is taken before the document is closed
    pixmap = canvas.getPixmap(alpha=False)
    pdf.close()
    return pixmap.getImageData("pgm")
def make_oval(f):
    """Render one frame: an oval inscribed in a quad with a moving lower edge.

    A temporary 400 x 400 PDF page is created. Its rectangle, shrunk by 4
    units on every side, is taken as a quad. The two upper corners stay where
    they are; the two lower corners slide along the bottom edge depending on
    ``f`` and have exchanged places at ``f == 1``. The oval is drawn with a
    thin blue border and an ``f``-dependent fill color, after which the
    temporary PDF is closed again.

    Args:
        f: interpolation factor for the lower corners and the fill color.

    Returns:
        The page picture as delivered by ``getImageData("ppm")``.
    """
    pdf = fitz.open()  # empty in-memory PDF, only used as a drawing surface
    canvas = pdf.newPage(width=400, height=400)

    # page rectangle with a 4 unit margin, seen as a quad
    frame = (canvas.rect + (4, 4, -4, -4)).quad
    lower_a = frame.ll + (frame.lr - frame.ll) * f
    lower_b = frame.ll + (frame.lr - frame.ll) * (1 - f)
    distorted = fitz.Quad(frame.ul, frame.ur, lower_a, lower_b)

    # fill color: red rises with f, blue falls, green is their product
    red_part = min(1, f)
    blue_part = min(1, max(1 - f, 0))
    green_part = red_part * blue_part

    canvas.drawOval(
        distorted,
        color=(0, 0, 1),  # blue border
        fill=(red_part, green_part, blue_part),  # variable fill color
        width=0.3,  # border width
    )

    # the pixmap is taken before the document is closed
    pixmap = canvas.getPixmap(alpha=False)
    pdf.close()
    return pixmap.getImageData("ppm")
def __post_init__(self):
    """Derive the geometry attributes from the raw corner and vertex fields.

    Sets:
        rect: ``fitz.Rect`` spanned by ``topLeft`` and ``botRight``.
        quads: one ``fitz.Quad`` per complete group of four consecutive
            entries of ``vertices`` (surplus trailing entries are ignored),
            or ``None`` when ``vertices`` is empty or unset.
        point: the top-left corner of ``rect``.
    """
    self.rect = fitz.Rect(self.topLeft, self.botRight)

    corners = self.vertices
    if corners:
        # step through the vertex list four entries at a time
        usable = len(corners) // 4 * 4
        self.quads = [
            fitz.Quad(*corners[start:start + 4])
            for start in range(0, usable, 4)
        ]
    else:
        self.quads = None

    self.point = self.rect.top_left
def _parse_highlight(annot: fitz.Annot, wordlist: List) -> str:
    """Collect the text covered by a highlight annotation.

    The annotation's vertices are consumed in groups of four, each group
    describing one quad. For every quad, all words whose rectangle intersects
    the quad's bounding rectangle are joined with single spaces; the per-quad
    strings are then joined with single spaces as well.

    Args:
        annot: annotation whose ``vertices`` hold the quad corner points.
        wordlist: word entries; items 0-3 of an entry are used as rectangle
            coordinates and item 4 as the word text (presumably the result
            of the page's word extraction -- verify against the caller).

    Returns:
        The covered text, or an empty string if there is no complete quad.
    """
    vertices = annot.vertices
    pieces = []
    for start in range(0, len(vertices) // 4 * 4, 4):
        area = fitz.Quad(vertices[start:start + 4]).rect
        covered = [
            word[4] for word in wordlist if fitz.Rect(word[:4]).intersects(area)
        ]
        pieces.append(" ".join(covered))
    return " ".join(pieces)
def make_oval(i):
    """Render one frame: an oval in a distorted quad with marked corners.

    A temporary 400 x 300 PDF page is created. Its rectangle, shrunk by 4
    units on every side, is taken as a quad. With ``f = i / 100``:

    * for ``f >= 0`` the two lower corners slide along the bottom edge and
      have exchanged places at ``f == 1``; the upper corners stay fixed;
    * for ``f < 0`` the two upper corners slide along the top edge in the
      same way (using ``-f``); the lower corners stay fixed.

    The oval inside the resulting quad gets a thin blue border and a fill
    color depending on ``f``. Each quad corner is marked with a small filled
    circle: red for the two left-hand corners (``ul``, ``ll``), blue for the
    two right-hand ones (``ur``, ``lr``). The PDF is closed again afterwards.

    Args:
        i: frame parameter; it is divided by 100 before use.

    Returns:
        The page picture as delivered by ``getImageData("ppm")``.
    """
    red = (1, 0, 0)
    blue = (0, 0, 1)

    doc = fitz.open()  # empty in-memory PDF, only used as a drawing surface
    page = doc.newPage(width=400, height=300)

    # page rectangle with a 4 unit border, seen as a quad
    frame = (page.rect + (+4, +4, -4, -4)).quad

    f = i / 100.0
    # "lower" drives the bottom edge, "upper" the top edge; only one is active
    lower, upper = (f, 0) if f >= 0 else (0, -f)

    q1 = fitz.Quad(
        frame.ul + (frame.ur - frame.ul) * upper,
        frame.ul + (frame.ur - frame.ul) * (1 - upper),
        frame.ll + (frame.lr - frame.ll) * lower,
        frame.ll + (frame.lr - frame.ll) * (1 - lower),
    )

    # fill color: red follows the active factor, blue its complement
    red_part = min(1, max(upper, lower))
    blue_part = min(1, max(1 - lower, 1 - upper))

    img = page.newShape()
    img.drawOval(q1)
    img.finish(
        color=blue,  # blue border
        fill=(red_part, 0, blue_part),  # variable fill color
        width=0.3,  # border width
    )

    # corner markers, drawn in the same order: ll, lr, ul, ur
    markers = ((q1.ll, red), (q1.lr, blue), (q1.ul, red), (q1.ur, blue))
    for corner, paint in markers:
        img.drawCircle(corner, 4)
        img.finish(color=paint, fill=paint)
    img.commit()

    # the pixmap is taken before the document is closed
    pix = page.getPixmap(alpha=False)
    doc.close()
    image = pix.getImageData("ppm")

    # drop the MuPDF objects explicitly before handing back the image
    del pix, page, img
    return image
def rect(self) -> fitz.Rect:
    """Return the page area attributed to this annotation.

    The result depends on ``self.type_id``:

    * ``SQUARE``: the annotation's own rectangle.
    * ``INK`` / ``LINE``: a band from x = 0 to the page mediabox's ``x1``,
      with the vertical extent of the annotation's rectangle.
    * ``HIGHLIGHT`` / ``UNDERLINE`` / ``SQUIGGLY`` / ``STRIKEOUT``: the
      bounding rectangle of all quads stored in the annotation's vertices.
      The vertices are read in groups of four; a markup spanning several
      quads therefore no longer fails in the ``fitz.Quad`` constructor,
      which accepts exactly four points. If there is no complete quad, an
      empty ``fitz.Rect()`` is returned.
    * any other type: an empty ``fitz.Rect()``.

    Returns:
        fitz.Rect: the rectangle described above.
    """
    if self.type_id == SQUARE:
        return self.annot.rect

    if self.type_id in [INK, LINE]:
        page_width = self.page.mediabox.x1
        return fitz.Rect(0, self.annot.rect.y0, page_width, self.annot.rect.y1)

    if self.type_id in [HIGHLIGHT, UNDERLINE, SQUIGGLY, STRIKEOUT]:
        points = self.annot.vertices or []
        # one bounding rectangle per complete group of four vertices
        quad_rects = [
            fitz.Quad(points[start:start + 4]).rect
            for start in range(0, len(points) // 4 * 4, 4)
        ]
        if not quad_rects:
            return fitz.Rect()
        # merge via plain min/max so a single quad yields exactly its own rect
        return fitz.Rect(
            min(r.x0 for r in quad_rects),
            min(r.y0 for r in quad_rects),
            max(r.x1 for r in quad_rects),
            max(r.y1 for r in quad_rects),
        )

    return fitz.Rect()
def _parse_highlight(self, annot, wordlist):
    """Return the text lying under a highlight annotation.

    The annotation's vertices are taken four at a time, each group forming
    one quad. Words whose rectangle intersects a quad's bounding rectangle
    count as highlighted by that quad.

    Args:
        annot: annotation whose ``vertices`` hold the quad corner points.
        wordlist: word entries; items 0-3 of an entry are used as rectangle
            coordinates and item 4 as the word text.

    Returns:
        str: the words of every quad joined by single spaces, with the
        per-quad results again joined by single spaces.
    """
    points = annot.vertices

    def words_under(area):
        # text of all words whose rectangle touches the given area
        return " ".join(
            entry[4] for entry in wordlist if fitz.Rect(entry[:4]).intersects(area)
        )

    # number of complete quadrilaterals stored in the annotation
    complete = len(points) // 4
    return " ".join(
        words_under(fitz.Quad(points[4 * k:4 * k + 4]).rect)
        for k in range(complete)
    )
def add_highlights(page, highlights):
    """Add one highlight annotation to ``page`` per entry of ``highlights``.

    Args:
        page: page object receiving the annotations.
        highlights: iterable of objects with ``x`` and ``y`` (each indexable
            at 0 and 1) and ``color``.
    """
    for item in highlights:
        # The Mendeley coordinate system seems to have its Y axis inverted
        # compared to the PDF library's, so Y values are flipped against the
        # page rectangle's lower edge.
        page_bottom = page.rect.y1
        left, right = item.x[0], item.x[1]
        lower_y = page_bottom - item.y[0]
        upper_y = page_bottom - item.y[1]

        area = pdf.Quad(
            pdf.Point(left, upper_y),  # upper left
            pdf.Point(right, upper_y),  # upper right
            pdf.Point(left, lower_y),  # lower left
            pdf.Point(right, lower_y),  # lower right
        )

        # Create the highlight
        marker = page.addHighlightAnnot(area)

        # Update the color.
        # NOTE(review): confirm that setColors honors the keys "fill_color"
        # and "stroke_color"; the library may expect other key names.
        palette = marker.colors
        palette["fill_color"] = item.color
        palette["stroke_color"] = item.color
        marker.setColors(palette)
        marker.update()
def _check_contain(r_word, points):
    """Tell whether a word lies sufficiently inside a highlighted area.

    The word counts as contained when the area of its intersection with the
    bounding rectangle of ``points`` is at least ``_threshold_intersection``
    times the area of the word's own rectangle.

    Args:
        r_word (fitz.Rect): rectangular area of a single word.
        points (list): points describing one quad of a highlight.

    Returns:
        bool: whether ``r_word`` is contained in the area.
    """
    # intersect() changes the rectangle in place, so a fresh one is built
    # from the points on every call.
    overlap = fitz.Quad(points).rect
    overlap.intersect(r_word)
    return overlap.getArea() >= r_word.getArea() * _threshold_intersection
sys.exit(1) print("Opened {} with {} pages".format(file, len(doc))) highlight_text = [] for page in doc: # list to store the co-ordinates of all highlights highlights = [] # loop till we have highlight annotation in the page annot = page.firstAnnot while annot: if annot.type[0] == 8: all_coordinates = annot.vertices if len(all_coordinates) == 4: highlight_coord = fitz.Quad(all_coordinates).rect highlights.append(highlight_coord) else: all_coordinates = [ all_coordinates[x:x + 4] for x in range(0, len(all_coordinates), 4) ] for i in range(0, len(all_coordinates)): coord = fitz.Quad(all_coordinates[i]).rect highlights.append(coord) annot = annot.next all_words = page.getTextWords() # List to store all the highlighted texts for h in highlights: