Example #1
0
    def import_doc(self, file_uri, config, docsearch, current_doc=None):
        """
        Import every readable PDF found below ``file_uri``.

        Children whose basename does not end with ".pdf" (case-insensitive),
        or that poppler fails to open, are skipped. Every other child is
        imported into its own PdfDoc whose ``path`` and ``docid`` get a
        "_NN" suffix, and each of its pages is passed to
        ``docsearch.index_page``. ``current_doc`` is accepted but not used.

        Returns a tuple (last imported document, its first page).
        Raises AssertionError when nothing could be imported.
        """
        parent = gio.File(file_uri)
        doc = None
        idx = 0

        for child in self.__get_all_children(parent):
            if not child.get_basename().lower().endswith(".pdf"):
                continue
            try:
                # make sure we can import it
                child_uri = child.get_uri()
                poppler.document_new_from_file(child_uri, password=None)
            except Exception:
                continue
            suffix = "_%02d" % idx
            doc = PdfDoc(config.workdir)
            doc.path += suffix
            doc.docid += suffix
            doc.import_pdf(config, child_uri)
            for page in doc.pages:
                docsearch.index_page(page)
            idx += 1

        if doc is None:
            # Raised explicitly (instead of ``assert``) so the check is not
            # stripped when Python runs with -O; the exception type is the
            # same one callers saw before.
            raise AssertionError("no importable PDF found in %s" % (file_uri,))
        return (doc, doc.pages[0])
Example #2
0
def which_colours(input_path):
    """Print the distinct colours of the non-link annotations in a PDF.

    ``input_path`` is a filesystem path; it is made absolute and turned into
    a ``file://`` URI for poppler. Each colour is collected as a
    ``[red, green, blue]`` list, in order of first appearance, and the
    resulting list is printed. Nothing is returned.
    """
    uri = 'file://%s' % urllib.pathname2url(os.path.abspath(input_path))
    document = poppler.document_new_from_file(uri, None)

    unique_colours = []

    for page_index in range(document.get_n_pages()):
        page = document.get_page(page_index)
        for annot_mapping in page.get_annot_mapping():
            annot = annot_mapping.annot
            if annot.get_annot_type().value_name == 'POPPLER_ANNOT_LINK':
                continue
            try:
                # Fetch the colour once instead of once per channel.
                colour = annot.get_color()
                rgb = [colour.red, colour.green, colour.blue]
            except AttributeError:
                # Presumably get_color() gave no colour object for this
                # annotation; such annotations are skipped.
                continue

            if rgb not in unique_colours:
                unique_colours.append(rgb)

    print("Unique colours: ", unique_colours)
Example #3
0
 def LoadDocument(self, file):
     self.document = poppler.document_new_from_file("file://" + file, None)
     self.n_pages = self.document.get_n_pages()
     self.current_page = self.document.get_page(self.n_page)
     self.width, self.height = self.current_page.get_size()
     print "wt:",self.width, self.height
     self._UpdateSize()
Example #4
0
 def _load(self, fobj):
     """Populate this document from an open SlideDeX file object.

     The first line of ``fobj`` must start with SEP. The remainder is split
     on SEP: the first segment feeds DocumentSettings, the second is the
     header, the last is the footer and every segment in between is a page
     whose first line is its filename.

     If a PDF next to the source file exists and is at least as recent as
     the source, it is loaded directly; otherwise a compile is started.

     Raises IOError when the leading SEP is missing or when fewer than
     three segments are found.
     """
     self._loaded = False
     if not fobj.readline().startswith(SEP):
         # Call-style raise works on both Python 2 and Python 3.
         raise IOError("Not a SlideDeX file")
     raw = fobj.read()  # named so the builtin ``str`` is not shadowed
     segments = raw.split(SEP)
     if len(segments) < 3:
         raise IOError("Could not load from file")
     self.settings = DocumentSettings(self, segments[0])
     self.pages.clear()
     self.header.set_content(segments[1][1:])  # Ignore empty line for filename
     self.footer.set_content(segments[-1][1:])
     for s in segments[2:-1]:
         filename, content = s.split('\n', 1)
         self.add_page(content, filename)

     pdffn = base_filename(self.fullfilename) + '.pdf'
     select_first_page = lambda status: self.slidelist_view.select_path((0,))
     if os.path.exists(pdffn) and os.stat(pdffn).st_mtime >= os.stat(self.fullfilename).st_mtime:
         # The existing PDF is not older than the source: reuse it.
         self.compile_pages()
         self.doc = poppler.document_new_from_file('file://' + os.path.abspath(pdffn), None)
         self.executor.add_callback(select_first_page)
     else:
         self.compile(select_first_page)
     self.modified = False  # Set modified_since_save, and update the window title
     self._loaded = True
Example #5
0
    def __load_pdf_file(self, filename):
        """Load the PDF at ``filename`` and rebuild the page model.

        The document is opened through poppler and one row
        ``[page number as text, PageInfo]`` is appended to the pages model
        per page. Pages at even indices (page numbers 1, 3, ...) get a
        CropSetting built from the odd-page crop, the others from the
        even-page crop. The PdfView canvas item is created on first use.
        """
        self.__pdf_filename = filename
        # NOTE(review): this binds a function-local name only. If a
        # module-level LAST_OPEN_FOLDER is meant to be updated, a ``global``
        # declaration is missing -- confirm the intent.
        LAST_OPEN_FOLDER = os.path.dirname(filename)

        filename = os.path.abspath(filename)
        self.__pdf_document = poppler.document_new_from_file(
            'file://%s' % filename, None)
        self.__n_pages = self.__pdf_document.get_n_pages()

        self.__pages_model.clear()
        for i in range(self.__n_pages):
            # Query the size of every page; previously the odd-indexed pages
            # silently reused the size of the page before them.
            size = self.__pdf_document.get_page(i).get_size()
            if i % 2 == 0:
                crop = self.__odd_crop
            else:
                crop = self.__even_crop
            self.__pages_model.append([
                str(i + 1),
                PageInfo(i, CropSetting(crop), size)
            ])

        if not self.__pdf_view:
            self.__pdf_view = PdfView()
            self.__canvas.get_root_item().add_child(self.__pdf_view,
                                                    next_index())
Example #6
0
    def makeExp(self,e):   # An experimental function
        """Insert a text block into final.tex at the event position and rebuild.

        The coordinates of event ``e`` are appended to coord.txt, a
        ``textblock*`` line is inserted into final.tex in front of the last
        line containing '%NEW_DOC_ENTRY', pdflatex is run and the resulting
        final.pdf is reloaded and redrawn.
        """
        # Context managers make sure both handles on final.tex are closed.
        with open('final.tex', 'r') as f:
            tex = f.readlines()
        # Find the block containing the new block entry
        # NOTE(review): when the marker is absent k stays -1 and the line is
        # inserted just before the last line of the file -- confirm intent.
        k = -1
        for i, line in enumerate(tex):
            if '%NEW_DOC_ENTRY' in line:
                k = i
        # insert all the line at that point
        with open("coord.txt", "a") as cfile:
            cfile.write('[%s , %s ]\n'%(e.GetX(),e.GetY()))

        win = PopUp(None,title="BORDER")

        out = '\\begin{textblock*}{10cm}(%scm,%scm) This is the comment I am putting in pdf \\end{textblock*}\n'%((e.GetX()*12/self.width),(e.GetY()*20/self.height))
        tex.insert(k,out)
        with open('final.tex','w') as f:
            f.writelines(tex)
        cmd = "pdflatex final.tex"
        os.system(cmd)
        pdf_uri = "file://" + os.path.abspath('final.pdf')
        self.document = poppler.document_new_from_file(pdf_uri, None)
        print('The tex file is : ' + pdf_uri)
        self.n_pages = self.document.get_n_pages()
        self.current_page = self.document.get_page(self.n_page)
        self.width, self.height = self.current_page.get_size()
        self._UpdateSize()
        self.Refresh()
Example #7
0
def get_annotations(annotated_pdf, synctex_pdf):
    """Collect the non-link annotations of ``annotated_pdf``.

    For every such annotation ``get_latex_position`` is called with the
    1-based page number, the page size, the annotation area and
    ``synctex_pdf``; the mapping it returns is extended with the keys
    "AnnotType", "Page", "Modified" and "Contents".

    Returns a tuple (list of those mappings, set of their 'Input' values).
    """
    pdf_uri = 'file://%s' % urllib.pathname2url(os.path.abspath(annotated_pdf))
    document = poppler.document_new_from_file(pdf_uri, None)

    annotation_data = []
    input_filenames = set()
    for page_no in range(document.get_n_pages()):
        page = document.get_page(page_no)
        for annot_mapping in page.get_annot_mapping():
            annot = annot_mapping.annot
            if annot.get_annot_type().value_name == 'POPPLER_ANNOT_LINK':
                continue
            rect = annot_mapping.area
            data = get_latex_position(page_no + 1, page.get_size(), rect, synctex_pdf)
            data["AnnotType"] = annot.get_annot_type().value_nick
            data["Page"] = str(page_no + 1)
            data["Modified"] = annot.get_modified()
            data["Contents"] = annot.get_contents()
            annotation_data.append(data)
            input_filenames.add(data['Input'])
    return annotation_data, input_filenames
    def getAnnotations(filepath):
        """Return the non-empty annotation texts of a PDF.

        ``filepath`` may already be a ``file://`` URI; anything else gets
        that prefix added. Each text is passed through
        ``AnnotationExtractor.cleanAnnotation``.

        Returns a list of dicts with the keys 'page' (1-based) and
        'annotation'.
        """
        if filepath.startswith('file://'):
            uri = filepath
        else:
            uri = "file://%s" % filepath

        doc = poppler.document_new_from_file(uri, None)
        pages = [doc.get_page(n) for n in range(doc.get_n_pages())]

        annotations = []
        for page_no, page in enumerate(pages):
            # gather the texts of this page, dropping the empty ones
            contents = [m.annot.get_contents() for m in page.get_annot_mapping()]
            for text in [c for c in contents if c]:
                cleaned = AnnotationExtractor.cleanAnnotation(text)
                annotations.append({'page': page_no + 1, 'annotation': cleaned})

        return annotations
Example #9
0
    def refresh_pdf_preview_pane(self):
        """Reload ``self.pdf_file`` into the preview pane.

        When the file exists and poppler opens it, the preview state is
        refreshed and the remembered page is shown again. When the file is
        missing or loading raises ``glib.GError``, the pane is collapsed and
        the navigation and zoom buttons are disabled. A redraw is queued in
        either case.
        """
        pdf_preview = self.ui.get_widget('pdf_preview')

        if not os.path.isfile( self.pdf_file ):
            rebuild = True
        else:
            try:
                document = poppler.document_new_from_file ('file://%s' % (self.pdf_file), None)
                self.pdf_preview['document'] = document
                self.pdf_preview['n_pages'] = self.pdf_preview['document'].get_n_pages()
                self.pdf_preview['scale'] = None
                self.goto_pdf_page( self.pdf_preview['current_page_number'], new_doc=True )
            except glib.GError:
                rebuild = True
            else:
                rebuild = False

        if rebuild:
            pdf_preview.set_size_request(0,0)
            self.pdf_preview['current_page'] = None
            for button_name in ('button_move_previous_page',
                                'button_move_next_page',
                                'button_zoom_out',
                                'button_zoom_in',
                                'button_zoom_normal',
                                'button_zoom_best_fit'):
                self.ui.get_widget(button_name).set_sensitive( False )
        pdf_preview.queue_draw()
Example #10
0
 def LoadDocument(self, file):
     self.document = poppler.document_new_from_file("file://" + file, None)
     self.n_pages = self.document.get_n_pages()
     self.current_page = self.document.get_page(self.n_page)
     self.width, self.height = self.current_page.get_size()
     print "wt:", self.width, self.height
     self._UpdateSize()
Example #11
0
    def setup_template(self, source_path, output=None):
        """Render the first page of ``source_path`` onto a new PDF surface.

        Does nothing when the cairo context, page and surface are all set
        already. Otherwise the source document is opened, a PDF surface of
        the first page's size is created on ``output``, painted white and
        the page is rendered onto it for printing.
        """
        already_prepared = self.cr and self.page and self.pdf
        if already_prepared:
            return

        # Get source document
        source_uri = 'file://{}'.format(source_path)
        self.document = poppler.document_new_from_file(source_uri, None)
        self.page = self.document.get_page(0)

        # Create destination document
        # TODO: There seems to be an issue with quality, possibly due to issues
        # with size calculations here.
        page_size = self.page.get_size()
        self.width, self.height = page_size
        self.pdf = cairo.PDFSurface(output, self.width, self.height)
        self.cr = cairo.Context(self.pdf)
        context = self.cr

        # Set a white background
        context.save()
        context.set_source_rgb(1,1,1) # set white bg
        context.paint()
        context.restore()

        # Render source pdf to destination
        context.save()
        # NOTE: This is a costly function, especially with large PDFs. Consider
        # using task queuing (eg, celery)
        self.page.render_for_printing(context)
        context.restore()
Example #12
0
def extract():
    """Print the non-link annotations of the PDF named by ``sys.argv[1]``.

    One line is printed per annotation (1-based page, modification stamp,
    annotation type nick and contents), followed by a summary line with the
    number of annotations found.
    """
    # Imported locally so the block does not rely on a module-level import.
    from urllib.request import pathname2url

    input_filename = sys.argv[1]
    # http://blog.hartwork.org/?p=612
    # pathname2url() quotes the path for use in a URI. The previous
    # urlparse() call returned a 6-field tuple, which made the '%s'
    # formatting raise TypeError before any document was opened.
    document = poppler.document_new_from_file(
        'file://%s' % pathname2url(os.path.abspath(input_filename)), None)
    all_annots = 0

    for i in range(document.get_n_pages()):
        page = document.get_page(i)
        for annot_mapping in page.get_annot_mapping():
            annot = annot_mapping.annot
            annot_type = annot.get_annot_type()
            if annot_type.value_name == 'POPPLER_ANNOT_LINK':
                continue
            all_annots += 1
            print('page: {0:3}, {1:10}, type: {2:10}, content: {3}'.
                  format(
                      i + 1, annot.get_modified(),
                      annot_type.value_nick,
                      annot.get_contents()))

    if all_annots > 0:
        print(str(all_annots) + " annotation(s) found")
    else:
        print("no annotations found")
Example #13
0
 def __init__(self, uri, filename, timestamp):
     """Open the PDF at ``uri`` and remember its first page and size.

     ``filename`` and ``timestamp`` are stored unchanged.
     """
     document = poppler.document_new_from_file(uri, None)
     self.doc = document
     self.timestamp = timestamp
     self.n_pgs = document.get_n_pages()
     self.filename = filename
     self.curr_pg = 0
     first_page = document.get_page(self.curr_pg)
     self.curr_pg_disp = first_page
     self.doc_width, self.doc_height = first_page.get_size()
Example #14
0
 def __init__(self, filename, parser, mime, backup, add2archive):
     """Initialise the base class, then open ``self.filename`` with poppler.

     The document is opened without a password; ``meta_list`` holds the
     names of the metadata fields this class works with.
     """
     super(PdfStripper, self).__init__(filename, parser, mime, backup,
                                       add2archive)
     absolute_path = os.path.abspath(self.filename)
     uri = 'file://' + absolute_path
     self.password = None
     self.document = poppler.document_new_from_file(uri, self.password)
     self.meta_list = (
         'title', 'author', 'subject', 'keywords', 'creator',
         'producer', 'creation-date', 'mod-date', 'metadata',
     )
Example #15
0
 def LoadDocument(self, file):
     """Run pdflatex on final.tex and display the resulting final.pdf.

     The ``file`` argument is not used by this implementation.
     """
     os.system("pdflatex final.tex")
     pdf_uri = "file://" + os.path.abspath('final.pdf')
     self.document = poppler.document_new_from_file(pdf_uri, None)
     self.n_pages = self.document.get_n_pages()
     self.current_page = self.document.get_page(self.n_page)
     page_size = self.current_page.get_size()
     self.width, self.height = page_size
     self._UpdateSize()
 def __init__(self, filename, parser, mime, backup, add2archive):
     """Set up the stripper and open its file as a poppler document.

     All arguments are forwarded to the parent initialiser. The PDF is then
     opened with no password and the tuple of handled metadata field names
     is stored in ``meta_list``.
     """
     super(PdfStripper, self).__init__(
         filename, parser, mime, backup, add2archive)
     uri = 'file://%s' % os.path.abspath(self.filename)
     self.password = None
     self.document = poppler.document_new_from_file(uri, self.password)
     self.meta_list = ('title', 'author', 'subject', 'keywords', 'creator',
                       'producer', 'creation-date', 'mod-date', 'metadata')
Example #17
0
File: FLEG.py Project: kcg/fleg
 def LoadDocument(self, file):
     """Load the PDF at path `file` into the fixed 325x160 panel.

     The scale is the smaller of the two ratios between the panel size and
     the size of page ``self.n_page``; it is also kept as ``initscale``.
     """
     uri = "file://" + file
     self.document = poppler.document_new_from_file(uri, None)
     self.n_pages = self.document.get_n_pages()
     self.current_page = self.document.get_page(self.n_page)
     # The panel size is fixed rather than taken from the page.
     self.width, self.height = (325, 160)
     horizontal_ratio = 325.0 / self.current_page.get_size()[0]
     vertical_ratio = 160.0 / self.current_page.get_size()[1]
     self.scale = min(horizontal_ratio, vertical_ratio)
     self.initscale = self.scale
     self.panel.SetSize((self.width, self.height))
Example #18
0
 def LoadDocument(self, file):
     """Open the PDF at path `file`, resize the view and retitle the parent.

     After loading, the size and the current ``self.scale`` are re-applied
     and the parent window title is set to `file`.
     """
     document = poppler.document_new_from_file("file://" + file, None)
     self.document = document
     self.n_pages = document.get_n_pages()
     page = document.get_page(self.n_page)
     self.current_page = page
     self.width, self.height = page.get_size()
     self._UpdateSize()
     self._UpdateScale(self.scale)
     self.myparent.SetTitle(file)
Example #19
0
    def __init__(self, ifname, ofname, width = 1024, height = 1322):
        """Store the file names and target size, then open the input PDF.

        ``ifname`` is converted with ``urllib.pathname2url`` and opened
        through poppler without a password.
        """
        self.width, self.height = width, height
        self.ifname, self.ofname = ifname, ofname

        source_uri = 'file://%s' % urllib.pathname2url(self.ifname)
        self.doc = poppler.document_new_from_file(source_uri, password = None)
Example #20
0
 def __init__(self, uri, statbuf):
     """Derive an id from the file's size and content, then open the PDF.

     The id is the SHA-1 hex digest of the size from ``statbuf`` followed
     by the data read from ``uri``. The selection is cleared afterwards.
     """
     digest = hashlib.sha1()
     digest.update('%u \0' % (statbuf.st_size,))
     with closing(urllib.urlopen(uri)) as stream:
         for chunk in generate_file_data(stream):
             digest.update(chunk)
     self.id = digest.hexdigest()
     self.popplerdoc = P.document_new_from_file(uri, None)
     self.clear_selection()
Example #21
0
 def LoadDocument(self, file):
     """Show the PDF at path `file` inside the fixed-size 325x160 panel.

     The page at index ``self.n_page`` is scaled by the smaller of the
     width and height ratios so that it fits the panel.
     """
     document = poppler.document_new_from_file("file://" + file, None)
     self.document = document
     self.n_pages = document.get_n_pages()
     self.current_page = document.get_page(self.n_page)
     # The panel dimensions are hard-coded, not read from the page.
     self.width, self.height = (325, 160)
     fit_width = 325.0 / self.current_page.get_size()[0]
     fit_height = 160.0 / self.current_page.get_size()[1]
     self.scale = min(fit_width, fit_height)
     self.initscale = self.scale
     self.panel.SetSize((self.width, self.height))
    def __init__(self):
        """Build the demo window for the PDF named by ``sys.argv[1]``.

        The window holds a toolbar (page spin button, scale spin button and
        a "Scan Fonts" button) above a scrolled drawing area sized to the
        first page of the document.
        """
        uri = "file://" + sys.argv[1]

        self.document = poppler.document_new_from_file(uri, None)
        self.n_pages = self.document.get_n_pages()

        self.current_page = self.document.get_page(0)
        self.scale = 1
        self.width, self.height = self.current_page.get_size()

        window = gtk.Window(gtk.WINDOW_TOPLEVEL)
        window.set_default_size(600, 600)
        window.set_title("Poppler GLib Demo")
        window.connect("delete-event", gtk.main_quit)

        # Page selector: one step per page, from 0 to the last page index.
        page_adjustment = gtk.Adjustment(0, 0, self.n_pages - 1, 1)
        page_selector = gtk.SpinButton(page_adjustment, 0, 0)
        page_selector.connect("value-changed", self.on_changed)
        page_label = gtk.Label('Page Number:')

        toolbar = gtk.HBox(False, 0)
        layout = gtk.VBox(False, 0)
        layout.pack_start(toolbar, False, False, 0)

        toolbar.pack_start(page_label, False, False, 4)
        toolbar.pack_start(page_selector, False, False, 0)

        # Scale selector: whole factors from 1 to 5.
        scale_adjustment = gtk.Adjustment(1, 1, 5, 1)
        scale_selector = gtk.SpinButton(scale_adjustment, 0, 0)
        scale_selector.connect("value-changed", self.on_scale_changed)
        scale_label = gtk.Label('Scale:')

        toolbar.pack_start(scale_label, False, False, 4)
        toolbar.pack_start(scale_selector, False, False, 0)

        scan_fonts_button = gtk.Button('Scan Fonts')
        scan_fonts_button.connect("clicked", self.on_scan_fonts)
        toolbar.pack_start(scan_fonts_button, False, False, 4)

        scroller = gtk.ScrolledWindow()
        scroller.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)

        self.dwg = gtk.DrawingArea()
        self.dwg.set_size_request(int(self.width), int(self.height))
        self.dwg.connect("expose-event", self.on_expose)

        scroller.add_with_viewport(self.dwg)
        layout.pack_start(scroller, True, True, 0)

        window.add(layout)
        window.show_all()
Example #23
0
 def on_changed(self, uri):
     """Refresh the view when the displayed document changes.

     Opens the PDF at ``uri``, selects its first page, resizes the drawing
     area to that page at the current scale and queues a redraw.
     """
     self.document = poppler.document_new_from_file (uri, None)
     self.n_pages = self.document.get_n_pages()
     self.current_page = self.document.get_page(0)
     # Take the size from the new page; otherwise the drawing area would be
     # sized with the dimensions of the previously shown document.
     self.width, self.height = self.current_page.get_size()
     self.dwg.set_size_request(int(self.width*self.scale),
                               int(self.height*self.scale))
     self.dwg.queue_draw()
Example #24
0
def main():
    uri = ("file:///"+sys.argv[1])
    doc = poppler.document_new_from_file(uri, None)
      
    iterp = poppler.IndexIter(doc)
    link = iterp.get_action()
    s = doc.find_dest(link.dest.named_dest)
    print link.title,' ', doc.get_page(s.page_num-1).get_label()
    walk_index(iterp, doc)
    return 0
Example #25
0
def get_pdf(self, uri):
    """Load the PDF that belongs to the currently selected tab.

    The ``uri`` argument is overwritten: the file is looked up in
    ``self.tabFileDictionary`` under the current tab index. The document,
    page counts, page number ``self.page`` and its size are stored, then
    ``virtualThread(self)`` is called.
    """
    tab_key = str(self.fileDisplayArea.get_current_page())
    uri = "file://" + os.path.realpath(self.tabFileDictionary[tab_key])
    self.document = poppler.document_new_from_file(uri, None)
    self.n_pages = self.document.get_n_pages()

    page_index = int(self.page)
    self.current_page = self.document.get_page(page_index)
    self.width, self.height = self.current_page.get_size()
    self.total_pages = self.document.get_n_pages()
    virtualThread(self)
Example #26
0
def main():
    uri = ("file:///" + sys.argv[1])
    doc = poppler.document_new_from_file(uri, None)

    iterp = poppler.IndexIter(doc)
    link = iterp.get_action()
    s = doc.find_dest(link.dest.named_dest)
    print link.title, ' ', doc.get_page(s.page_num - 1).get_label()
    walk_index(iterp, doc)
    return 0
Example #27
0
def get_pdf(self, uri):
    """Open the PDF associated with the active tab.

    The passed ``uri`` is ignored; the path comes from
    ``self.tabFileDictionary`` keyed by the current tab index. Stores the
    document, its page counts, the page at ``self.page`` and that page's
    size, then calls ``virtualThread(self)``.
    """
    current_tab = self.fileDisplayArea.get_current_page()
    tab_path = self.tabFileDictionary[str(current_tab)]
    uri = "file://" + os.path.realpath(tab_path)
    document = poppler.document_new_from_file (uri, None)
    self.document = document
    self.n_pages = document.get_n_pages()

    self.current_page = document.get_page(int(self.page))
    self.width, self.height = self.current_page.get_size()
    self.total_pages = document.get_n_pages()
    virtualThread(self)
Example #28
0
    def __init__(self, pdf_url):
        """Open ``pdf_url`` and create the slide and note windows for it.

        Both windows share the same poppler document and get the same
        key-press, button-press and delete handlers.
        """
        pdf_uri = "file://" + os.path.abspath(pdf_url)
        document = poppler.document_new_from_file(pdf_uri, None)

        self.main_window = SlideWindow(DocumentManager(document))
        self.note_window = NoteWindow(DocumentManager(document, 1))

        self.is_fullscreen = False

        handlers = (
            ("key-press-event", self.on_key_press),
            ("button-press-event", self.on_button_press),
            ("delete-event", gtk.main_quit),
        )
        for window in (self.main_window, self.note_window):
            for signal_name, callback in handlers:
                window.connect(signal_name, callback)
Example #29
0
    def render(self):
        """Save the report and load the saved file as a poppler document.

        When the report has a true ``print_as_landscape`` attribute, a
        landscape page setup is installed as the default first.
        """
        # FIXME: This is an specific fix for boleto printing in landscape
        # orientation. We should find a better fix for it or simply remove
        # PrintOperationPoppler when migrating the last reports using
        # reportlab to weasyprint
        wants_landscape = getattr(self._report, 'print_as_landscape', False)
        if wants_landscape:
            page_setup = gtk.PageSetup()
            page_setup.set_orientation(gtk.PAGE_ORIENTATION_LANDSCAPE)
            self.set_default_page_setup(page_setup)

        self._report.save()
        report_file = gio.File(path=self._report.filename)
        uri = report_file.get_uri()
        self._document = poppler.document_new_from_file(uri, password="")
	def preview_expose_event_cb(self, widget, event):
		"""Expose event callback: update the preview area.

		Draws the first page of the selected card PDF on a white
		background. Nothing happens when no card is selected or the
		card file does not exist.
		"""
		index = self.__getitem__("goComboBox").get_active()
		if not index > -1:
			return
		if not os.path.exists(self.project_path+"/card_"+str(index)+".pdf"):
			return
		card_uri = "file://"+self.project_path+"/card_"+str(index)+".pdf"
		pdf = poppler.document_new_from_file (card_uri, None)
		width, height = pdf.get_page(0).get_size()
		widget.set_size_request(int(width), int(height))
		painter = widget.window.cairo_create()
		painter.set_source_rgb(1, 1, 1)
		painter.scale(1, 1)
		painter.rectangle(0, 0, width, height)
		painter.fill()
		pdf.get_page(0).render(painter)
Example #31
0
    def render(self):
        """Write the report to disk and open it with poppler.

        A landscape default page setup is applied beforehand if the report
        carries a true ``print_as_landscape`` attribute.
        """
        # FIXME: This is an specific fix for boleto printing in landscape
        # orientation. We should find a better fix for it or simply remove
        # PrintOperationPoppler when migrating the last reports using
        # reportlab to weasyprint
        if getattr(self._report, 'print_as_landscape', False):
            landscape_setup = gtk.PageSetup()
            landscape_setup.set_orientation(gtk.PAGE_ORIENTATION_LANDSCAPE)
            self.set_default_page_setup(landscape_setup)

        self._report.save()
        saved_report = gio.File(path=self._report.filename)
        self._document = poppler.document_new_from_file(
            saved_report.get_uri(), password="")
	def render_cairo(self, ctx):
		"""Render the optional PDF background and all layers, then emit the page.

		The background is drawn only when ``background_type`` is 'pdf'
		and a background file name is set; ``background_pageno`` is
		1-based.
		"""
		has_pdf_background = (self.background_type == 'pdf'
			and self.background_filename != '')
		if has_pdf_background:
			absolute_name = os.path.abspath(self.background_filename)
			background_uri = 'file://%s' % urllib.pathname2url(absolute_name)
			background = poppler.document_new_from_file(background_uri, password=None)

			background_page = background.get_page(self.background_pageno - 1)
			background_page.render_for_printing(ctx)

		for layer in self.layers:
			layer.render_cairo(ctx)

		ctx.show_page()
Example #33
0
 def parse(self):
     """Read the metadata and bookmarks of ``self.input_fn``.

     Missing title or author values are replaced by empty strings.

     Returns a tuple (PDFMeta, bookmarks).
     """
     uri = 'file://' + os.path.abspath(self.input_fn)
     doc = poppler.document_new_from_file(uri, None)
     title = doc.get_property('title')
     author = doc.get_property('author')
     title = '' if title is None else title
     author = '' if author is None else author
     meta = PDFMeta(title, author, doc.get_n_pages())
     return (meta, self.get_bookmarks(doc))
Example #34
0
    def render_cairo(self, ctx):
        """Draw the PDF background (if any) and every layer onto ``ctx``.

        A background is rendered only for ``background_type == 'pdf'`` with
        a non-empty file name; the page is chosen by the 1-based
        ``background_pageno``. ``ctx.show_page()`` is called last.
        """
        if self.background_type == 'pdf' and self.background_filename != '':
            pdf_uri = 'file://%s' % urllib.pathname2url(
                os.path.abspath(self.background_filename))
            document = poppler.document_new_from_file(pdf_uri, password=None)

            page_index = self.background_pageno - 1
            document.get_page(page_index).render_for_printing(ctx)

        for layer in self.layers:
            layer.render_cairo(ctx)

        ctx.show_page()
Example #35
0
 def open(self, pdf, num=0):
     """Open ``pdf`` at page index ``num`` and render that page.

     ``pdf`` is handed to poppler unchanged. The configuration is parsed
     and the right panel and viewer are created before the document is
     loaded; the page is then rendered on a freshly created surface.
     """
     self.pageNum, self.path = num, pdf
     self.parseConfig()
     #        self.createBottomBox()
     self.createRightPanel()
     self.createViewer()
     document = poppler.document_new_from_file(self.path, None)
     self.document = document
     self.numPages = document.get_n_pages()
     self.page = document.get_page(self.pageNum)
     self.surface = None
     self.createSurface()
     self.ctx = cairo.Context(self.surface)
     self.page.render(self.ctx)
Example #36
0
    def open(self, pdf, num=0):
        self.pageNum = num
        self.path = pdf
        self.parseConfig()
#        self.createBottomBox()
        self.createRightPanel()
        self.createViewer()
        self.document = poppler.document_new_from_file(self.path, None)
        self.numPages = self.document.get_n_pages()
        self.page = self.document.get_page(self.pageNum)
        self.surface = None
        self.createSurface()
        self.ctx = cairo.Context(self.surface)
        self.page.render(self.ctx)
Example #37
0
    def import_doc(self, file_uri, config, docsearch, current_doc=None):
        """
        Import all PDFs that can be opened below ``file_uri``.

        Only children whose basename ends with ".pdf" (any case) and that
        poppler can open are imported. Each gets its own PdfDoc with a
        "_NN" suffix on ``path`` and ``docid``, and all of its pages are
        indexed through ``docsearch.index_page``. ``current_doc`` is not
        used by this implementation.

        Returns (last imported document, first page of that document).
        Raises AssertionError if no PDF was imported.
        """
        parent = gio.File(file_uri)
        doc = None

        idx = 0

        for child in self.__get_all_children(parent):
            if not child.get_basename().lower().endswith(".pdf"):
                continue
            try:
                # make sure we can import it
                uri = child.get_uri()
                poppler.document_new_from_file(uri, password=None)
            except Exception:
                continue
            numbering = "_%02d" % idx
            doc = PdfDoc(config.workdir)
            doc.path += numbering
            doc.docid += numbering
            doc.import_pdf(config, uri)
            for page in doc.pages:
                docsearch.index_page(page)
            idx += 1

        if doc is None:
            # An explicit raise keeps this check active under ``python -O``,
            # where ``assert`` statements are removed.
            raise AssertionError("no importable PDF found in %s" % (file_uri,))
        return (doc, doc.pages[0])
Example #38
0
    def __init__(self, fn, parent):
        """Set up the presenter-notes frame for the presentation ``fn``.

        The extension of ``fn`` is dropped; notes are read from
        "<base>.notes.xml" and slides from "<base>.pdf". The PDF is opened
        with poppler, mupdf is started on it as a separate process, and the
        Tkinter widgets (note text, placeholder label, timer) are created
        inside ``parent``.
        """
        fn = os.path.splitext(fn)[0]
        self.notes = parse_notes(fn + '.notes.xml')

        self.pdf = fn + '.pdf'

        self.slide = 0
        self.document = poppler.document_new_from_file('file://' + os.path.abspath(self.pdf), None)
        self.nslides = self.document.get_n_pages()
        # Built from the label of every page in the document.
        self.index_to_note_num = parse_indices_labels([
            self.document.get_page(n).get_label() for n in range(self.nslides)])

        # NOTE(review): this requests page index 1 while self.slide is 0 --
        # presumably the slide after the current one; verify the behaviour
        # for a one-page PDF.
        self.next_page = self.document.get_page(self.slide + 1)
        self.slide_size = tuple(int(_) for _ in self.next_page.get_size())
        # Only the pid of the viewer process is kept.
        self.mupdf_pid = subprocess.Popen(["/usr/bin/mupdf", self.pdf]).pid

        self.nextkeys = ['j', 'J', 'Right', 'Down', 'Next', 'space']
        self.prevkeys = ['k', 'K', 'Left', 'Up', 'Prior', 'BackSpace']
        # store digits so you can re-sync slides & pdf
        self.digits = ''

        # The Frame base class is initialised only here, after the document
        # state above has been set up.
        Tkinter.Frame.__init__(self, parent, background=BG)
        self.textsize = 20
        parent.title('Presenting {}'.format(self.pdf))

        # Prepend a usage hint to the first note.
        self.notes[0] = "\nDo `f', then `W' to fullscreen mupdf.\n" + self.notes[0]
        self.note = Tkinter.StringVar()
        self.note.set('1/{}: '.format(self.nslides) + self.notes[0])
        self.do_msg()

        # dummy just to get initial UI arrangement
        self.label = Tkinter.Label(self)
        self.label.pack(anchor='ne') #side=Tkinter.RIGHT, anchor='n')

        self.timer = Tkinter.Label(self, text="hit `t' to start timer",
                                   font=('Helvetica', 32, 'bold'),
                                   background=BG,
                                   fg=FG)
        self.timer.pack(anchor='center')
        self.start_time = 0

        # Image surface with the integer size of the page fetched above.
        self.surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, self.slide_size[0], self.slide_size[1])
        self.context = cairo.Context(self.surface)
        self.shownote()

        # NOTE(review): focus_get() only reports which widget has the focus;
        # focus_set() may have been intended -- confirm.
        self.focus_get()
        self.bind_all("<Key>", self.onKeyPressed)
Example #39
0
def extract_highlights(filepath: str) -> List[Tuple[int, int, int, int, str]]:
    """
    Walk the annotations of the PDF at ``filepath`` and print those whose
    text contains "xk".

    ``filepath`` is passed to poppler unchanged. The text of every non-empty
    annotation is also appended, together with its 1-based page number, to
    an accumulated string.

    This is based on code from Marwan Alsabbagh, https://stackoverflow.com/questions/13748242/extracting-pdf-annotations-comments
    see http://socialdatablog.com/extract-pdf-annotations.html
    """
    # The parameter is ``filepath``; the previous code read an undefined
    # name ``path`` here and failed with NameError.
    doc = poppler.document_new_from_file(filepath, None)
    pages = [doc.get_page(i) for i in range(doc.get_n_pages())]

    # The accumulator has to exist before the first ``x = x + ...``.
    x = ""
    for page_no, page in enumerate(pages):
        items = [i.annot.get_contents() for i in page.get_annot_mapping()]
        items = [i for i in items if i]
        for j in items:
            j = j.replace("\r\n", " ")
            x = x + "\n\n" + "'{}' (page {})".format(j, page_no + 1)
            if "xk" in j:
                print(j)
    # NOTE(review): nothing is returned although the signature announces a
    # list of tuples, and the accumulated text in ``x`` is unused -- the
    # intended return value needs to be confirmed.
Example #40
0
    def _get_text (self, file):
        """Return the text of all pages of the PDF at path `file`.

        For each page, the text inside a rectangle spanning the whole page
        is requested with glyph selection; the per-page texts are
        concatenated in page order.
        """
        # The body is indented with spaces like the ``def`` line; the mix
        # of a space-indented header and a tab-indented body is rejected by
        # Python 3 (TabError).
        uri = "file://" + file
        document = poppler.document_new_from_file (uri, None)
        npages = document.get_n_pages()
        chunks = []
        for p in range(0,npages):
            page = document.get_page(p)
            w,h = page.get_size()
            r = poppler.Rectangle ()
            r.x1 = 0
            r.x2 = w
            r.y1 = 0
            r.y2 = h
            # Currently we are getting the layout from the pdf here
            # we should collapse it
            chunks.append(page.get_text(poppler.SELECTION_GLYPH,r))

        # Joined once at the end instead of growing a string per page.
        return "".join(chunks)
	def LoadDocument(self, file1):
		self.document = poppler.document_new_from_file("file://"+file1, None)
		#Copy Slides to local
		self.total_pages = self.document.get_n_pages()
		self.current_page = self.document.get_page(self.n_page)
		self.width, self.height = self.current_page.get_size()
		ScrRes = wx.DisplaySize()
		S1 = ( ScrRes[0] / float (self.width ) ) * 0.9
		S2 = ( ScrRes[1] / float (self.height ) ) * 0.9
		print "S1 S2 ", S1 , S2 
		if (S1 < S2 ):
			self.scale = S1
		else:
			self.scale = S2
		print "Scale is --> ", self.scale
		self._UpdateSize()
		self.OnPaint(self)
		self.parent.parent.ShowFullScreen(True, style=wx.FULLSCREEN_NOBORDER^wx.FULLSCREEN_NOTOOLBAR^wx.FULLSCREEN_NOMENUBAR^wx.FULLSCREEN_NOSTATUSBAR)
Example #42
0
    def __init__(self, filename, nfile, tmp_dir):
        """Register ``filename`` and, if it is a PDF, copy and open it.

        The content type reported by gio is compared with
        ``pdf_mime_type``. On a match the file is copied into ``tmp_dir``
        under a numbered name and the copy is opened with poppler;
        otherwise ``nfile`` and ``npage`` are set to 0.
        """
        self.filename = os.path.abspath(filename)
        self.path, base_name = os.path.split(self.filename)
        self.shortname = base_name
        self.shortname, self.ext = os.path.splitext(self.shortname)
        source = gio.File(filename)
        mime_type = source.query_info('standard::content-type').get_content_type()

        if not mime_type == pdf_mime_type:
            self.nfile = 0
            self.npage = 0
            return

        self.nfile = nfile + 1
        self.mtime = os.path.getmtime(filename)
        copy_basename = ('%02d_' % self.nfile) + self.shortname + '.pdf'
        self.copyname = os.path.join(tmp_dir, copy_basename)
        shutil.copy(self.filename, self.copyname)
        self.document = poppler.document_new_from_file (file_prefix + self.copyname, None)
        self.npage = self.document.get_n_pages()
Example #43
0
 def setPDFBox(self, uri = ""):
     """Show the PDF at ``uri`` inside the "scrolledwindow2" widget.

     With an empty ``uri`` the file configured under "PAGE_ACCEUIL" is
     used. The first page is selected, the zoom comes from the "ZOOM"
     setting, and a new drawing area sized to the scaled page is added to
     the scrolled window.
     """
     if uri == "":
         uri = "file://" + self._conf["PAGE_ACCEUIL"]
     self.document = poppler.document_new_from_file (uri, None)
     self.n_pages = self.document.get_n_pages()
     self.current_page = self.document.get_page(0)
     self.scale = self._conf["ZOOM"]
     self.width, self.height = self.current_page.get_size()

     scaled_width = int(self.width*self.scale)
     scaled_height = int(self.height*self.scale)
     self.surface = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                       scaled_width,
                                       scaled_height)
     scrolled = self.wTree.get_widget("scrolledwindow2")
     #sw.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)

     self.dwg = gtk.DrawingArea()
     self.dwg.set_size_request(scaled_width, scaled_height)
     self.dwg.connect("expose-event", self.on_expose)

     scrolled.add_with_viewport(self.dwg)
     self.dwg.show()
Example #44
0
 def __init__(self, parent, content="", filename="", render=False):
     """Create a page with a LaTeX source buffer and an optional cached PDF.

     When a filename is set and both the page's .tex file and its PDF
     exist, the PDF is loaded and the thumbnail rendered right away.
     Otherwise, if ``render`` is true, a compile is started that renders
     the thumbnail when it reports a false status.
     """
     self.parent = parent
     self.buffer = sourceview.Buffer(language=LATEXLANG)
     self.buffer.connect("modified-changed", self.on_buffer_modified_changed)
     self.doc = None
     self.pb = self.parent.window.render_icon(gtk.STOCK_MISSING_IMAGE, gtk.ICON_SIZE_DIALOG)
     self._filename = filename
     self._modified_since_save = True  # Set to False at end of document load.
     self._modified_since_compile = True
     self.set_content(content)

     cached = False
     if self._filename:
         pdffn = base_filename(self.fullfilename) + '.pdf'
         have_tex = os.path.exists(self.fullfilename + '.tex')
         if have_tex and os.path.exists(pdffn):
             pdf_uri = 'file://' + os.path.abspath(pdffn)
             self.doc = poppler.document_new_from_file(pdf_uri, None)
             self.render_thumb()
             self._modified_since_compile = False
             cached = True

     if render and not cached:
         def render_thumb_on_success(status):
             return not status and self.render_thumb()

         self.compile(render_thumb_on_success, False)
def main():
    """List every non-link annotation of the PDF named by ``sys.argv[1]``.

    One line is printed per annotation (1-based page number, modification
    value, annotation type nick and contents), followed by a summary line
    with the total count or a notice that nothing was found.
    """
    input_filename = sys.argv[1]
    # http://blog.hartwork.org/?p=612
    document = poppler.document_new_from_file(
        'file://%s' % urllib.pathname2url(os.path.abspath(input_filename)),
        None)
    n_pages = document.get_n_pages()
    all_annots = 0

    for i in range(n_pages):
        page = document.get_page(i)
        for annot_mapping in page.get_annot_mapping():
            annot = annot_mapping.annot
            annot_type = annot.get_annot_type()
            # Hyperlinks are reported as annotations too; skip them.
            if annot_type.value_name == 'POPPLER_ANNOT_LINK':
                continue
            all_annots += 1
            # Single-argument print(...) behaves the same as a statement
            # and as a function call.
            print('page: {0:3}, {1:10}, type: {2:10}, content: {3}'.format(
                i+1, annot.get_modified(), annot_type.value_nick,
                annot.get_contents()))

    if all_annots > 0:
        print(str(all_annots) + " annotation(s) found")
    else:
        print("no annotations found")
Example #46
0
    def show_pdf(self):
        """
        Load the current PDF, render one page and display it in pyplot.

        The PDF is chosen by ``self.pdf_index`` and the page by
        ``self.page_num``.  When the page size cannot be read, the PDF is
        removed from ``self.pdf_names`` and ``self.add_unprocessed()`` is
        called instead of drawing anything.
        """
        pyplot.clf()
        self.pdf_name = self.pdf_names[self.pdf_index]
        path = absolute_file_scheme_path(self.pdf_name)
        doc = poppler.document_new_from_file(path, None)
        self.num_pages = doc.get_n_pages()

        # Handle edge condition of moving to new pdf with fewer pages.
        # Page indices are zero-based, so an index equal to the page count
        # is already past the end and must be reset as well.
        if self.page_num >= self.num_pages:
            self.page_num = 0

        page = doc.get_page(self.page_num)

        try:
            self.page_width, self.page_height = page.get_size()
        except AttributeError:
            # No usable page object: drop this PDF from the list.
            self.pdf_names.remove(self.pdf_name)
            self.add_unprocessed()
            return

        self.page_width = int(self.page_width)
        self.page_height = int(self.page_height)

        image_surface = cairo.ImageSurface(
            cairo.FORMAT_ARGB32, self.page_width, self.page_height
        )
        ctxt = cairo.Context(image_surface)
        page.render(ctxt)

        # View the surface bytes as a (height, width, 4) array of unsigned
        # bytes, four channels per pixel.
        image_matrix = np.asarray(image_surface.get_data())
        image_matrix = image_matrix.astype("|u1")
        image_matrix = image_matrix.reshape((self.page_height, self.page_width, 4))

        pyplot.imshow(image_matrix)
        self.render_rects()
        pyplot.draw()
Example #47
0
    def __init__(self, uri, page=0):
        """
        Open a PDF document, detect a notes layout and run the interface.

        :param uri: URI to the PDF file to open (local only, starting with
           :file:`file://`)
        :type  uri: string
        :param page: page number to which the file should be opened
        :type  page: integer
        """

        # Hyperlinks need poppler-python from Bazaar rev. 62 on; when the
        # support is missing only a warning is emitted.
        if not pympress.util.poppler_links_available():
            print >>sys.stderr, "Hyperlink support not found in poppler-python -- be sure to use at least bazaar rev. 62 to have them working"

        # Document handle, page count, current page number and page cache
        self.doc = poppler.document_new_from_file(uri, None)
        self.nb_pages = self.doc.get_n_pages()
        self.cur_page = page
        self.pages_cache = {}

        # Notes detection: ordinary slides have an aspect ratio such as 4/3,
        # 16/9 or 16/10 and a full A4 page is below 1, so a ratio of 2 or
        # more is taken to mean a document with notes.
        opening_page = self.page(page)
        if opening_page is not None:
            self.notes = (opening_page.get_aspect_ratio() >= 2)

        # Build the interface, show the current page and run it
        self.ui = pympress.ui.UI(self)
        self.ui.on_page_change(False)
        self.ui.run()
Example #48
0
  def __load_pdf_file(self, filename):
    """Load a PDF file and rebuild the page model from it.

    filename -- path of the PDF; it is made absolute before being handed to
                poppler as a ``file://`` URI.

    Each page gets one model row holding its 1-based number as text and a
    PageInfo built from the page index, a CropSetting and the page's own
    size.  Rows at even indices use the odd-page crop (they are pages 1, 3,
    5, ...) and rows at odd indices use the even-page crop.
    """
    self.__pdf_filename = filename
    # NOTE(review): this binds a local that is never read; it looks like it
    # was meant to update a module-level LAST_OPEN_FOLDER -- confirm.
    LAST_OPEN_FOLDER = os.path.dirname(filename)

    filename = os.path.abspath(filename)
    self.__pdf_document = poppler.document_new_from_file(
        'file://%s' % filename, None)
    self.__n_pages = self.__pdf_document.get_n_pages()

    self.__pages_model.clear()
    for i in range(self.__n_pages):
      # Query every page for its own size instead of reusing the size of the
      # preceding page for odd indices.
      size = self.__pdf_document.get_page(i).get_size()
      crop = self.__odd_crop if i % 2 == 0 else self.__even_crop
      self.__pages_model.append(
          [str(i+1), PageInfo(i, CropSetting(crop), size)])

    # Create the view lazily the first time a document is loaded.
    if not self.__pdf_view:
      self.__pdf_view = PdfView()
      self.__canvas.get_root_item().add_child(self.__pdf_view, next_index())
Example #49
0
 def set_document(self, filename, operation, context):
     """Open the PDF to print and configure the operation's page setup.

     filename  -- a ``file:`` URI, or a plain path that is resolved with
                  ``os.path.realpath`` and turned into a ``file://`` URI.
     operation -- object receiving the page count and the default page
                  setup (presumably a gtk.PrintOperation -- confirm).
     context   -- unused here.
     """
     # Test for the URI scheme itself: a plain path that merely begins with
     # the letters "file" (e.g. "files/a.pdf") must still be converted.
     if not filename.startswith('file:'):
         filename = 'file://' + os.path.realpath(filename)
     self.d = poppler.document_new_from_file(filename, None)
     operation.set_n_pages(self.d.get_n_pages())
     # Assume all pages are same
     page = self.d.get_page(0)
     w, h = page.get_size()
     if w > h:
         # Wider than tall: describe the paper with the short side first
         # and mark the setup as landscape.
         w, h = h, w
         ori = gtk.PAGE_ORIENTATION_LANDSCAPE
     else:
         ori = gtk.PAGE_ORIENTATION_PORTRAIT
     page_setup = gtk.PageSetup()
     page_setup.set_orientation(ori)
     # Look up a named paper size by the rounded dimensions; fall back to a
     # custom size in points when none is registered.
     size = int(round(w)), int(round(h))
     gtk_size = rl2gtk_papersizes.get(size, None)
     if gtk_size:
         ps = gtk.PaperSize(gtk_size)
     else:
         ps = gtk.paper_size_new_custom('', '', w, h, gtk.UNIT_POINTS)
     page_setup.set_paper_size(ps)
     operation.set_default_page_setup(page_setup)
Example #50
0
 def set_document(self, filename, operation, context):
     """Open the PDF to print and configure the operation's page setup.

     filename  -- a ``file:`` URI, or a plain path that is resolved with
                  ``os.path.realpath`` and turned into a ``file://`` URI.
     operation -- object receiving the page count and the default page
                  setup (presumably a gtk.PrintOperation -- confirm).
     context   -- unused here.
     """
     # Test for the URI scheme itself: a plain path that merely begins with
     # the letters "file" (e.g. "files/a.pdf") must still be converted.
     if not filename.startswith('file:'):
         filename = 'file://' + os.path.realpath(filename)
     self.d = poppler.document_new_from_file(filename, None)
     operation.set_n_pages(self.d.get_n_pages())
     # Assume all pages are same
     page = self.d.get_page(0)
     w, h = page.get_size()
     if w > h:
         # Wider than tall: describe the paper with the short side first
         # and mark the setup as landscape.
         w, h = h, w
         ori = gtk.PAGE_ORIENTATION_LANDSCAPE
     else:
         ori = gtk.PAGE_ORIENTATION_PORTRAIT
     page_setup = gtk.PageSetup()
     page_setup.set_orientation(ori)
     # Look up a named paper size by the rounded dimensions; fall back to a
     # custom size in points when none is registered.
     size = int(round(w)), int(round(h))
     gtk_size = rl2gtk_papersizes.get(size, None)
     if gtk_size:
         ps = gtk.PaperSize(gtk_size)
     else:
         ps = gtk.paper_size_new_custom('', '', w, h, gtk.UNIT_POINTS)
     page_setup.set_paper_size(ps)
     operation.set_default_page_setup(page_setup)
    def __init__(self):
        """Load the configured slide deck, set up the window and show slide 0.

        Every setting is read from ``config.presentation``: the slides path,
        the slides to post, the slide titles and the slide image pattern.
        """
        settings = config.presentation
        self.presentation_config = settings

        slides_uri = "file://%s" % settings["slides"]
        self.presentation = poppler.document_new_from_file(slides_uri, None)
        self.n_pages = self.presentation.get_n_pages()
        self.current_page = self.presentation.get_page(0)

        self.setup_window()

        # Posting state and the settings that drive it.
        self.post_slide_deferred = None
        self.posted_slides = []
        self.slides_to_post = settings["to_post"]
        self.slide_titles = settings["titles"]
        self.image_pattern = settings["slide_image_pattern"]

        # Command name -> handler.
        self.commands = dict(next=self.display_next,
                             previous=self.display_previous,
                             quit=self.quit)

        # Hack for now to display first slide
        self.display_relative_slide(0, lambda x: True)
Example #52
0
    def refresh(self):
        """Export a preview to a temporary file and load it back as an image.

        The export is written through ``self.__save`` with ``pages=(0, 1)``,
        reopened with poppler, and its first page is rendered into a cairo
        image surface.  The result is stored in ``self.__preview`` as
        ``(path, image)``.
        """
        import tempfile

        # make the preview

        # mkstemp creates the file atomically, unlike os.tempnam(), which is
        # open to symlink races and no longer exists in Python 3.
        (tmp_fd, tmp) = tempfile.mkstemp(
            prefix="paperwork_export_",
            suffix=".%s" % self.valid_exts[0])
        os.close(tmp_fd)
        path = self.__save(tmp, pages=(0, 1))

        # reload the preview

        pdfdoc = poppler.document_new_from_file(
            ("file://%s" % path), password=None)
        assert(pdfdoc.get_n_pages() > 0)

        pdfpage = pdfdoc.get_page(0)
        pdfpage_size = pdfpage.get_size()

        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32,
                                     int(pdfpage_size[0]),
                                     int(pdfpage_size[1]))
        ctx = cairo.Context(surface)
        pdfpage.render(ctx)
        img = surface2image(surface)

        self.__preview = (path, img)
Example #53
0
 def render(self):
     """Save the report, then open the saved file as a poppler document."""
     self._report.save()
     saved_file = gio.File(path=self._report.filename)
     self._document = poppler.document_new_from_file(saved_file.get_uri(),
                                                     password="")
Example #54
0
def main():
    """Add ToUnicode tables to the PDFs named on the command line.

    For every PDF: each embedded font that is also found among the PDF's
    font objects gets a ToUnicode stream, and the font is saved as an .sfd
    file in ``--outdir``; the PDF is then rewritten in place.  Afterwards
    the ``boxsize.txt`` next to the PDF is rewritten: every ``display:``
    line gets the ink extents of its page appended, all other lines are
    copied unchanged.
    """
    cli = argparse.ArgumentParser(
        description='Add ToUnicode tables to PDF files.')
    cli.add_argument('--outdir', default='tmp/sfd', type=str,
                     help='Output .sfd files to this directory')
    cli.add_argument('pdfs', type=str, nargs='+',
                     help='PDF files to process')
    options = cli.parse_args()

    # Running counter across all PDFs, used to give saved fonts unique names.
    saved_fonts = 0
    for pdf in options.pdfs:
        print("Adding ToUnicode tables to PDF file {}".format(pdf))
        with open(pdf, 'rb') as stream:
            raw = stream.read()
        reader = PdfReader(fdata=raw)
        reader.read_all()

        # Font objects keyed by font name without its leading character.
        pdf_fonts = {}
        for obj in reader.indirect_objects.values():
            if not (hasattr(obj, 'Type') and obj.Type == '/Font'):
                continue
            if obj.FontDescriptor is None:
                continue
            pdf_fonts[obj.FontDescriptor.FontName[1:]] = obj

        # File name with its last four characters removed.
        stem = os.path.basename(pdf)[:-4]
        for fontname in fontforge.fontsInFile(pdf):
            if fontname not in pdf_fonts:
                print(
                    "WARNING: font {} not found in pdf file".format(fontname))
                continue
            print("Adding ToUnicode table to font {}".format(fontname))
            pdf_font = pdf_fonts[fontname]
            ff_font = fontforge.open('{}({})'.format(pdf, fontname))
            pdf_font.ToUnicode = PdfDict()
            pdf_font.ToUnicode.stream = generate_tounicode(ff_font, pdf_font)
            # Need to save the modified font because fontforge won't read
            # ToUnicode when it converts to woff later.
            ff_font.fontname = 'pretex{:06d}'.format(saved_fonts)
            sfd_name = '[{}]{}.sfd'.format(stem, fontname)
            ff_font.save(os.path.join(options.outdir, sfd_name))
            saved_fonts += 1
        PdfWriter(pdf, trailer=reader).write()

        # Measure extents for displayed equations
        pdf_dir = os.path.realpath(os.path.dirname(pdf))
        rendered = poppler.document_new_from_file(
            'file://{}'.format(os.path.realpath(pdf)), None)
        boxsize = os.path.join(pdf_dir, 'boxsize.txt')
        with open(boxsize) as src:
            entries = src.readlines()
        with open(boxsize, 'w') as out:
            # Each "inline:" or "display:" line advances the page counter.
            pages_seen = 0
            for entry in entries:
                is_display = entry.startswith('display:')
                if is_display or entry.startswith('inline:'):
                    pages_seen += 1
                if not is_display:
                    out.write(entry)
                    continue
                page = rendered.get_page(pages_seen - 1)
                page_w, page_h = page.get_size()
                recorder = cairo.RecordingSurface(
                    cairo.Content.COLOR_ALPHA,
                    cairo.Rectangle(0, 0, page_w, page_h))
                page.render_for_printing(cairo.Context(recorder))
                ink_x, ink_y, ink_w, ink_h = recorder.ink_extents()
                out.write(entry.strip()
                          + '{},{},{},{}\n'.format(ink_x, ink_y, ink_w, ink_h))
Example #55
0
command).
"""

import os
import itertools

import gtk
import goocanvas
import gobject

import poppler

import read_gov_law_proposal as gov
import pdftotext_ext as ext

# Opened at import time: 538.pdf is looked up in the current working
# directory and loaded without a password.
pdf = poppler.document_new_from_file('file://%s/538.pdf' % os.getcwd(),
                                     password=None)


def squares(width, height, n_wide, n_high):
    """Yield the cells of an n_wide by n_high grid covering width x height.

    Each cell is an ``(x, y, cell_width, cell_height)`` tuple; cells are
    produced row by row, left to right within a row.
    """
    cell_w = float(width) / n_wide
    cell_h = float(height) / n_high
    for row in xrange(n_high):
        top = cell_h * row
        for col in xrange(n_wide):
            yield (cell_w * col, top, cell_w, cell_h)


def enlarging_square_range(start, height, end_width, n):
    """Yield n + 1 rectangles anchored at *start* with growing width.

    Each item is ``(start[0], start[1], width, height)`` where the width
    goes from 0 up to *end_width* in *n* equal steps.
    """
    for step in xrange(n + 1):
        grown_width = end_width * step / n
        yield (start[0], start[1], grown_width, height)

Example #56
0
import sys
import os.path

import poppler
import cairo


def convert_to_svg(page, filename):
    """Render *page* into an SVG file written to *filename*.

    page     -- object providing ``get_size()`` and ``render(context)``.
    filename -- path of the SVG file to create; the surface is given the
                page's own width and height.
    """
    w, h = page.get_size()
    surface = cairo.SVGSurface(filename, w, h)
    try:
        ctx = cairo.Context(surface)
        page.render(ctx)
    finally:
        # Finish the surface explicitly so the SVG is completely written and
        # the file released here, not whenever the object is collected.
        surface.finish()


if __name__ == '__main__':
    # Usage: <script> FILE.pdf [PAGE_INDEX ...]
    # Page indices are passed to doc.get_page() as given; the generated
    # files and link labels use index + 1.
    filename = sys.argv[1]

    doc = poppler.document_new_from_file('file://'+os.path.abspath(filename), None)
    # Build real lists: the page numbers are iterated twice below, and an
    # empty argument list must fall back to every page of the document.
    page_nums = [int(arg) for arg in sys.argv[2:]] or list(range(doc.get_n_pages()))
    dirpath = os.path.dirname(filename)

    # One SVG per requested page, written next to the input file.
    for pn in page_nums:
        page = doc.get_page(pn)
        convert_to_svg(page, os.path.join(dirpath, '%d.svg'%(pn+1,)))

    # Index page with one link per generated SVG.
    html = ''.join('<a href="{0}.svg" target="page">Page {0}</a><br />\n'.format(pn+1) for pn in page_nums)
    with open(os.path.join(dirpath, "pages.html"), "w") as f:
        f.write(html)

Example #57
0
 def load_file(self, file):
     """Open *file* as a PDF and select the page numbered ``self.n_page``.

     file -- path appended to "file://" to form the document URI.

     Stores the document, its page count, the selected page and that page's
     width and height, then calls ``self._update_size()``.
     """
     document = poppler.document_new_from_file("file://" + file, None)
     self.document = document
     self.n_pages = document.get_n_pages()
     selected = document.get_page(self.n_page)
     self.current_page = selected
     self.width, self.height = selected.get_size()
     self._update_size()
Example #58
0
def get_pdf(filename):
    """Return the poppler document for *filename*, opening it at most once.

    Documents are kept in ``pdf_cache`` keyed by the filename exactly as
    given; a missing entry is opened from the file's real path without a
    password.
    """
    if filename in pdf_cache:
        return pdf_cache[filename]
    document_uri = 'file://%s' % os.path.realpath(filename)
    document = poppler.document_new_from_file(document_uri, password=None)
    pdf_cache[filename] = document
    return document