Esempio n. 1
0
def create_match_sheet(match_num, blue_team=None, red_team=None, male=False,
        female=False, a_team=False, b_team=False, c_team=False):
    """Build the filled-in sheet for one match and return its file name.

    An overlay is first drawn to ``teams.pdf`` in the current directory: one
    ellipse per selected option, the match number, and the team names. The
    first page of the blank match sheet is then prepended to every overlay
    page and the merged document is written into ``match_sheet_dir`` (a name
    resolved outside this function).

    Args:
        match_num: Match number; printed on the sheet and used in the
            output file name (formatted with ``%d``).
        blue_team: Optional text drawn in the left team slot.
        red_team: Optional text drawn in the right team slot.
        male, female, a_team, b_team, c_team: Flags; each truthy flag gets an
            ellipse drawn at its own position.

    Returns:
        The path of the merged PDF.
    """
    overlay_fn = 'teams.pdf'
    overlay = canvas.Canvas("teams.pdf", pagesize=letter)
    overlay.setLineWidth(3)

    right_column = 4.23 * inch
    second_row = -.3 * inch
    # (flag, x offset, y offset) in the order the ellipses are drawn.
    markers = (
        (male, 0, 0),
        (female, right_column, 0),
        (c_team, right_column, second_row),
        (a_team, 0, second_row),
        (b_team, 4.23/ 2 * inch, second_row),
    )
    for selected, xoffset, yoffset in markers:
        if selected:
            overlay.ellipse(
                    3.2*inch + xoffset, 9.5*inch + yoffset,
                    2.4*inch + xoffset, 9.1*inch + yoffset)

    overlay.setFont('Helvetica', 24)
    overlay.drawString(1.7*inch, 7.8*inch, str(match_num))

    overlay.setFont('Helvetica', 18)
    if blue_team:
        overlay.drawString(.65*inch, 6.65*inch, blue_team)
    if red_team:
        red_shift = 3.75*inch
        overlay.drawString(.65*inch + red_shift, 6.65*inch, red_team)
    overlay.save()

    blank_sheet_fn = '/home/user/dev/match_sheet/match_sheet.pdf'
    output_fn = os.path.join(match_sheet_dir, 'match_%d.pdf' %(match_num))
    blank_sheet = PageMerge().add(PdfReader(blank_sheet_fn).pages[0])[0]
    merged = PdfReader(overlay_fn)

    # prepend=True puts the blank sheet first in each page's merge list.
    for overlay_page in merged.pages:
        PageMerge(overlay_page).add(blank_sheet, prepend=True).render()
    PdfWriter().write(output_fn, merged)
    return output_fn
Esempio n. 2
0
def create_merged_pdf(in_pdf_dir):
    """Merge every PDF in *in_pdf_dir* into ``tmp.pdf``.

    Entries of the directory whose name ends in ``.pdf`` are concatenated in
    natural sort order and written to ``tmp.pdf`` in the current working
    directory.

    Args:
        in_pdf_dir: Directory containing the PDFs to merge.

    Returns:
        A list with, for each merged file in merge order, the 1-based page
        number at which that file starts in the merged document. Empty when
        the directory holds no PDFs.
    """
    writer = PdfWriter()
    files = [x for x in os.listdir(in_pdf_dir) if x.endswith('.pdf')]
    pagenums = []
    next_start = 1
    for fname in natsorted(files):
        # Parse each file once; the same page list is used for both the
        # page count and the merge.
        pages = PdfReader(os.path.join(in_pdf_dir, fname)).pages
        pagenums.append(next_start)
        next_start += len(pages)
        writer.addpages(pages)
    writer.write("tmp.pdf")
    return pagenums
Esempio n. 3
0
def mark_pdf(clean_path, marked_path):
    """Draw rectangles around the boxes, lines, and characters in a document.

    The layout of every page of *clean_path* is analysed and outlined on a
    temporary PDF (``.rects.<name>`` next to *marked_path*): red for text
    boxes, green for text lines, blue for characters. That PDF is then merged
    page by page onto the original and the result is written to
    *marked_path*.

    Any exception is printed rather than raised. The temporary rectangle
    file is removed whether or not marking succeeded.

    Args:
        clean_path: Path of the PDF to analyse.
        marked_path: Path of the annotated PDF to write.
    """
    rect_file = None
    try:
        marked_dir, marked_fn = os.path.split(marked_path)
        rect_file = os.path.join(marked_dir, ".rects." + marked_fn)

        # PDFs are binary data; text mode breaks parsing on Python 3.
        with open(clean_path, 'rb') as fp:
            parser = PDFParser(fp)
            pdf = PDFDocument(parser)
            parser.set_document(pdf)

            rsrcmgr = PDFResourceManager()
            laparams = LAParams()
            device = PDFPageAggregator(rsrcmgr, laparams=laparams)
            interpreter = PDFPageInterpreter(rsrcmgr, device)

            c = canvas.Canvas(rect_file)

            for page in PDFPage.create_pages(pdf):
                interpreter.process_page(page)
                layout = device.get_result()
                boxes = [obj for obj in layout if isinstance(obj, LTTextBox)]
                for box in boxes:
                    c.setLineWidth(1)
                    c.setStrokeColorRGB(1, 0, 0)
                    make_rectangle(c, box.bbox)
                    lines = [obj for obj in box if isinstance(obj, LTTextLine)]
                    for line in lines:
                        c.setLineWidth(0.5)
                        c.setStrokeColorRGB(0, 1, 0)
                        make_rectangle(c, line.bbox)
                        for char in line:
                            if isinstance(char, LTChar):
                                c.setLineWidth(0.3)
                                c.setStrokeColorRGB(0, 0, 1)
                                make_rectangle(c, char.bbox)
                # One rectangle page per source page, so indexes line up below.
                c.showPage()
            c.save()

        pdf_content = PdfReader(clean_path)
        # Parse the rectangle PDF once instead of once per page.
        rect_pages = PdfReader(rect_file).pages
        for i, page in enumerate(pdf_content.pages):
            rects = PageMerge().add(rect_pages[i])[0]
            PageMerge(page).add(rects).render()

        PdfWriter().write(marked_path, pdf_content)
    except Exception as e:
        # Best-effort by design: report the problem and carry on.
        print(e)
    finally:
        if rect_file and os.path.exists(rect_file):
            os.remove(rect_file)
Esempio n. 4
0
def main():
    """Find the target substring on one PDF page and print the matching text.

    The target is searched for in the page's content streams, both verbatim
    and encoded with each of the page's fonts. For the first non-empty line
    that contains one of those forms, the text between the first ``[`` and
    the following ``]`` is printed, decoded with the matching font's cmap
    unless the verbatim form was the one that matched.

    Raises:
        IndexError: If no line of the page contains the target.
    """
    args = parse_args()
    page = PdfReader(args.pdf_filepath, decompress=True).pages[args.page_index]
    fonts = page.Resources.Font

    # One encoded needle per font, plus the raw substring (font id None).
    needles = []
    for name, font in fonts.items():
        encoded = encode(args.target_substring, font2cmap(font))
        needles.append((encoded[1:-1].upper(), name))
    needles.append((args.target_substring, None))

    raw_contents = page.Contents
    contents = raw_contents if isinstance(raw_contents, PdfArray) else [raw_contents]

    def first_hit_per_line():
        # Yields (line, font id) for the first needle found in each line.
        for content in contents:
            for line in content.stream.split('\n'):
                for needle, name in needles:
                    if needle in line:
                        yield line, name
                        break

    # Empty lines can "match" an empty needle; skip them like the scan does.
    matching_line, font_id = next(
        (hit for hit in first_hit_per_line() if hit[0]), (None, None))

    if not matching_line:
        raise IndexError('Target substring not found in PDF on page {}'.format(
            args.page_index))

    matching_str = matching_line.split('[')[1].split(']')[0]
    if font_id is None:
        print(matching_str)
        return
    print(decode(matching_str, font2cmap(fonts[font_id])))
Esempio n. 5
0
 def load_pdf(self):
     """Open this object with pdfrw and return the resulting ``PdfReader``.

     The reader's ``pages`` can be indexed or sliced; according to the
     original note, ``Path.save_pdf`` is the counterpart used to save them.
     """
     import pdfrw

     reader = pdfrw.PdfReader(self)
     return reader
Esempio n. 6
0
 def __init__(self, controller):
     """Prepare packet-building state for the controller's selections.

     ``_set_packet_type_id`` runs first; if ``packet_type_id`` is falsy
     afterwards, nothing else is initialised.
     """
     self.controller = controller
     self.selections = self.controller.selections

     self._set_packet_type_id()
     if not self.packet_type_id:
         return

     # One flag per file that needs checking, recording whether that file
     # is being included in the packet.
     self.check_files_included = dict.fromkeys(self.check_files_ids, False)

     # Collections start out empty.
     self.addon_ids = []
     self.missing_files = []
     self.selected_reports = []
     self.selected_report_attributes = []
     self.selected_report_ids = []

     self.output_path = self.output_path_print = self.packet_type = ''

     type_option = self.selections.type_option
     self.output_name = self.output_names[type_option]
     self.is_liaison = 'Liaison' in type_option

     self.create_print_file = False
     self.total_pages = 0

     # First page of the blank PDF, converted to a form XObject.
     blank_first_page = PdfReader(self.blank_pdf_path).pages[0]
     self.blank_pdf_page = pagexobj(blank_first_page)

     self.pdf_converter = PdfConverter()

     self._open_excel()
Esempio n. 7
0
def pdf_generate_proc(file_template, file_pdf, receiver, price):
    """Stamp date, receiver and price onto the first page of a template PDF.

    The first page of *file_template* is placed on a new reportlab canvas,
    today's date (``YYYY-MM-DD``), *receiver* and ``str(price)`` are drawn at
    fixed coordinates, ``draw_rect_proc`` adds its drawing, and the single
    page is saved to *file_pdf*. Start and end markers are written to stderr.

    Args:
        file_template: Path of the template PDF.
        file_pdf: Path of the PDF to create.
        receiver: Text drawn in the receiver slot.
        price: Value drawn (via ``str``) in the price slot.

    Returns:
        The string ``"Success"``.
    """
    sys.stderr.write("*** pdf_generate_proc *** start ***\n")

    pdf_canvas = canvas.Canvas(file_pdf)

    # The CID font must be registered before it can be selected.
    fontname_g = "HeiseiKakuGo-W5"
    pdfmetrics.registerFont(UnicodeCIDFont(fontname_g))
    pdf_canvas.setFont(fontname_g, 16)

    # Use the template's first page as the page background.
    page = PdfReader(file_template, decompress=False).pages
    pp = pagexobj(page[0])
    pdf_canvas.doForm(makerl(pdf_canvas, pp))

    str_today = datetime.today().strftime('%Y-%m-%d')

    pdf_canvas.drawString(400, 800, str_today)
    pdf_canvas.drawString(100, 725, receiver)
    pdf_canvas.drawString(100, 680, str(price))

    draw_rect_proc(pdf_canvas)

    pdf_canvas.showPage()
    pdf_canvas.save()

    sys.stderr.write("*** pdf_generate_proc *** end ***\n")

    # Was "Supdf_canvasess": a variable rename had been applied inside the
    # literal "Success".
    str_res = "Success"
    return str_res
Esempio n. 8
0
def test_bookmarks_6():
    """Check the outline tree built from headings whose levels skip around."""
    markup = '''
      <h2>1</h2> h2 level 1
      <h4>2</h4> h4 level 2
      <h3>3</h3> h3 level 2
      <h5>4</h5> h5 level 3
      <h1>5</h1> h1 level 1
      <h2>6</h2> h2 level 2
      <h2>7</h2> h2 level 2
      <h4>8</h4> h4 level 3
      <h1>9</h1> h1 level 1
    '''
    pdf_bytes = FakeHTML(string=markup).write_pdf()
    # Expected tree:
    # 1
    # |_ 2
    # L_ 3
    #    L_ 4
    # 5
    # |_ 6
    # L_ 7
    #    L_ 8
    # 9
    outlines = PdfReader(fdata=pdf_bytes).Root.Outlines
    assert outlines.Count == '9'

    # Walk the first top-level entry and its children.
    node_1 = outlines.First
    assert node_1.Title == '(1)'
    node_2 = node_1.First
    assert node_2.Title == '(2)'
    node_3 = node_2.Next
    assert node_3.Title == '(3)'
    assert node_3.First.Title == '(4)'

    # Walk the second top-level entry and its children.
    node_5 = node_1.Next
    assert node_5.Title == '(5)'
    node_6 = node_5.First
    assert node_6.Title == '(6)'
    node_7 = node_6.Next
    assert node_7.Title == '(7)'
    assert node_7.First.Title == '(8)'

    assert outlines.Last.Title == '(9)'
Esempio n. 9
0
def add_footer(input_file, output_file):
    """Copy a PDF, adding a rule, site address and page number to each page.

    Args:
        input_file: Source path without the ``.pdf`` extension.
        output_file: Destination path without the ``.pdf`` extension.

    Returns:
        Always ``1``.
    """
    logging.info("add_footer started")

    # Turn every source page into a form XObject that can be re-drawn.
    source = PdfReader("%s.pdf" % (input_file))
    page_forms = [pagexobj(p) for p in source.pages]

    out = Canvas("%s.pdf" % (output_file))
    pdfmetrics.registerFont(
        TTFont('SourceSansPro', 'SourceSansPro-Regular.ttf'))

    footer_text = "www.borderviolence.eu"
    right_inset = 80

    page_num = 0
    for form in page_forms:
        page_num += 1

        # Reproduce the original page at its own size.
        out.setPageSize((form.BBox[2], form.BBox[3]))
        out.doForm(makerl(out, form))

        # Footer: thin rule, then page number and site address above it.
        out.saveState()
        out.setStrokeColorRGB(0.19, 0.19, 0.19)
        out.setLineWidth(0.3)
        out.line(75, 78, form.BBox[2] - 66, 78)
        out.setFont('SourceSansPro', 10)
        out.setFillColor(HexColor(0x333333))
        out.drawString(form.BBox[2] - right_inset, 85, str(page_num))
        out.drawString(form.BBox[2] - right_inset - 436, 85, footer_text)
        out.restoreState()

        out.showPage()

    out.save()
    logging.info("PDF with footer %s.pdf was saved" % (output_file))
    return 1
Esempio n. 10
0
def upscale(file_name, scale=1.5, margin_x=0, margin_y=0, suffix='scaled', tempdir=None):
    """Upscale a PDF to a large size.

    Every page is re-added through ``PageMerge`` with a view rectangle built
    from the margins, scaled by *scale*, and written to a new file. The
    source document's Info dictionary is copied when present.

    Args:
        file_name: Path of the PDF to upscale.
        scale: Scale factor applied to each page.
        margin_x: Horizontal margin used to build the view rectangle.
        margin_y: Vertical margin used to build the view rectangle.
        suffix: Suffix handed to ``add_suffix`` to name the output next to
            the input. Ignored when *tempdir* is given.
        tempdir: If truthy, the output is a new temporary file in this
            directory.

    Returns:
        Path of the written PDF. With neither *tempdir* nor *suffix*, this is
        a temporary file in the default temp directory.
    """
    def adjust(page):
        info = PageMerge().add(page)
        x1, y1, x2, y2 = info.xobj_box
        viewrect = (margin_x, margin_y, x2 - x1 - 2 * margin_x, y2 - y1 - 2 * margin_y)
        page = PageMerge().add(page, viewrect=viewrect)
        page[0].scale(scale)
        return page.render()

    # Set output file name
    if tempdir or not suffix:
        # delete=False plus an explicit close: the name stays valid for the
        # writer below and no open handle is left behind.
        with NamedTemporaryFile(suffix='.pdf', dir=tempdir or None, delete=False) as tmp:
            output = tmp.name
    else:
        output = os.path.join(os.path.dirname(file_name), add_suffix(file_name, suffix))

    reader = PdfReader(file_name)
    writer = PdfWriter(output)
    for page in reader.pages:
        writer.addpage(adjust(page))
    writer.trailer.Info = IndirectPdfDict(reader.Info or {})
    writer.write()
    return output
Esempio n. 11
0
 def _create_overlay(self, data):
     """Create the filled in overlay.

     For every annotation on every page of the template form, the value
     looked up in *data* under the annotation's ``/T`` label is drawn just
     inside the lower-left corner of the annotation's rectangle. One
     overlay page is emitted per template page.

     Args:
         data: Mapping from field label to the text to draw; missing labels
             draw an empty string.

     Returns:
         The overlay buffer, rewound to the start.
     """
     # NOTE(review): reportlab emits bytes on Python 3; confirm that the
     # StringIO in scope here is a bytes-capable buffer.
     overlay_buffer = StringIO()
     overlay_canvas = canvas.Canvas(overlay_buffer)
     overlay_canvas.setFont('Times-Roman', 10)
     # open a copy of the template form to fill in
     self.form.seek(0)
     template = PdfReader(self.form)
     # for each field on each page, find the position and fill in the information
     for page in template.Root.Pages.Kids:
         # pdfrw returns None for a missing /Annots key; such pages simply
         # have no fields to fill.
         for field in page.Annots or ():
             sides_positions = [float(i) for i in field.Rect]
             left = min(sides_positions[0], sides_positions[2])
             bottom = min(sides_positions[1], sides_positions[3])
             label = field.T.decode() if field.T else None
             value = data.get(label, '')
             overlay_canvas.drawString(
                 x=left + 2,
                 y=bottom + 1,
                 text=value,
             )
         overlay_canvas.showPage()
     overlay_canvas.save()
     overlay_buffer.seek(0)
     return overlay_buffer
Esempio n. 12
0
def makeA3(ipath, opath, onesided=False):
    """Impose the first pages of a PDF onto double-width "booklet" sheets.

    By default the first four input pages are arranged in booklet order on
    two output pages (double-sided, short-side join). When *onesided* is
    true, or the input has at most two pages, only one output page is made,
    carrying the first two input pages with page one on the right-hand side.

    Input pages need not be A4: the output is simply twice as wide as the
    first page. All input pages should have the same size.

    Args:
        ipath: Path of the input PDF.
        opath: Path of the PDF to write.
        onesided: Force the single-sheet layout.
    """
    pages = PdfReader(ipath).pages
    # Pad to an even page count; the pad entry is skipped when merging.
    if len(pages) % 2:
        pages.append(None)

    front = PageMerge()
    front.add(pages[0])
    width = front[0].w

    sheets = []
    single_sheet = onesided or len(pages) == 2
    if single_sheet:
        front_partner = pages[1]
    else:
        front_partner = pages[3]
        # Back side: page 3 is prepended, page 2 is moved one width right.
        back = PageMerge()
        back.add(pages[1])
        back.add(pages[2], prepend=True)
        back[0].x = width
        sheets.append(back.render())

    if front_partner:
        front.add(front_partner)
    # The first input page sits on the right-hand half of the front side.
    front[0].x = width
    sheets = [front.render()] + sheets

    PdfWriter().addpages(sheets).write(opath)
Esempio n. 13
0
    def write_fillable_pdf(self, input_pdf_path, output_pdf_path, data_dict):
        """Fill the form fields of a PDF from *data_dict* and save a copy.

        For each widget annotation whose field name (with its first and last
        character stripped) is a key of *data_dict*:

        * text fields (``/Tx``) get the upper-cased value, unless the value
          is ``"-"``;
        * button fields (``/Btn``) are set to ``/Yes`` when the value is
          ``"Yes"``.

        Every annotation, widget or not, additionally gets ``Ff=1``
        (presumably the read-only flag; confirm against the PDF spec).
        ``NeedAppearances`` is set on the AcroForm before writing.

        Args:
            input_pdf_path: Template PDF to read.
            output_pdf_path: Destination of the filled PDF.
            data_dict: Mapping of field name to value.
        """
        template_pdf = PdfReader(input_pdf_path)

        for page in template_pdf.pages:
            annotations = page[self.ANNOT_KEY]
            if not hasattr(annotations, "__len__"):
                continue

            for annotation in annotations:
                is_named_widget = (
                    annotation[self.SUBTYPE_KEY] == self.WIDGET_SUBTYPE_KEY
                    and self.ANNOT_FIELD_KEY in annotation)
                if is_named_widget:
                    key = annotation[self.ANNOT_FIELD_KEY][1:-1]
                    if key in data_dict:
                        value = data_dict[key]
                        field_type = annotation["/FT"]
                        if field_type == "/Tx":
                            if value != "-":
                                filled = PdfDict(V='{}'.format(value.upper()))
                                annotation.update(filled)
                        elif field_type == "/Btn" and value == "Yes":
                            checked = objects.pdfname.BasePdfName('/Yes')
                            annotation.update(PdfDict(V=checked))
                # Applied to every annotation, not only the filled ones.
                annotation.update(PdfDict(Ff=1))

        appearance_flag = PdfDict(NeedAppearances=PdfObject('true'))
        template_pdf.Root.AcroForm.update(appearance_flag)
        PdfWriter().write(output_pdf_path, template_pdf)
Esempio n. 14
0
def extract_crossword(file_path: Path,
                      output_path: Path,
                      overwrite: bool = True) -> Path:
    """Save page with crossword.

    Open a PDF document, search for a string identifying Le Monde
    crossword, and save the corresponding page to a file. The last pages
    are searched first, then the remaining ones. The output file is named
    after the first group of the match returned by ``_search_in_page``.

    Args:
      file_path: Path of the PDF document to process

      output_path: Path of the output directory

      overwrite: Whether to overwrite existing files (default to True)

    Returns:
      Path of the saved file

    Raises:
      CrosswordNotFoundError: If no page matches.

      FileAlreadyExistError: If the output file exists and ``overwrite``
        is false.

    """
    LOGGER.debug(f'Processing {file_path}')
    max_extracted_pages = 15
    rsrcmgr = PDFResourceManager(caching=True)
    # None means "not found"; 0 is a valid page index and must not be
    # confused with it.
    crossword_page = None
    m = None
    with open(file_path, 'rb') as f:
        pages = list(PDFPage.get_pages(f))
        LOGGER.debug(f'Found {len(pages)} pages')
        first_checked_pageno = max(0, len(pages) - max_extracted_pages)
        LOGGER.debug(f'Searching last {max_extracted_pages} pages first')
        for i, page in enumerate(pages[first_checked_pageno:]):
            m = _search_in_page(page, rsrcmgr)
            if m:
                crossword_page = first_checked_pageno + i
                break

        if crossword_page is None:
            LOGGER.debug('Extending search to all pages')
            for i, page in enumerate(pages[:first_checked_pageno]):
                m = _search_in_page(page, rsrcmgr)
                if m:
                    crossword_page = i
                    break

    if crossword_page is None or not m:
        raise CrosswordNotFoundError

    LOGGER.debug(f'Crossword found on page {crossword_page}')

    path = output_path / '{}.pdf'.format(m.group(1))
    if path.exists() and not overwrite:
        LOGGER.debug(f'File already exist {path}')
        raise FileAlreadyExistError

    x = PdfReader(file_path)
    page = x.pages[crossword_page]
    y = PdfWriter()
    y.addpage(page)
    y.write(path)

    return path
Esempio n. 15
0
def ocr(tar_gz_filename, empty_page_threshold, language='eng'):
    """Run tesseract over every archive member and merge the PDF results.

    The archive is extracted into ``TMP_DIR``. ``./tesseract`` (run from
    ``SCRIPT_DIR``) converts each member to ``partial.pdf``, and every
    resulting page whose content stream length is at least
    *empty_page_threshold* is appended to ``output.pdf``. Pages whose length
    cannot be determined are kept. The extracted members, ``partial.pdf`` and
    ``DOWNLOAD_FILE`` are removed from ``TMP_DIR`` afterwards.

    Args:
        tar_gz_filename: Path of the archive holding the images.
        empty_page_threshold: Minimum ``/Length`` of a page's contents for
            the page to be kept.
        language: Language code passed to tesseract's ``-l`` option.

    Returns:
        Path of the merged ``output.pdf`` inside ``TMP_DIR``.

    Note:
        The process exits with status 1 if tesseract fails.
    """
    # SECURITY NOTE: extractall() trusts the member paths stored in the
    # archive. Only feed this archives from a trusted source, or validate
    # member names before extraction.
    with tarfile.open(tar_gz_filename) as tar:
        tar.extractall(path=TMP_DIR)
        member_names = tar.getnames()

    env = os.environ.copy()
    env.update(dict(LD_LIBRARY_PATH=LIB_DIR, TESSDATA_PREFIX="{}/tessdata".format(SCRIPT_DIR)))

    output = PdfWriter()
    for filename in member_names:
        cmd = ['./tesseract', '-l', language,
            '-c', 'min_orientation_margin=0', # don't leave out characters close to border
            '{}/{}'.format(TMP_DIR, filename),
            '{}/partial'.format(TMP_DIR),
            'pdf']
        try:
            out = subprocess.check_output(cmd, cwd=SCRIPT_DIR, env=env, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            print('tesseract call failed, here\'s the output so far:')
            print(e.output)
            sys.exit(1)
        print(out)
        for p in PdfReader("{}/{}".format(TMP_DIR, "partial.pdf")).pages:
            try:
                if int(p.Contents['/Length']) < empty_page_threshold:
                    continue
            except Exception:
                # if in doubt add the page (but let KeyboardInterrupt and
                # SystemExit through, unlike a bare except)
                pass
            output.addpage(p)
    output.write('{}/output.pdf'.format(TMP_DIR))

    for f in ['partial.pdf', DOWNLOAD_FILE] + member_names:
        os.remove("{}/{}".format(TMP_DIR, f))
    return '{}/output.pdf'.format(TMP_DIR)
Esempio n. 16
0
def test_relative_links_relative():
    """A relative href with no base URL is kept verbatim as the link URI."""
    # Relative URI reference without a base URI: allowed for anchors
    document = FakeHTML(string='<a href="../lipsum" style="display: block">',
                        base_url=None)
    pdf_bytes = document.write_pdf()

    first_page = PdfReader(fdata=pdf_bytes).Root.Pages.Kids[0]
    (link,) = first_page.Annots

    assert link.A == {'/URI': '(../lipsum)', '/S': '/URI', '/Type': '/Action'}

    rounded_rect = [round(float(value)) for value in link.Rect]
    assert rounded_rect == [0, TOP, RIGHT, TOP]
Esempio n. 17
0
def subset_pdf(inp_file, ranges):  # Create PDF with subset pages
    """Rewrite *inp_file* so that it contains only the requested pages.

    Args:
        inp_file: Path of the PDF; it is both read and overwritten.
        ranges: Space-separated page specs, each a 1-based page number
            (``"3"``) or an inclusive range (``"2-5"``). Only the first two
            numbers of a spec are used.

    Returns:
        The number of pages written, or ``-1`` when *ranges* is malformed or
        refers to pages that do not exist. Nothing is written in that case.
    """
    parsed_ranges = []
    for spec in ranges.split(' '):
        try:
            bounds = [int(part) for part in spec.split('-')]
        except ValueError:  # a word, an empty piece, or a negative number
            return -1
        # A single number N means the range N-N.
        first, last = (bounds + bounds[-1:])[:2]
        # Pages are 1-based. Without this check, page 0 would become index
        # -1 and silently select the last page.
        if first < 1:
            return -1
        parsed_ranges.append((first, last))

    pages = PdfReader(inp_file).pages
    out_data = PdfWriter(inp_file)
    num_pages = 0
    try:
        for first, last in parsed_ranges:
            for page_num in range(first, last + 1):
                out_data.addpage(pages[page_num - 1])
                num_pages += 1
    except IndexError:  # If user gave invalid pages
        return -1
    out_data.write()
    return num_pages
async def makePdf(Item: Item):
    """Apply the requested annotations to a base64 PDF and return it encoded.

    ``Item.pdffile`` is decoded and opened, each entry of
    ``Item.annotations`` is dispatched on its ``anType`` to the matching
    ``add*`` coroutine (unknown types are ignored), and the annotated
    document is written to a uniquely named temporary file so that it can be
    read back and base64-encoded.

    Returns:
        ``{"result": <base64-encoded PDF bytes>}``.
    """
    pdfBytes = base64.b64decode(Item.pdffile)
    reader = PdfReader(fdata=pdfBytes)
    annotator = PdfAnnotator(reader)

    handlers = {
        "line": addLine,
        "polyline": addLine,
        "highlighter": addHighlight,
        "highlighterPolyline": addHighlight,
        "text": addText,
        "identity": addIdentity,
    }
    for i in Item.annotations:
        handler = handlers.get(i.anType)
        if handler is not None:
            await handler(annotator, i)

    # Creating file results
    fileName = str(uuid.uuid4()) + ".pdf"
    try:
        annotator.write(fileName)
        with open(fileName, "rb") as pdf_file:
            encoded_string = base64.b64encode(pdf_file.read())
    finally:
        # Do not leave the temporary file behind when writing or reading
        # fails.
        if os.path.exists(fileName):
            os.remove(fileName)
    return {"result": encoded_string}
def remove_chainpoint_proof_and_hash(pdf_file):
    """Check a PDF's embedded chainpoint proof against the PDF's own hash.

    Works on a temporary copy (``__<basename>`` in the current directory):
    the proof is read from the Info dictionary and blanked, the copy is
    rewritten, and its SHA-256 is compared with the proof's ``targetHash``.
    The temporary copy is always removed.

    Args:
        pdf_file: Path of the certificate PDF.

    Returns:
        ``(digest_bytes, txid)`` when the hash matches the proof, where
        ``txid`` is the first anchor's ``sourceId``. ``(None, None)`` when
        the proof is missing, is not valid JSON, or the hash differs.
    """
    filename = os.path.basename(pdf_file)
    tmp_filename = '__' + filename
    shutil.copy(pdf_file, tmp_filename)

    try:
        # get txid and target hash from proof
        pdf = PdfReader(tmp_filename)
        try:
            proof = json.loads(pdf.Info.chainpoint_proof.decode())
        except (AttributeError, json.decoder.JSONDecodeError):
            # TODO: log error
            return None, None

        txid = proof['anchors'][0]['sourceId']
        targetHash = proof['targetHash']

        # remove the proof and get the hash
        metadata = PdfDict(chainpoint_proof='')
        pdf.Info.update(metadata)
        PdfWriter().write(tmp_filename, pdf)

        with open(tmp_filename, 'rb') as cert:
            # note that the cert_hash is a hash object -- can use hexdigest() to debug
            cert_hash = hashlib.sha256(cert.read())
    finally:
        # Runs on the early returns and on errors too, so the copy no
        # longer leaks.
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)

    if targetHash == cert_hash.hexdigest():
        return cert_hash.digest(), txid
    return None, None
Esempio n. 20
0
 def __init__(self,
              filename_or_object,
              width=None,
              height=None,
              kind="direct"):
     """Load the first page of a PDF and work out its drawing size.

     Args:
         filename_or_object: Path or file-like object; a file-like object
             is rewound before reading.
         width: Requested width; its meaning depends on *kind*.
         height: Requested height; its meaning depends on *kind*.
         kind: ``"direct"``/``"absolute"`` use the given size as is,
             ``"percentage"``/``"%"`` scale the page size by the given
             percentages, ``"bound"``/``"proportional"`` fit inside the
             given box keeping the aspect ratio, and ``"dynamic"`` only
             sets ``self.dynamic``. Missing sizes fall back to the page's
             own size, except for the percentage kinds.
     """
     if hasattr(filename_or_object, "read"):
         filename_or_object.seek(0)
     first_page = PdfReader(filename_or_object, decompress=False).pages[0]
     self.xobj = pagexobj(first_page)
     self.dynamic = 0

     # Natural size of the page, from its bounding box.
     left, bottom, right, top = self.xobj.BBox
     natural_w = right - left
     natural_h = top - bottom
     self._imgw, self._imgh = natural_w, natural_h

     if kind in ("direct", "absolute"):
         self.drawWidth = width or natural_w
         self.drawHeight = height or natural_h
     elif kind in ("percentage", "%"):
         self.drawWidth = natural_w * width * 0.01
         self.drawHeight = natural_h * height * 0.01
     elif kind in ("bound", "proportional"):
         box_w = width or natural_w
         box_h = height or natural_h
         factor = min(float(box_w) / natural_w, float(box_h) / natural_h)
         self.drawWidth = natural_w * factor
         self.drawHeight = natural_h * factor
     elif kind == "dynamic":
         self.dynamic = 1
Esempio n. 21
0
def upload_book(request):
    """Handle the book upload form, storing only the requested page range.

    On a valid POST, the pages named by the form's ``page`` field (``"N"``
    or ``"N-M"``, 1-based and inclusive) are extracted from the uploaded PDF
    into ``media/extracted_page_<N>-<M>.pdf``, the model instance is saved
    pointing at that file, and the client is redirected to ``book_list``.
    Otherwise the form is rendered, with its errors on an invalid POST.
    """
    if request.method == 'POST':
        form = BookForm(request.POST, request.FILES)
        if form.is_valid():
            form2 = form.save(commit=False)
            inpfn = form.cleaned_data['pdf']
            print(inpfn)

            page_range = [int(y) for y in form.cleaned_data['page'].split('-')]
            # Normalise before reading the bounds, so that a single page
            # ("7") is treated as 7-7 instead of raising IndexError.
            page_start, page_end = (page_range + page_range[-1:])[:2]

            out_name = 'extracted_page_{}-{}.pdf'.format(page_start, page_end)
            outfn = os.path.join('media', out_name)
            pages = PdfReader(inpfn).pages
            outdata = PdfWriter(outfn)

            for pagenum in range(page_start, page_end + 1):
                outdata.addpage(pages[pagenum - 1])
            outdata.write()
            form2.pdf = out_name
            form2.save()
            return redirect('book_list')
    else:
        form = BookForm()
    return render(request, 'upload_book.html', {
        'form': form
    })
Esempio n. 22
0
def re_arrange(file_path, output_file_name, dic):
    """Write a copy of a PDF with its pages in a new order.

    Pages are appended to the output in the iteration order of *dic*. Each
    value is the 1-based number of the source page to append. The key is
    only used in the progress message.

    Parameters:
        file_path : Path of the pdf file to be modified
        output_file_name : Name of the new file, created in the same
            directory as ``file_path``
        dic  : A dictionary with key value pairs of pages (new -> old).
    Returns:
        None
    """
    file_path = Path(file_path)
    pdf_obj = PdfReader(file_path)
    total_pages = len(pdf_obj.pages)
    print("Total Pages in PDF are:", total_pages)

    writer = PdfWriter()

    # new and old here mean the new position of the "old" page location
    for new, old in dic.items():
        writer.addpage(pdf_obj.pages[old - 1])
        print(f"page{new} added from {old}")

    # Build the sibling path portably. The previous hard-coded "\\"
    # separator produced a wrong file name outside Windows.
    writer.write(file_path.parent / output_file_name)
Esempio n. 23
0
    def __init__(self,
                 filename_or_object,
                 width=None,
                 height=None,
                 kind='direct'):
        """Load the first page of a PDF and work out its drawing size.

        Args:
            filename_or_object: Path or file-like object; a file-like
                object is rewound before reading.
            width: Requested width; falls back to the page width.
            height: Requested height; falls back to the page height.
            kind: ``'direct'``/``'absolute'`` draw at the requested size.
                ``'bound'``/``'proportional'`` fit the page inside the
                requested box keeping its aspect ratio. When either
                dimension is missing, the direct behaviour is used
                whatever *kind* says.
        """
        # A file-like object may have been read already; start from the top.
        if hasattr(filename_or_object, 'read'):
            filename_or_object.seek(0)
        first_page = PdfReader(filename_or_object, decompress=False).pages[0]
        self.xobj = pagexobj(first_page)

        left, bottom, right, top = self.xobj.BBox
        self._w, self._h = right - left, top - bottom

        # Falsy requested sizes fall back to the page's own size.
        self.imageWidth = width or self._w
        self.imageHeight = height or self._h
        self.__ratio = float(self.imageWidth) / self.imageHeight

        size_incomplete = width is None or height is None
        if kind in ('direct', 'absolute') or size_incomplete:
            self.drawWidth = width or self.imageWidth
            self.drawHeight = height or self.imageHeight
        elif kind in ('bound', 'proportional'):
            factor = min(float(width) / self._w, float(height) / self._h)
            self.drawWidth = self._w * factor
            self.drawHeight = self._h * factor
Esempio n. 24
0
def strip_pages_pdf(indir,
                    infile,
                    outdir=None,
                    outfile=None,
                    numpages=1,
                    keep=False):
    '''
    Drop the first ``numpages`` pages of a PDF, or keep only those pages.

    With ``keep`` false (the default) the leading pages are removed; with
    ``keep`` true only the leading pages are written. ``outdir`` and
    ``outfile`` default to the input location, so omitting both replaces
    the input file.
    '''
    target_dir = indir if outdir is None else outdir
    target_name = infile if outfile is None else outfile

    writer = PdfWriter()
    source_path = os.path.join(indir, infile)
    target_path = os.path.join(target_dir, target_name)

    last_leading_index = numpages - 1
    for index, page in enumerate(PdfReader(source_path).pages):
        if keep:
            wanted = index <= last_leading_index
        else:
            wanted = index > last_leading_index
        if wanted:
            writer.addpage(page)

    writer.write(target_path)
Esempio n. 25
0
def total_pdf_pages_in_tar_gz(tfn):
    """Count the pages of every regular file in a gzipped tar of PDFs.

    Args:
        tfn: Path of the ``.tar.gz`` archive.

    Returns:
        A ``(pages, errors)`` tuple. ``pages`` maps each member's base name
        to its page count (0 when the member could not be read as a PDF).
        ``errors`` maps each distinct error message to the number of members
        that failed with it.
    """
    errors = {}
    pages = {}
    with tarfile.open(tfn, "r:gz") as tar:
        for tarinfo in tar:
            name = tarinfo.name.split('/')[-1]
            if tarinfo.isreg():
                try:
                    with tar.extractfile(tarinfo) as f:
                        pages[name] = len(PdfReader(f).pages)
                except Exception as e:
                    # Deliberately broad: any unreadable member is counted
                    # and the scan continues.
                    pages[name] = 0
                    # Not every exception has a .msg attribute; fall back
                    # to its string form.
                    msg = getattr(e, 'msg', None) or str(e)
                    errors[msg] = errors.get(msg, 0) + 1
            elif tarinfo.isdir():
                pass
            else:
                print("something else.")

    return (pages, errors)
Esempio n. 26
0
 def test_html_to_pdf(self):
     """Streamed output parses as a PDF with the expected producer and page count."""
     chunks = PDFStreamer(HTMLParser(self.html, CSS('')))
     buffer = BytesIO()
     buffer.writelines(chunks)

     pdf = PdfReader(fdata=buffer.getvalue())
     self.assertEqual(pdf['/Info']['/Producer'], '(bericht)')
     page_tree = pdf['/Root']['/Pages']
     self.assertEqual(page_tree['/Count'], '3')
Esempio n. 27
0
def go(inpfn, outfn):
    """Run ``adjust`` on a single-page PDF and write the result.

    Args:
        inpfn: Path of the input PDF, which must contain exactly one page
            (the unpacking below raises ``ValueError`` otherwise).
        outfn: Path of the PDF to write.
    """
    reader = PdfReader(inpfn, decompress=False)
    page, = reader.pages
    writer = PdfWriter()
    writer.addpage(adjust(page))
    # reader.Info is None for PDFs without an Info dictionary; fall back to
    # an empty one rather than failing.
    writer.trailer.Info = IndirectPdfDict(reader.Info or {})
    writer.write(outfn)
Esempio n. 28
0
 def new_page():
     """Render a one-page "Hello!" PDF with fpdf and return its pdfrw page."""
     document = FPDF()
     document.add_page()
     document.set_font("helvetica", size=36)
     document.text(50, 50, "Hello!")
     rendered = bytes(document.output())
     return PdfReader(fdata=rendered).pages[0]
Esempio n. 29
0
def pdf_insert_doi_using_pdfrw(req_content, doi):
    """Return a copy of a PDF with its DOI link printed on the first page.

    Args:
        req_content: Raw bytes of the source PDF.
        doi: DOI appended to ``https://doi.org/`` for the footer text.

    Returns:
        A buffer view (``BytesIO.getbuffer()``) over the generated PDF.
    """
    source = PdfReader(io.BytesIO(req_content))
    page_forms = [pagexobj(p) for p in source.pages]

    pdf_buffer = io.BytesIO()
    out = Canvas(pdf_buffer)

    for index, form in enumerate(page_forms):
        page_width, page_height = form.BBox[2], form.BBox[3]
        out.setPageSize((page_width, page_height))
        out.doForm(makerl(out, form))

        # Only the first page carries the DOI footer, centred near the
        # bottom edge.
        if index == 0:
            footer_text = "https://doi.org/{}".format(doi)
            out.saveState()
            out.setFont("Helvetica-Bold", 8)
            out.setFillColor(HexColor('#990100'))
            out.drawCentredString(form.BBox[2] / 2, 20, footer_text)
            out.restoreState()

        out.showPage()

    out.save()
    return pdf_buffer.getbuffer()
def _fill_pdf_metadata(out_file, issuer, issuer_address, columns, data):
    """Store certificate metadata in a PDF's Info dictionary, in place.

    Args:
        out_file: Path of the PDF to update; it is rewritten.
        issuer: Issuer name, always stored in the metadata object.
        issuer_address: Issuer address, always stored in the metadata object.
        columns: Comma-separated names of the *data* fields to include.
        data: Mapping of field values; names missing from it are skipped.
    """
    # Build the JSON metadata object from the requested columns.
    metadata_object = {
        field: data[field] for field in columns.split(",") if field in data
    }

    # issuer and issuer_address used to be separate metadata fields but now
    # live inside metadata_object, where they remain compulsory. For
    # backwards compatibility (certificates issued with v0.9.3 and earlier),
    # a validator must look for them in metadata_object first and, if they
    # are not found there, as separate metadata fields.
    metadata_object.update(issuer=issuer, issuer_address=issuer_address)

    # Add the metadata, together with an empty chainpoint proof entry.
    metadata = PdfDict(metadata_object=json.dumps(metadata_object),
                       chainpoint_proof='')
    document = PdfReader(out_file)
    document.Info.update(metadata)
    PdfWriter().write(out_file, document)

    # Progress indicator: one dot per processed file.
    print('.', end="", flush=True)