Python ArrayObject Examples, PyPDF2.generic.ArrayObject Python Examples

Example #1

0

Show file

    def createHighlight(self,x1, y1, x2, y2, meta, color = [1, 0, 0]):
        """Build a ``/Highlight`` annotation dictionary for one rectangle.

        :param x1, y1: first corner, written first into ``/Rect``.
        :param x2, y2: opposite corner, written second into ``/Rect``.
        :param meta: mapping providing the ``"author"`` (``/T``) and
            ``"contents"`` (``/Contents``) strings.
        :param color: iterable of numbers stored as floats under ``/C``.
        :return: the populated ``DictionaryObject``.
        """
        # Entries are listed in the order they are written to the dictionary.
        entries = [
            ("/F", NumberObject(4)),
            ("/Type", NameObject("/Annot")),
            ("/Subtype", NameObject("/Highlight")),
            ("/T", TextStringObject(meta["author"])),
            ("/Contents", TextStringObject(meta["contents"])),
            ("/C", ArrayObject([FloatObject(c) for c in color])),
            ("/Rect", ArrayObject(
                [FloatObject(v) for v in (x1, y1, x2, y2)])),
            # Corner order: (x1, y2), (x2, y2), (x1, y1), (x2, y1).
            ("/QuadPoints", ArrayObject(
                [FloatObject(v)
                 for v in (x1, y2, x2, y2, x1, y1, x2, y1)])),
        ]

        annotation = DictionaryObject()
        for key, value in entries:
            annotation[NameObject(key)] = value
        return annotation

Example #2

0

Show file

File: splitter_tmp.py Project: chebrolu/Question-Bank

def create_annot_box(x1, y1, x2, y2, meta, color=[1, 0, 0]):
    """Return a ``/Square`` annotation dictionary for the given rectangle.

    ``meta`` must provide the ``"author"`` and ``"contents"`` strings, which
    are stored under ``/T`` and ``/Contents``. ``color`` is an iterable of
    numbers stored as floats under ``/C``. The four coordinates are written
    to ``/Rect`` in the order ``x1, y1, x2, y2``.
    """
    box = DictionaryObject()
    box[NameObject("/F")] = NumberObject(4)
    box[NameObject("/Type")] = NameObject("/Annot")
    box[NameObject("/Subtype")] = NameObject("/Square")
    box[NameObject("/T")] = TextStringObject(meta["author"])
    box[NameObject("/Contents")] = TextStringObject(meta["contents"])
    box[NameObject("/C")] = ArrayObject([FloatObject(c) for c in color])
    box[NameObject("/Rect")] = ArrayObject(
        [FloatObject(v) for v in (x1, y1, x2, y2)])
    return box

Example #3

0

Show file

File: PyPDF2Highlight.py Project: kod2nd/poor-man-redactor

def createHighlight(x0, y0, x1, y1, color=[0, 0, 0]):
    """Build a ``/Highlight`` annotation dictionary without author/contents.

    ``/Rect`` receives ``x0, y0, x1, y1``; ``/QuadPoints`` receives the four
    corners in the order (x0, y1), (x1, y1), (x0, y0), (x1, y0). ``color`` is
    an iterable of numbers stored as floats under ``/C``.
    """
    rect_values = (x0, y0, x1, y1)
    quad_values = (x0, y1, x1, y1, x0, y0, x1, y0)

    highlight = DictionaryObject()
    highlight[NameObject("/F")] = NumberObject(4)
    highlight[NameObject("/Type")] = NameObject("/Annot")
    highlight[NameObject("/Subtype")] = NameObject("/Highlight")
    highlight[NameObject("/C")] = ArrayObject(
        [FloatObject(c) for c in color])
    highlight[NameObject("/Rect")] = ArrayObject(
        [FloatObject(v) for v in rect_values])
    highlight[NameObject("/QuadPoints")] = ArrayObject(
        [FloatObject(v) for v in quad_values])
    return highlight

Example #4

0

Show file

File: pdf.py Project: mausvt/flectra

    def addAttachment(self, name, data, subtype=None):
        """
        Add an attachment to the pdf. Supports adding multiple attachments, while respecting PDF/A rules.

        :param name: The name of the attachment
        :param data: The data of the attachment
        :param subtype: The mime-type of the attachment. This is required by PDF/A, but not essential otherwise.
        It should take the form of "/xxx#2Fxxx". E.g. for "text/xml": "/text#2Fxml".
        A plain mime-type ("text/xml") is converted to that form; a value that still does not
        match the expected format is logged and replaced by an empty subtype.
        """
        adapted_subtype = subtype
        if subtype:
            # If we receive the subtype in an 'unformatted' (mimetype) format, we'll try to convert it to a pdf-valid one
            if REGEX_SUBTYPE_UNFORMATED.match(subtype):
                adapted_subtype = '/' + subtype.replace('/', '#2F')

            if not REGEX_SUBTYPE_FORMATED.match(adapted_subtype):
                # The subtype still does not match the correct format, so we will not add it to the document
                _logger.warning(
                    "Attempt to add an attachment with the incorrect subtype '%s'. The subtype will be ignored.",
                    subtype)
                adapted_subtype = ''

        attachment = self._create_attachment_object({
            'filename': name,
            'content': data,
            'subtype': adapted_subtype,
        })
        # One name-tree entry is a (file name, file specification) pair.
        name_tree_entry = [attachment.getObject()['/F'], attachment]

        if self._root_object.get('/Names'):
            names_dictionary = self._root_object['/Names']
            if names_dictionary.get('/EmbeddedFiles'):
                names_dictionary['/EmbeddedFiles']['/Names'].extend(
                    name_tree_entry)
            else:
                # /Names exists but has no /EmbeddedFiles entry yet: add the
                # entry to the existing dictionary instead of replacing it,
                # so any other entries already stored under /Names are kept.
                embedded_files_names_dictionary = DictionaryObject()
                embedded_files_names_dictionary.update(
                    {NameObject("/Names"): ArrayObject(name_tree_entry)})
                names_dictionary.update({
                    NameObject("/EmbeddedFiles"):
                    embedded_files_names_dictionary
                })
        else:
            embedded_files_names_dictionary = DictionaryObject()
            embedded_files_names_dictionary.update(
                {NameObject("/Names"): ArrayObject(name_tree_entry)})
            embedded_files_dictionary = DictionaryObject()
            embedded_files_dictionary.update({
                NameObject("/EmbeddedFiles"):
                embedded_files_names_dictionary
            })
            self._root_object.update(
                {NameObject("/Names"): embedded_files_dictionary})

        if self._root_object.get('/AF'):
            attachment_array = self._root_object['/AF']
            attachment_array.extend([attachment])
        else:
            # Create a new object containing an array referencing embedded file
            # And reference this array in the root catalogue
            attachment_array = self._addObject(ArrayObject([attachment]))
            self._root_object.update({NameObject("/AF"): attachment_array})

Example #5

0

Show file

def createHighlight(bbox=(0, 0, 1, 1),
                    contents="",
                    color=[1, 1, 0],
                    author="iwasakishuto(@cabernet_rock)"):
    """Create a highlight annotation dictionary.

    Args:
        bbox (tuple)   : four numbers ``(x1, y1, x2, y2)`` locating the highlight;
                         written unchanged to ``/Rect``.
        contents (str) : text stored under ``/Contents``.
        color (list)   : numbers stored as floats under ``/C``. Defaults to ``[1,1,0]``.
        author (str)   : text stored under ``/T``. Defaults to ``"iwasakishuto(@cabernet_rock)"`` .

    Returns:
        DictionaryObject: the highlight annotation.

    Examples:
        >>> from gummy.utils import createHighlight, addHighlightToPage
        >>> from PyPDF2 import PdfFileWriter, PdfFileReader
        >>> page_no = 0
        >>> pdfOutput = PdfFileWriter()
        >>> with open("input.pdf", mode="rb") as inPdf:
        ...     pdfInput = PdfFileReader(inPdf)
        ...     page = pdfInput.getPage(page_no)
        ...     highlight = createHighlight(bbox=(10,10,90,90), contents="COMMENT", color=(1,1,0))
        ...     addHighlightToPage(highlight, page, pdfOutput)
        ...     pdfOutput.addPage(page)
        ...     with open("output.pdf", mode="wb") as outPdf:
        ...         pdfOutput.write(outPdf)
    """
    from PyPDF2.generic import (DictionaryObject, NumberObject, FloatObject,
                                NameObject, TextStringObject, ArrayObject)
    left, bottom, right, top = bbox
    # Corner order: (x1, y2), (x2, y2), (x1, y1), (x2, y1).
    corners = [left, top, right, top, left, bottom, right, bottom]

    annotation = DictionaryObject()
    annotation[NameObject("/F")] = NumberObject(4)
    annotation[NameObject("/Type")] = NameObject("/Annot")
    annotation[NameObject("/Subtype")] = NameObject("/Highlight")
    annotation[NameObject("/T")] = TextStringObject(author)
    annotation[NameObject("/Contents")] = TextStringObject(contents)
    annotation[NameObject("/C")] = ArrayObject(
        [FloatObject(component) for component in color])
    annotation[NameObject("/Rect")] = ArrayObject(
        [FloatObject(value) for value in bbox])
    annotation[NameObject("/QuadPoints")] = ArrayObject(
        [FloatObject(value) for value in corners])
    return annotation

Example #6

0

Show file

File: COISearchEngine.py Project: Berni1557/COISearchEngine

    def create_highlight(self, x1, y1, x2, y2, meta, color=[0, 1, 0]):
        """
        Build the annotation dictionary for a PDF highlight.

        Parameters
        ----------
        x1, y1 : float
            bottom left corner
        x2, y2 : float
            top right corner
        meta : dict
            must contain the keys "author" and "contents"
        color : iterable
            numeric components stored under /C, e.g. (r, g, b)

        Returns
        -------
        DictionaryObject
            the highlight annotation
        """
        fields = (
            ("/F", NumberObject(4)),
            ("/Type", NameObject("/Annot")),
            ("/Subtype", NameObject("/Highlight")),
            ("/T", TextStringObject(meta["author"])),
            ("/Contents", TextStringObject(meta["contents"])),
            ("/C", ArrayObject([FloatObject(c) for c in color])),
            ("/Rect", ArrayObject(
                [FloatObject(v) for v in (x1, y1, x2, y2)])),
            # top-left, top-right, bottom-left, bottom-right
            ("/QuadPoints", ArrayObject(
                [FloatObject(v)
                 for v in (x1, y2, x2, y2, x1, y1, x2, y1)])),
        )

        new_highlight = DictionaryObject()
        new_highlight.update(
            {NameObject(key): value for key, value in fields})
        return new_highlight

Example #7

0

Show file

File: pdf_highlighter.py Project: dmitrySorokin/pdf_highlighter

def _create_annotation(x1, y1, x2, y2, color, subtype):
    """Return a minimal annotation dictionary.

    Only three entries are set: ``/Subtype`` (from ``subtype``), ``/C``
    (``color`` components as floats) and ``/Rect`` (``x1, y1, x2, y2``).
    """
    result = DictionaryObject()
    result[NameObject('/Subtype')] = NameObject(subtype)
    result[NameObject('/C')] = ArrayObject(
        [FloatObject(component) for component in color])
    result[NameObject('/Rect')] = ArrayObject(
        [FloatObject(coord) for coord in (x1, y1, x2, y2)])
    return result

Example #8

0

Show file

File: FIM_US_dev_debug.py Project: ERIS-GIS/GIS_Dev

def createAnnotPdf(geom_type, myShapePdf):
    """Write a one-page PDF whose first annotation carries the shape's geometry.

    The coordinates are parsed out of the content stream of page 0 of
    ``myShapePdf`` and copied into the first annotation of a template PDF
    (``annot_poly`` or ``annot_line``, module-level names chosen by
    ``geom_type``). The annotation's ``/Vertices`` and ``/Rect`` are replaced,
    its ``/AP`` entry is removed and ``/T`` is set to ``u'ERIS'``.

    :param geom_type: ``'POLYGON'`` or ``'POLYLINE'`` (case-insensitive).
    :param myShapePdf: path of the PDF containing the drawn geometry.
    :return: path of the written file, ``annot.pdf`` inside ``scratch``.
    """
    # input variables
    # part 1: read geometry pdf to get the vertices and rectangle to use
    # NOTE(review): the file handles opened for PdfFileReader here and below
    # are never closed explicitly.
    source = PdfFileReader(open(myShapePdf, 'rb'))
    geomPage = source.getPage(0)
    mystr = geomPage.getObject()['/Contents'].getData()
    # to pinpoint the string part: 1.19997 791.75999 m 1.19997 0.19466 l 611.98627 0.19466 l 611.98627 791.75999 l 1.19997 791.75999 l
    # the format seems to follow x1 y1 m x2 y2 l x3 y3 l x4 y4 l x5 y5 l
    # Keep the text between the first 'M\r\n' and the following 'S\r\n'.
    geomString = mystr.split('S\r\n')[0].split('M\r\n')[1]
    # Drop the path operators and empty tokens; what remains is expected to be
    # the alternating x / y values as strings.
    coordsString = [
        value for value in geomString.split(' ')
        if value not in ['m', 'l', '']
    ]

    # part 2: update geometry in the map
    # NOTE(review): any other geom_type leaves pdf_geom unassigned, so the
    # getPage call below fails with a NameError/UnboundLocalError.
    if geom_type.upper() == 'POLYGON':
        pdf_geom = PdfFileReader(open(annot_poly, 'rb'))
    elif geom_type.upper() == 'POLYLINE':
        pdf_geom = PdfFileReader(open(annot_line, 'rb'))
    page_geom = pdf_geom.getPage(0)

    annot = page_geom['/Annots'][0]
    # Build the source text of an update call with one FloatObject(...) per
    # coordinate token, then execute it.
    # NOTE(review): exec() on text taken from a PDF content stream runs
    # whatever that stream contains; consider building the ArrayObject
    # directly once the exact token format is confirmed.
    updateVertices = "annot.getObject().update({NameObject('/Vertices'):ArrayObject([FloatObject(" + coordsString[
        0] + ")"
    for item in coordsString[1:]:
        updateVertices = updateVertices + ',FloatObject(' + item + ')'
    updateVertices = updateVertices + "])})"
    exec(updateVertices)

    # Split the tokens into x values (even index) and y values (odd index).
    # NOTE(review): the range stops at len - 1, so the last token is never
    # read here — confirm whether that is intentional.
    xcoords = []
    ycoords = []
    for i in range(0, len(coordsString) - 1):
        if i % 2 == 0:
            xcoords.append(float(coordsString[i]))
        else:
            ycoords.append(float(coordsString[i]))

    # below rect seems to be geom bounding box coordinates: xmin, ymin, xmax,ymax
    annot.getObject().update({
        NameObject('/Rect'):
        ArrayObject([
            FloatObject(min(xcoords)),
            FloatObject(min(ycoords)),
            FloatObject(max(xcoords)),
            FloatObject(max(ycoords))
        ])
    })
    annot.getObject().pop('/AP')  # this is to get rid of the ghost shape

    annot.getObject().update({NameObject('/T'): createStringObject(u'ERIS')})

    # Write the modified template page out as a new single-page PDF.
    output = PdfFileWriter()
    output.addPage(page_geom)
    annotPdf = os.path.join(scratch, "annot.pdf")
    outputStream = open(annotPdf, "wb")
    #output.setPageMode('/UseOutlines')
    output.write(outputStream)
    outputStream.close()
    output = None
    return annotPdf

Example #9

0

Show file

def addHighlightToPage(highlight, page, output):
    """Register a highlight with the writer and attach it to a page.

    The highlight is added to ``output`` and the returned reference is
    appended to the page's ``/Annots`` array, which is created when the page
    has none.

    Args:
        highlight (DictionaryObject) : Highlight information.
        page (PageObject)            : A single page within a PDF file.
        output (PdfFileWriter)       : A pdf writer.

    Examples:
        >>> from gummy.utils import createHighlight, addHighlightToPage
        >>> from PyPDF2 import PdfFileWriter, PdfFileReader
        >>> page_no = 0
        >>> pdfOutput = PdfFileWriter()
        >>> with open("input.pdf", mode="rb") as inPdf:
        ...     pdfInput = PdfFileReader(inPdf)
        ...     page = pdfInput.getPage(page_no)
        ...     highlight = createHighlight(bbox=(10,10,90,90), contents="COMMENT", color=(1,1,0))
        ...     addHighlightToPage(highlight, page, pdfOutput)
        ...     pdfOutput.addPage(page)
        ...     with open("output.pdf", mode="wb") as outPdf:
        ...         pdfOutput.write(outPdf)
    """
    from PyPDF2.generic import (NameObject, ArrayObject)
    reference = output._addObject(highlight)
    if "/Annots" not in page:
        # First annotation on this page: start a new array.
        page[NameObject("/Annots")] = ArrayObject([reference])
        return
    page[NameObject("/Annots")].append(reference)

Example #10

0

Show file

    def addHighlightToPage(self,highlight, page, output):
        """Add ``highlight`` to ``output`` and reference it from ``page``.

        The reference returned by the writer is appended to the page's
        ``/Annots`` array; the array is created if the page has none.
        """
        annotation_ref = output._addObject(highlight)
        has_annotations = "/Annots" in page

        if not has_annotations:
            page[NameObject("/Annots")] = ArrayObject([annotation_ref])
            return
        page[NameObject("/Annots")].append(annotation_ref)

Example #11

0

Show file

File: splitter_tmp.py Project: chebrolu/Question-Bank

def add_annot_to_page(annot, page, output):
    """Register ``annot`` with the writer ``output`` and link it from ``page``.

    Appends the new reference to the page's ``/Annots`` array, creating the
    array when the page does not have one yet.
    """
    ref = output._addObject(annot)
    if "/Annots" not in page:
        page[NameObject("/Annots")] = ArrayObject([ref])
    else:
        page[NameObject("/Annots")].append(ref)

Example #12

0

Show file

File: document.py Project: glins97/PPA

    def _create_highlight(self,
                          x0,
                          y0,
                          width,
                          height,
                          comment,
                          author='',
                          color=[0, 0, 0, 0]):
        """Draw a rectangle and build the matching highlight annotation.

        ``self.add_rect`` is called with the same origin and size first. The
        returned dictionary then describes the rectangle spanning
        ``(x0, y0)`` to ``(x0 + width, y0 + height)``.

        :param x0, y0: origin corner of the rectangle.
        :param width: horizontal extent added to ``x0``.
        :param height: vertical extent added to ``y0``.
        :param comment: text stored under ``/Contents``.
        :param author: text stored under ``/T``.
        :param color: iterable of numbers stored as floats under ``/C``.
        :return: the highlight ``DictionaryObject``.
        """
        self.add_rect(x0, y0, width, height)

        x1 = x0 + width
        # The vertical extent must come from ``height``; using ``width`` here
        # produced a square annotation regardless of the requested height.
        y1 = y0 + height

        highlight = DictionaryObject()
        highlight.update({
            NameObject("/F"):
            NumberObject(4),
            NameObject("/Type"):
            NameObject("/Annot"),
            NameObject("/Subtype"):
            NameObject("/Highlight"),
            NameObject("/T"):
            TextStringObject(author),
            NameObject("/Contents"):
            TextStringObject(comment),
            NameObject("/C"):
            ArrayObject([FloatObject(c) for c in color]),
            NameObject("/Rect"):
            ArrayObject([
                FloatObject(x0),
                FloatObject(y0),
                FloatObject(x1),
                FloatObject(y1)
            ]),
            # Corner order: (x0, y1), (x1, y1), (x0, y0), (x1, y0).
            NameObject("/QuadPoints"):
            ArrayObject([
                FloatObject(x0),
                FloatObject(y1),
                FloatObject(x1),
                FloatObject(y1),
                FloatObject(x0),
                FloatObject(y0),
                FloatObject(x1),
                FloatObject(y0)
            ]),
        })

        return highlight

Example #13

0

Show file

    def addAttachment(self, name, data, subtype=""):
        """
        Add an attachment to the pdf. Supports adding multiple attachments, while respecting PDF/A rules.

        :param name: The name of the attachment
        :param data: The data of the attachment
        :param subtype: The mime-type of the attachment. This is required by PDF/A, but not essential otherwise.
        It should take the form of "/xxx#2Fxxx". E.g. for "text/xml": "/text#2Fxml".
        The plain value 'application/xml' is converted to '/application#2Fxml'; any other value is
        passed through unchanged.
        """
        if subtype == 'application/xml':
            subtype = '/application#2Fxml'

        attachment = self._create_attachment_object({
            'filename': name,
            'content': data,
            'subtype': subtype,
        })
        # One name-tree entry is a (file name, file specification) pair.
        name_tree_entry = [attachment.getObject()['/F'], attachment]

        if self._root_object.get('/Names'):
            names_dictionary = self._root_object['/Names']
            if names_dictionary.get('/EmbeddedFiles'):
                names_dictionary['/EmbeddedFiles']['/Names'].extend(
                    name_tree_entry)
            else:
                # /Names exists but has no /EmbeddedFiles entry yet: add the
                # entry to the existing dictionary instead of replacing it,
                # so any other entries already stored under /Names are kept.
                embedded_files_names_dictionary = DictionaryObject()
                embedded_files_names_dictionary.update(
                    {NameObject("/Names"): ArrayObject(name_tree_entry)})
                names_dictionary.update({
                    NameObject("/EmbeddedFiles"):
                    embedded_files_names_dictionary
                })
        else:
            embedded_files_names_dictionary = DictionaryObject()
            embedded_files_names_dictionary.update(
                {NameObject("/Names"): ArrayObject(name_tree_entry)})
            embedded_files_dictionary = DictionaryObject()
            embedded_files_dictionary.update({
                NameObject("/EmbeddedFiles"):
                embedded_files_names_dictionary
            })
            self._root_object.update(
                {NameObject("/Names"): embedded_files_dictionary})

        if self._root_object.get('/AF'):
            attachment_array = self._root_object['/AF']
            attachment_array.extend([attachment])
        else:
            # Create a new object containing an array referencing embedded file
            # And reference this array in the root catalogue
            attachment_array = self._addObject(ArrayObject([attachment]))
            self._root_object.update({NameObject("/AF"): attachment_array})

Example #14

0

Show file

File: guide_NLP_model.py Project: Charan1010/InformationRetrieval

    def addHighlightToPage(highlight, page, output):
        """Attach ``highlight`` to ``page`` through the writer ``output``.

        Prints the reference returned by the writer and which branch was
        taken: appending to an existing ``/Annots`` array or creating it.
        """
        highlight_ref = output._addObject(highlight)
        print(highlight_ref)

        if "/Annots" not in page:
            print("Annots not in page")
            page[NameObject("/Annots")] = ArrayObject([highlight_ref])
            return

        print("Annots in page")
        page[NameObject("/Annots")].append(highlight_ref)

Example #15

0

Show file

File: utils.py Project: esantus/ConceptClassification

def addHighlightToPage(highlight, page, output):
    '''
    Add the annotation object to the page.

    The annotation is registered with ``output`` and the resulting reference
    is stored in the page's "/Annots" array (created when absent).
    '''
    new_ref = output._addObject(highlight)
    page_has_annots = "/Annots" in page

    if page_has_annots:
        page[NameObject("/Annots")].append(new_ref)
        return
    page[NameObject("/Annots")] = ArrayObject([new_ref])

Example #16

0

Show file

File: handlePDF.py Project: cadnant/oomap

def get_pdf_gpts(m, poly):
    """
    Build the GPTS array for the corners of the map rect.

    Each (x, y) pair in ``poly`` is passed through the inverse of the
    projection built from ``m.srs``; the result's ``y`` then ``x`` values are
    appended, so the array holds pairs in latitude, longitude order.

    m = mapnik map object
    poly = tuple of (x,y) tuples describing rect polygon - allows geocoding of rotated maps.
    """
    corners = ArrayObject()
    projection = mapnik.Projection(m.srs)

    for map_x, map_y in poly:
        geographic = projection.inverse(mapnik.Coord(map_x, map_y))
        # lat,lon order according to the specification
        latitude = FloatObject(str(geographic.y))
        longitude = FloatObject(str(geographic.x))
        corners.extend([latitude, longitude])

    return corners

Example #17

0

Show file

File: utils.py Project: esantus/ConceptClassification

def createHighlight(x1, y1, x2, y2, meta, color=[1, 0, 0]):
    '''
    Create a highlight annotation dictionary for a box on a PDF page.

    Coordinates start in the bottom left. ``meta`` must provide the
    "author" and "contents" strings; ``color`` is an iterable of numbers
    stored as floats under "/C".
    '''
    def _floats(values):
        # Wrap each number as a FloatObject inside one ArrayObject.
        return ArrayObject([FloatObject(v) for v in values])

    annotation = DictionaryObject()
    annotation[NameObject("/F")] = NumberObject(4)
    annotation[NameObject("/Type")] = NameObject("/Annot")
    annotation[NameObject("/Subtype")] = NameObject("/Highlight")
    annotation[NameObject("/T")] = TextStringObject(meta["author"])
    annotation[NameObject("/Contents")] = TextStringObject(meta["contents"])
    annotation[NameObject("/C")] = _floats(color)
    annotation[NameObject("/Rect")] = _floats((x1, y1, x2, y2))
    # Corner order: (x1, y2), (x2, y2), (x1, y1), (x2, y1).
    annotation[NameObject("/QuadPoints")] = _floats(
        (x1, y2, x2, y2, x1, y1, x2, y1))
    return annotation

Example #18

0

Show file

File: handlePDF.py Project: cadnant/oomap

def get_pdf_measure(m, gcs, poly, bounds_default):
    """
    Build the PDF Measure dictionary (``/Type /Measure``, ``/Subtype /GEO``).

    ``/Bounds`` and ``/LPTS`` both refer to the same fixed eight-number array
    built here, ``/GPTS`` comes from ``get_pdf_gpts(m, poly)`` and ``/GCS``
    is the ``gcs`` argument as given. ``bounds_default`` is not used.
    """
    # The bounds array is equivalent to the map's neatline, i.e. the border
    # delineating the extent of geographic data on the output map.
    unit_square = ArrayObject(
        [FloatObject(str(value)) for value in [0, 1, 0, 0, 1, 0, 1, 1]])

    result = DictionaryObject()
    result[NameObject('/Type')] = NameObject('/Measure')
    result[NameObject('/Subtype')] = NameObject('/GEO')
    result[NameObject('/Bounds')] = unit_square
    result[NameObject('/GPTS')] = get_pdf_gpts(m, poly)
    result[NameObject('/LPTS')] = unit_square
    result[NameObject('/GCS')] = gcs
    return result

Example #19

0

Show file

File: COISearchEngine.py Project: Berni1557/COISearchEngine

    def add_highlight_to_page(self, highlight, page, output):
        """
        Attach a highlight annotation to a PDF page.

        The highlight is registered with ``output`` and the returned
        reference is stored in the page's /Annots array, which is created
        when the page does not have one.

        Parameters
        ----------
        highlight : Highlight object
        page : PDF page object
        output : PdfFileWriter object
        """
        reference = output._addObject(highlight)

        if "/Annots" not in page:
            page[NameObject("/Annots")] = ArrayObject([reference])
        else:
            page[NameObject("/Annots")].append(reference)

Example #20

0

Show file

File: document.py Project: glins97/PPA

    def _add_highlight(self,
                       x0,
                       y0,
                       width,
                       height,
                       comment,
                       author='',
                       color=[0, 0, 0, 0]):
        """Create a highlight and attach it to page 0 of ``self.pdf``.

        All arguments are forwarded unchanged to ``self._create_highlight``.
        The resulting annotation is registered with ``self.pdf`` and its
        reference is added to the first page's ``/Annots`` array, which is
        created when missing.
        """
        annotation = self._create_highlight(x0, y0, width, height, comment,
                                            author, color)
        annotation_ref = self.pdf._addObject(annotation)

        if "/Annots" not in self.pdf.getPage(0):
            self.pdf.getPage(0)[NameObject("/Annots")] = ArrayObject(
                [annotation_ref])
            return
        self.pdf.getPage(0)[NameObject("/Annots")].append(annotation_ref)

Example #21

0

Show file

def add_comment(output, page, text, rectangle):
    """Add a ``/FreeText`` annotation showing ``text`` to ``page``.

    The annotation is registered with the writer ``output`` and the returned
    reference is appended to the page's ``/Annots`` array. The array is
    created when the page has none, so pages without existing annotations no
    longer raise ``KeyError``.

    :param output: writer providing ``_addObject``.
    :param page: page object that receives the annotation reference.
    :param text: text stored under ``/Contents``.
    :param rectangle: value passed to ``RectangleObject`` for ``/Rect``.
    """
    obj = output._addObject(
        DictionaryObject({
            NameObject('/DA'):
            TextStringObject(' /Helv 10 Tf'),
            NameObject('/Subtype'):
            NameObject('/FreeText'),
            NameObject('/Rect'):
            RectangleObject(rectangle),
            NameObject('/Type'):
            NameObject('/Annot'),
            NameObject('/Contents'):
            TextStringObject(text),
            NameObject('/C'):
            ArrayObject([FloatObject(1),
                         FloatObject(1),
                         FloatObject(1)]),
        }))
    if '/Annots' in page:
        page['/Annots'].append(obj)
    else:
        # First annotation on this page: start a new array.
        page[NameObject('/Annots')] = ArrayObject([obj])

Example #22

0

Show file

File: pdf.py Project: cisagov/con-pca-api

def append_attachment(writer: PdfFileWriter, fname: str, fdata: bytes):
    """Append an attachment to a PDF.

    Builds an embedded-file stream from ``fdata`` and a file specification
    named ``fname``, then adds the ``(name, filespec)`` pair to the
    ``/Names`` array under ``/Names`` -> ``/EmbeddedFiles`` in the writer's
    root object. Missing levels of that structure are created; existing
    entries are kept.

    :param writer: the writer whose ``_root_object`` is updated in place.
    :param fname: file name stored in the file specification and name array.
    :param fdata: raw content of the attachment.
    """
    # The entry for the file
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({NameObject("/Type"): NameObject("/EmbeddedFile")})

    # The Filespec entry
    efEntry = DictionaryObject()
    efEntry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),
        NameObject("/EF"): efEntry,
    })

    root = writer._root_object
    if "/Names" not in root:
        # No name dictionary yet: create it on the root.
        root.update({NameObject("/Names"): DictionaryObject()})
    names_dictionary = root["/Names"]

    if "/EmbeddedFiles" not in names_dictionary:
        # /Names can exist without /EmbeddedFiles; previously this case
        # raised KeyError. Add the entry without touching the others.
        embedded_files_names = DictionaryObject()
        embedded_files_names.update({NameObject("/Names"): ArrayObject()})
        names_dictionary.update(
            {NameObject("/EmbeddedFiles"): embedded_files_names})

    # Name array entries are stored as consecutive (name, filespec) pairs.
    names_dictionary["/EmbeddedFiles"]["/Names"].extend(
        [createStringObject(fname), filespec])

Example #23

0

Show file

    def _update_metadata_add_attachment(self, pdf_metadata, output_intents):
        '''Embed the Factur-X XML and update the document root and metadata.

        This method is inspired from the code of the addAttachment()
        method of the PyPDF2 lib.

        It embeds ``self.factx.xml_str`` as a file attachment, adds the
        passed output intents, sets ``/AF``, ``/Metadata``, ``/Names`` and
        ``/PageMode`` on the root object (``/Names`` is replaced), and finally
        calls ``self.addMetadata``.

        :param pdf_metadata: passed to ``_prepare_pdf_metadata_xml`` and
            ``_prepare_pdf_metadata_txt``.
        :param output_intents: iterable of
            ``(output_intent_dict, dest_output_profile_dict)`` pairs.
        '''
        # The entry for the file
        facturx_xml_str = self.factx.xml_str
        # The checksum must cover the embedded payload. The previous code
        # hashed nothing, so /CheckSum was always the digest of empty input.
        if isinstance(facturx_xml_str, bytes):
            checksum_input = facturx_xml_str
        else:
            checksum_input = facturx_xml_str.encode('utf-8')
        md5sum = hashlib.md5(checksum_input).hexdigest()
        md5sum_obj = createStringObject(md5sum)
        params_dict = DictionaryObject({
            NameObject('/CheckSum'): md5sum_obj,
            NameObject('/ModDate'): createStringObject(_get_pdf_timestamp()),
            # NOTE(review): the size is written through NameObject rather
            # than a numeric object; kept as-is.
            NameObject('/Size'): NameObject(str(len(facturx_xml_str))),
            })
        file_entry = DecodedStreamObject()
        file_entry.setData(facturx_xml_str)  # here we integrate the file itself
        file_entry.update({
            NameObject("/Type"): NameObject("/EmbeddedFile"),
            NameObject("/Params"): params_dict,
            # 2F is '/' in hexadecimal
            NameObject("/Subtype"): NameObject("/text#2Fxml"),
            })
        file_entry_obj = self._addObject(file_entry)
        # The Filespec entry
        ef_dict = DictionaryObject({
            NameObject("/F"): file_entry_obj,
            NameObject('/UF'): file_entry_obj,
            })

        xmp_filename = self.factx.flavor.details['xmp_filename']
        fname_obj = createStringObject(xmp_filename)
        filespec_dict = DictionaryObject({
            NameObject("/AFRelationship"): NameObject("/Data"),
            NameObject("/Desc"): createStringObject("Factur-X Invoice"),
            NameObject("/Type"): NameObject("/Filespec"),
            NameObject("/F"): fname_obj,
            NameObject("/EF"): ef_dict,
            NameObject("/UF"): fname_obj,
            })
        filespec_obj = self._addObject(filespec_dict)
        name_arrayobj_cdict = {fname_obj: filespec_obj}

        # TODO: add back additional attachments?
        logger.debug('name_arrayobj_cdict=%s', name_arrayobj_cdict)
        name_arrayobj_content_sort = list(
            sorted(name_arrayobj_cdict.items(), key=lambda x: x[0]))
        logger.debug('name_arrayobj_content_sort=%s', name_arrayobj_content_sort)
        # Flatten the sorted (name, filespec) pairs into the name array and
        # collect the filespecs for the /AF entry.
        name_arrayobj_content_final = []
        af_list = []
        for (fname_obj, filespec_obj) in name_arrayobj_content_sort:
            name_arrayobj_content_final += [fname_obj, filespec_obj]
            af_list.append(filespec_obj)
        embedded_files_names_dict = DictionaryObject({
            NameObject("/Names"): ArrayObject(name_arrayobj_content_final),
            })

        # Then create the entry for the root, as it needs a
        # reference to the Filespec
        embedded_files_dict = DictionaryObject({
            NameObject("/EmbeddedFiles"): embedded_files_names_dict,
            })
        res_output_intents = []
        logger.debug('output_intents=%s', output_intents)
        for output_intent_dict, dest_output_profile_dict in output_intents:
            dest_output_profile_obj = self._addObject(
                dest_output_profile_dict)
            # TODO detect if there are no other objects in output_intent_dest_obj
            # than /DestOutputProfile
            output_intent_dict.update({
                NameObject("/DestOutputProfile"): dest_output_profile_obj,
                })
            output_intent_obj = self._addObject(output_intent_dict)
            res_output_intents.append(output_intent_obj)

        # Update the root
        xmp_level_str = self.factx.flavor.details['levels'][self.factx.flavor.level]['xmp_str']
        xmp_template = self.factx.flavor.get_xmp_xml()
        metadata_xml_str = _prepare_pdf_metadata_xml(xmp_level_str, xmp_filename, xmp_template, pdf_metadata)
        metadata_file_entry = DecodedStreamObject()
        metadata_file_entry.setData(metadata_xml_str)
        metadata_file_entry.update({
            NameObject('/Subtype'): NameObject('/XML'),
            NameObject('/Type'): NameObject('/Metadata'),
            })
        metadata_obj = self._addObject(metadata_file_entry)
        af_value_obj = self._addObject(ArrayObject(af_list))
        self._root_object.update({
            NameObject("/AF"): af_value_obj,
            NameObject("/Metadata"): metadata_obj,
            NameObject("/Names"): embedded_files_dict,
            # show attachments when opening PDF
            NameObject("/PageMode"): NameObject("/UseAttachments"),
            })
        logger.debug('res_output_intents=%s', res_output_intents)
        if res_output_intents:
            self._root_object.update({
                NameObject("/OutputIntents"): ArrayObject(res_output_intents),
            })
        metadata_txt_dict = _prepare_pdf_metadata_txt(pdf_metadata)
        self.addMetadata(metadata_txt_dict)

Example #24

0

Show file

File: downloader.py Project: Ernstsen/PearsonEbookDownloader

def main():
    """Assemble an e-book PDF from locally saved JSON dumps.

    Reads ``bookinfo.json``, ``pageinfo.json`` and ``pages.json`` (plus an
    optional ``bookmarks.json``) from the current working directory, writes
    every base64-encoded page into a temporary directory, merges the pages in
    file-name order, then adds bookmarks and page labels before writing
    ``<title>.pdf``.

    Raises:
        KeyError: if a requested page id has no data in ``pages.json``.
    """
    print("Loading metadata and eText information...")

    with open("bookinfo.json", 'r') as bookInfoRequest:
        bookInfo = json.loads(bookInfoRequest.read())[0]['userBookTOList'][0]

    with open("pageinfo.json", 'r') as pageInfoRequest:
        pageInfo = json.loads(pageInfoRequest.read())[0]['pdfPlayerPageInfoTOList']

    with open("pages.json", 'r') as file:
        downloadedData = json.loads(file.read())[0]["pdfPlayerPageInfoTOList"]

    data_uri_prefix = "data:application/pdf;base64,"

    def get_data(page_id):
        """Return the decoded PDF bytes stored in pages.json for *page_id*."""
        encoded = next((x['data'] for x in downloadedData if x['pageID'] == page_id), None)
        if encoded is None:
            # Fail with a message naming the page instead of an opaque
            # "NoneType is not subscriptable" TypeError.
            raise KeyError("No data in pages.json for pageID {!r}".format(page_id))
        # The prefix is dropped by length only; its presence is not verified.
        return bytearray(base64.standard_b64decode(encoded[len(data_uri_prefix):]))

    with tempfile.TemporaryDirectory() as pdfDownloadDir:
        # Use a temporary directory to hold all the single-page pdf files.
        # Maps the book's page number label -> page order (used below as the
        # page index for bookmarks and page labels).
        pdfPageTable = {}

        # First, write the cover file
        with open(os.path.join(pdfDownloadDir, "0000 - cover.pdf"), 'w+b') as ous:
            ous.write(get_data(pageInfo[0]['pageID']))

        # Then, write all the individual pages for the e-book
        def download(pdfPage):
            pdfPageTable[pdfPage['bookPageNumber']] = pdfPage['pageOrder']
            savePath = os.path.join(pdfDownloadDir,
                                    "{:04} - {}.pdf".format(pdfPage['pageOrder'], pdfPage['bookPageNumber']))
            with open(savePath, 'w+b') as out:
                out.write(get_data(pdfPage['pageID']))

        threadPool = ThreadPool(40)  # 40 threads should process a book fairly quickly
        print("Reading pages from pageinfo.json to \"{}\"...".format(pdfDownloadDir))
        try:
            threadPool.map(download, pageInfo)
        finally:
            # Release the worker threads even when a page fails
            threadPool.close()
            threadPool.join()

        print("Assembling PDF...")

        # Begin to assemble the final PDF, first by adding all the pages
        fileMerger = PdfFileWriter()
        for pdfFile in sorted(os.listdir(pdfDownloadDir)):
            pdfPath = os.path.join(pdfDownloadDir, pdfFile)
            page = PdfFileReader(pdfPath).getPage(0)
            os.remove(pdfPath)  # Save on memory a bit
            fileMerger.addPage(page)

        bookmarksExist = True

        # TODO: Bookmarks currently not supported
        # A missing, unreadable or unexpectedly shaped bookmarks.json simply
        # means "no bookmarks" rather than aborting the whole run.
        try:
            with open("bookmarks.json", 'r') as bookmarkInfoRequest:
                bookmarkInfo = json.loads(bookmarkInfoRequest.read())
            bookmarkInfo = bookmarkInfo[0]['basketsInfoTOList'][0]
        except (OSError, ValueError, LookupError, TypeError):
            bookmarksExist = False

        def recursiveSetBookmarks(aDict, parent=None):
            """Add *aDict* (one bookmark dict or a list of them) below *parent*."""
            if isinstance(aDict, dict):
                aDict = [aDict]
            for bookmark in aDict:
                # These are the main bookmarks under this parent (or the whole document if parent is None)
                bookmarkName = bookmark['name']  # Name of the section
                pageNum = str(bookmark['linkvalue']['content'])  # First page (in the pdf's format)

                latestBookmark = fileMerger.addBookmark(bookmarkName, pdfPageTable[pageNum], parent)

                if 'basketentry' in bookmark:
                    recursiveSetBookmarks(bookmark['basketentry'], latestBookmark)

        if bookmarksExist:
            print("Adding bookmarks...")
            fileMerger.addBookmark("Cover", 0)  # Add a bookmark to the cover at the beginning
            recursiveSetBookmarks(bookmarkInfo['document'][0]['basketcollection']['basket']['basketentry'])
        else:
            print("Bookmarks don't exist for book")
        print("Fixing metadata...")
        # Hack to fix metadata and page numbers:
        pdf_page_label_table = sorted(
            ((order, label) for label, order in pdfPageTable.items()),
            key=(lambda x: int(x[0])))
        labels = ArrayObject([
            # The page index in a /Nums array is an integer, not a name
            NumberObject(0),
            # NOTE(review): the label prefix is emitted as a raw "(...)" token
            # through NameObject; a PDF string object would be the proper type.
            DictionaryObject({NameObject("/P"): NameObject("(cover)")})
        ])
        last_mode = None
        last_prefix = ""
        # Now we check to see the ranges where we have roman numerals or arabic numerals
        # The following code is not ideal for this, so I'd appreciate a PR with a better solution
        for pageNumber, pageLabel in pdf_page_label_table:
            curr_mode = None
            prefix = ""
            style = DictionaryObject()
            arabic_match = arabicRegex.match(pageLabel)
            roman_match = None if arabic_match else romanRegex.match(pageLabel)
            if arabic_match:
                curr_mode = "arabic"
                prefix = arabic_match.group("prefix")
                style.update({NameObject("/S"): NameObject("/D")})
            elif roman_match:
                curr_mode = "roman"
                prefix = roman_match.group("prefix")
                style.update({NameObject("/S"): NameObject("/r")})
            # Only start a new label range when the numbering style or prefix changes
            if curr_mode != last_mode or prefix != last_prefix:
                if prefix:
                    style.update({
                        NameObject("/P"): NameObject("({})".format(prefix))
                    })
                labels.extend([
                    NumberObject(pageNumber),
                    style,
                ])
                last_mode = curr_mode
                last_prefix = prefix
        root_obj = fileMerger._root_object
        # Todo: Fix the weird page numbering bug
        pageLabels = DictionaryObject()
        pageLabels.update({
            NameObject("/Nums"): ArrayObject(labels)
        })
        root_obj.update({
            NameObject("/PageLabels"): pageLabels
        })

        print("Writing PDF...")
        with open("{}.pdf".format(bookInfo['title']).replace("/", "").replace(":", "_"), "wb") as outFile:
            fileMerger.write(outFile)

Example #25

0

Show file

File: pdf.py Project: olf42/python-drafthorse

def _facturx_update_metadata_add_attachment(pdf_filestream, facturx_xml_str, pdf_metadata, facturx_level,
                                            output_intents):
    """Embed the Factur-X XML into ``pdf_filestream`` and update its root object.

    Adds the XML as an embedded file named ``ZUGFeRD-invoice.xml``, registers
    it in the ``/Names`` and ``/AF`` entries of the root object, attaches the
    XMP metadata stream, re-adds the given output intents and updates the
    document info metadata.

    :param pdf_filestream: PDF writer exposing ``_addObject``, ``_root_object``
        and ``addMetadata``
    :param facturx_xml_str: XML payload to embed; it is passed to
        ``hashlib.md5``, which requires a bytes-like object
    :param pdf_metadata: metadata forwarded to the ``_prepare_pdf_metadata_*``
        helpers
    :param facturx_level: level forwarded to ``_prepare_pdf_metadata_xml``
    :param output_intents: iterable of
        ``(output_intent_dict, dest_output_profile_dict)`` pairs; each
        ``output_intent_dict`` is updated in place
    """
    md5sum = hashlib.md5(facturx_xml_str).hexdigest()
    md5sum_obj = createStringObject(md5sum)
    # PDF date strings use the "D:YYYYMMDDHHmmSS+HH'mm'" form, not ISO 8601
    mod_date = datetime.datetime.now(datetime.timezone.utc).strftime("D:%Y%m%d%H%M%S+00'00'")
    params_dict = DictionaryObject({
        NameObject('/CheckSum'): md5sum_obj,
        NameObject('/ModDate'): createStringObject(mod_date),
        # NOTE(review): the size is emitted as a bare number token through
        # NameObject; a numeric PDF object would be the proper type.
        NameObject('/Size'): NameObject(str(len(facturx_xml_str))),
    })
    file_entry = DecodedStreamObject()
    file_entry.setData(facturx_xml_str)  # here we integrate the file itself
    file_entry.update({
        NameObject("/Type"): NameObject("/EmbeddedFile"),
        NameObject("/Params"): params_dict,
        # 2F is '/' in hexadecimal
        NameObject("/Subtype"): NameObject("/text#2Fxml"),
    })
    file_entry_obj = pdf_filestream._addObject(file_entry)
    # The Filespec entry
    ef_dict = DictionaryObject({
        NameObject("/F"): file_entry_obj,
        NameObject('/UF'): file_entry_obj,
    })

    fname_obj = createStringObject("ZUGFeRD-invoice.xml")
    filespec_dict = DictionaryObject({
        NameObject("/AFRelationship"): NameObject("/Data"),
        NameObject("/Desc"): createStringObject("Factur-X Invoice"),
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): fname_obj,
        NameObject("/EF"): ef_dict,
        NameObject("/UF"): fname_obj,
    })
    filespec_obj = pdf_filestream._addObject(filespec_dict)
    # Only one file is attached, so the name array is a single (name, filespec)
    # pair and the associated-files list holds that one filespec.
    af_list = [filespec_obj]
    embedded_files_names_dict = DictionaryObject({
        NameObject("/Names"): ArrayObject([fname_obj, filespec_obj]),
    })
    # Then create the entry for the root, as it needs a
    # reference to the Filespec
    embedded_files_dict = DictionaryObject({
        NameObject("/EmbeddedFiles"): embedded_files_names_dict,
    })
    res_output_intents = []
    for output_intent_dict, dest_output_profile_dict in output_intents:
        dest_output_profile_obj = pdf_filestream._addObject(
            dest_output_profile_dict)
        # TODO detect if there are no other objects in output_intent_dest_obj
        # than /DestOutputProfile
        output_intent_dict.update({
            NameObject("/DestOutputProfile"): dest_output_profile_obj,
        })
        res_output_intents.append(pdf_filestream._addObject(output_intent_dict))
    # Update the root
    metadata_xml_str = _prepare_pdf_metadata_xml(facturx_level, pdf_metadata)
    metadata_file_entry = DecodedStreamObject()
    metadata_file_entry.setData(metadata_xml_str)
    metadata_file_entry.update({
        NameObject('/Subtype'): NameObject('/XML'),
        NameObject('/Type'): NameObject('/Metadata'),
    })
    metadata_obj = pdf_filestream._addObject(metadata_file_entry)
    af_value_obj = pdf_filestream._addObject(ArrayObject(af_list))
    pdf_filestream._root_object.update({
        NameObject("/AF"): af_value_obj,
        NameObject("/Metadata"): metadata_obj,
        NameObject("/Names"): embedded_files_dict,
        # show attachments when opening PDF
        NameObject("/PageMode"): NameObject("/UseAttachments"),
    })
    if res_output_intents:
        pdf_filestream._root_object.update({
            NameObject("/OutputIntents"): ArrayObject(res_output_intents),
        })
    metadata_txt_dict = _prepare_pdf_metadata_txt(pdf_metadata)
    pdf_filestream.addMetadata(metadata_txt_dict)

Example #26

0

Show file

def main(bookId):
    """Download every page of an e-book and merge them into one PDF.

    *bookId* is either a numeric id (int or numeric string) or a URL from
    which the id is extracted. Pages are fetched into a temporary directory,
    merged in file-name order, then bookmarks and page labels are added and
    the result is written to ``<bookId> - <title>.pdf``.
    """
    if bookId.startswith("http"):
        print("Trying to extract bookId from url")
        bookData = urllib.parse.parse_qs(bookId.split("?")[-1])
        if bookData.get("values", None) is not None:
            # "values" holds "name::value::name::value..."; pair the items up
            bookData = {
                itemName: [itemValue]
                for itemName, itemValue in zip(
                    *[iter(bookData["values"][0].split("::"))] * 2)
            }
            # Fix capitalization
            bookData["bookid"] = bookData["bookID"]
        bookId = bookData["bookid"][0]

    bookId = int(bookId)
    print(
        "Downloading book id {}. Please open an issue on GitHub if this book id is incorrect."
        .format(bookId))

    print("Downloading metadata and eText information...")

    bookInfoGetUrl = bookInfoUrl.format(bookId)
    with urllib.request.urlopen(hsidUrl(bookInfoGetUrl)) as bookInfoRequest:
        str_response = bookInfoRequest.read().decode('utf-8')
        bookInfo = json.loads(str_response)[0]['userBookTOList'][0]

    pageInfoGetUrl = pageInfoUrl.format(
        userroleid=roletypeid,
        bookid=bookId,
        bookeditionid=bookInfo['bookEditionID'])
    with urllib.request.urlopen(hsidUrl(pageInfoGetUrl)) as pageInfoRequest:
        pageInfo = json.loads(pageInfoRequest.read().decode('utf-8'))
        pageInfo = pageInfo[0]['pdfPlayerPageInfoTOList']

    def getPageUrl(pdfPage, isCover="N"):
        """Build the download URL for one page path."""
        pdfPage = pdfPage.replace("/assets/", "")
        return hsidUrl(pdfUrl.format(bookid=bookInfo['globalBookID'],
                                     pdfpage=pdfPage,
                                     iscover=isCover))

    with tempfile.TemporaryDirectory() as pdfDownloadDir:
        # Use a temporary directory to download all the pdf files to.
        # Maps the book's page number label -> page order (used below as the
        # page index for bookmarks and page labels).
        pdfPageTable = {}

        # First, download the cover file
        urllib.request.urlretrieve(
            getPageUrl(bookInfo['pdfCoverArt'], isCover="Y"),
            os.path.join(pdfDownloadDir, "0000 - cover.pdf"))

        # Then, download all the individual pages for the e-book
        def download(pdfPage):
            pdfPageTable[pdfPage['bookPageNumber']] = pdfPage['pageOrder']
            savePath = os.path.join(
                pdfDownloadDir,
                "{:04} - {}.pdf".format(pdfPage['pageOrder'],
                                        pdfPage['bookPageNumber']))
            urllib.request.urlretrieve(getPageUrl(pdfPage['pdfPath']),
                                       savePath)

        threadPool = ThreadPool(
            40)  # 40 threads should download a book fairly quickly
        print("Downloading pages to \"{}\"...".format(pdfDownloadDir))
        try:
            threadPool.map(download, pageInfo)
        finally:
            # Release the worker threads even when a download fails
            threadPool.close()
            threadPool.join()

        print("Assembling PDF...")

        # Begin to assemble the final PDF, first by adding all the pages
        fileMerger = PdfFileWriter()
        for pdfFile in sorted(os.listdir(pdfDownloadDir)):
            fileMerger.addPage(
                PdfFileReader(os.path.join(pdfDownloadDir,
                                           pdfFile)).getPage(0))

        # And then add all the bookmarks to the final PDF
        bookmarkInfoGetUrl = bookmarkInfoUrl.format(
            userroleid=roletypeid,
            bookid=bookId,
            language=language,
            bookeditionid=bookInfo['bookEditionID'],
            scenarioid=1001)

        bookmarksExist = True

        with urllib.request.urlopen(
                hsidUrl(bookmarkInfoGetUrl)) as bookmarkInfoRequest:
            # Any failure to read or parse the response is treated as
            # "this book has no bookmarks" (best effort).
            try:
                bookmarkInfo = json.loads(
                    bookmarkInfoRequest.read().decode('utf-8'))
                bookmarkInfo = bookmarkInfo[0]['basketsInfoTOList'][0]
            except Exception:
                bookmarksExist = False

        def recursiveSetBookmarks(aDict, parent=None):
            """Add *aDict* (one bookmark dict or a list of them) below *parent*."""
            if isinstance(aDict, dict):
                aDict = [aDict]
            for bookmark in aDict:
                # These are the main bookmarks under this parent (or the whole document if parent is None)
                bookmarkName = bookmark['n']  # Name of the section
                pageNum = str(bookmark['lv']
                              ['content'])  # First page (in the pdf's format)

                latestBookmark = fileMerger.addBookmark(
                    bookmarkName, pdfPageTable[pageNum], parent)

                if 'be' in bookmark:
                    recursiveSetBookmarks(bookmark['be'], latestBookmark)

        if bookmarksExist:
            print("Adding bookmarks...")
            fileMerger.addBookmark(
                "Cover", 0)  # Add a bookmark to the cover at the beginning
            recursiveSetBookmarks(bookmarkInfo['document'][0]['bc']['b']['be'])
        else:
            print("Bookmarks don't exist for ID {}".format(bookId))
        print("Fixing metadata...")
        # Hack to fix metadata and page numbers:
        pdfPageLabelTable = sorted(
            ((order, label) for label, order in pdfPageTable.items()),
            key=(lambda x: int(x[0])))
        labels = ArrayObject([
            # The page index in a /Nums array is an integer, not a name
            NumberObject(0),
            # NOTE(review): the label prefix is emitted as a raw "(...)" token
            # through NameObject; a PDF string object would be the proper type.
            DictionaryObject({NameObject("/P"): NameObject("(cover)")})
        ])
        lastMode = None
        lastPrefix = ""
        # Now we check to see the ranges where we have roman numerals or arabic numerals
        # The following code is not ideal for this, so I'd appreciate a PR with a better solution
        for pageNumber, pageLabel in pdfPageLabelTable:
            currMode = None
            prefix = ""
            style = DictionaryObject()
            arabicMatch = arabicRegex.match(pageLabel)
            romanMatch = None if arabicMatch else romanRegex.match(pageLabel)
            if arabicMatch:
                currMode = "arabic"
                prefix = arabicMatch.group("prefix")
                style.update({NameObject("/S"): NameObject("/D")})
            elif romanMatch:
                currMode = "roman"
                prefix = romanMatch.group("prefix")
                style.update({NameObject("/S"): NameObject("/r")})
            # Only start a new label range when the numbering style or prefix changes
            if currMode != lastMode or prefix != lastPrefix:
                if prefix:
                    style.update(
                        {NameObject("/P"): NameObject("({})".format(prefix))})
                labels.extend([
                    NumberObject(pageNumber),
                    style,
                ])
                lastMode = currMode
                lastPrefix = prefix
        rootObj = fileMerger._root_object
        # Todo: Fix the weird page numbering bug
        pageLabels = DictionaryObject()
        pageLabels.update({NameObject("/Nums"): ArrayObject(labels)})
        rootObj.update({NameObject("/PageLabels"): pageLabels})

        print("Writing PDF...")
        with open(
                "{} - {}.pdf".format(bookId, bookInfo['title']).replace(
                    "/", "").replace(":", "_"), "wb") as outFile:
            fileMerger.write(outFile)

Example #27

0

Show file

File: pdf_helper.py Project: shashwatjay/chesspdftofen

def create_annotation(x, y, meta):
    """Build a link annotation and a text (comment) annotation at ``(x, y)``.

    :param x: left coordinate of both annotation rectangles
    :param y: bottom coordinate of both annotation rectangles
    :param meta: mapping with ``"contents"`` (used as the link URI and the
        comment text) and ``"author"`` (used as the comment title)
    :return: tuple ``(linkAnnotation, commentAnnotation)`` of annotation
        dictionaries
    """
    color = [255.0 / 255.0, 209 / 255.0, 0]
    # link
    linkAnnotation = DictionaryObject()
    # https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf
    linkAnnotation.update({
        # Table 165 NoZoom
        NameObject("/F"):
        NumberObject(4),
        NameObject("/Type"):
        NameObject("/Annot"),
        NameObject("/Subtype"):
        NameObject("/Link"),

        # Table 164 color, annotation rectangle (20 x 20 units)
        NameObject("/C"):
        ArrayObject([FloatObject(c) for c in color]),
        NameObject("/Rect"):
        ArrayObject([
            FloatObject(x),
            FloatObject(y),
            FloatObject(x + 20),
            FloatObject(y + 20)
        ]),

        # Table 173 link annotation
        NameObject('/A'):
        DictionaryObject({
            # Table 206 uri
            NameObject('/S'): NameObject('/URI'),
            NameObject('/URI'): TextStringObject(meta["contents"])
        }),
        # Table 173 invert rect when mouse
        NameObject('/H'):
        NameObject('/I'),
        # table 164 hor corner radius, vert corner radius, border width
        # dash array table 56
        # The entries are numbers, so they are built as NumberObject rather
        # than as names without a leading slash.
        NameObject('/Border'):
        ArrayObject([
            NumberObject(0),
            NumberObject(0),
            NumberObject(5),
        ]),
    })

    commentAnnotation = DictionaryObject()
    # https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf
    commentAnnotation.update({
        # Table 165 NoZoom
        NameObject("/F"):
        NumberObject(4),
        NameObject("/Type"):
        NameObject("/Annot"),
        NameObject("/Subtype"):
        NameObject("/Text"),

        # Table 170 titlebar
        NameObject("/T"):
        TextStringObject(meta["author"]),
        NameObject("/Contents"):
        TextStringObject(meta["contents"]),

        # Table 164 color, annotation rectangle (5 x 5 units)
        NameObject("/C"):
        ArrayObject([FloatObject(c) for c in color]),
        NameObject("/Rect"):
        ArrayObject([
            FloatObject(x),
            FloatObject(y),
            FloatObject(x + 5),
            FloatObject(y + 5)
        ]),

        # 12.5.6.4 text annotation
        NameObject('/Open'):
        BooleanObject(False),
        NameObject('/Name'):
        NameObject('/Comment'),
    })

    return linkAnnotation, commentAnnotation

Example #28

0

Show file

    def convert_to_pdfa(self):
        """
        Transform the opened PDF file into a PDF/A compliant file.

        Rewrites the file header, sets the document ID, embeds the sRGB ICC profile as the output intent,
        recomputes the glyph width arrays when fonttools is available, normalises the outline count when the
        document has outlines, and sets Odoo as creator/producer. Sets ``self.is_pdfa`` to True.
        """
        # Set the PDF version to 1.7 (as PDF/A-3 is based on version 1.7) and make it PDF/A compliant.
        # See https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Parts-2-and-3-rules#rule-612-1

        # " The file header shall begin at byte zero and shall consist of "%PDF-1.n" followed by a single EOL marker,
        # where 'n' is a single digit number between 0 (30h) and 7 (37h) "
        # " The aforementioned EOL marker shall be immediately followed by a % (25h) character followed by at least four
        # bytes, each of whose encoded byte values shall have a decimal value greater than 127 "
        self._header = b"%PDF-1.7\n%\xFF\xFF\xFF\xFF"

        # Add a document ID to the trailer. This is only needed when using encryption with regular PDF, but is required
        # when using PDF/A
        pdf_id = ByteStringObject(md5(self._reader.stream.getvalue()).digest())
        # The first string is based on the content at the time of creating the file, while the second is based on the
        # content of the file when it was last updated. When creating a PDF, both are set to the same value.
        self._ID = ArrayObject((pdf_id, pdf_id))

        with file_open('tools/data/files/sRGB2014.icc', mode='rb') as icc_profile:
            icc_profile_file_data = compress(icc_profile.read())

        icc_profile_stream_obj = DecodedStreamObject()
        icc_profile_stream_obj.setData(icc_profile_file_data)
        icc_profile_stream_obj.update({
            NameObject("/Filter"): NameObject("/FlateDecode"),
            NameObject("/N"): NumberObject(3),
            # The stream length is an integer, so it is built as a NumberObject rather than a slash-less name
            NameObject("/Length"): NumberObject(len(icc_profile_file_data)),
        })

        icc_profile_obj = self._addObject(icc_profile_stream_obj)

        output_intent_dict_obj = DictionaryObject()
        output_intent_dict_obj.update({
            NameObject("/S"): NameObject("/GTS_PDFA1"),
            NameObject("/OutputConditionIdentifier"): createStringObject("sRGB"),
            NameObject("/DestOutputProfile"): icc_profile_obj,
            NameObject("/Type"): NameObject("/OutputIntent"),
        })

        output_intent_obj = self._addObject(output_intent_dict_obj)
        self._root_object.update({
            NameObject("/OutputIntents"): ArrayObject([output_intent_obj]),
        })

        pages = self._root_object['/Pages']['/Kids']

        # PDF/A needs the glyphs width array embedded in the pdf to be consistent with the ones from the font file.
        # But it seems like it is not the case when exporting from wkhtmltopdf.
        if TTFont:
            fonts = {}
            # First browse through all the pages of the pdf file, to get a reference to all the fonts used in the PDF.
            for page in pages:
                for font in page.getObject()['/Resources']['/Font'].values():
                    for descendant in font.getObject()['/DescendantFonts']:
                        fonts[descendant.idnum] = descendant.getObject()

            # Then for each font, rewrite the width array with the information taken directly from the font file.
            # The new width are calculated such as width = round(1000 * font_glyph_width / font_units_per_em)
            # See: http://martin.hoppenheit.info/blog/2018/pdfa-validation-and-inconsistent-glyph-width-information/
            for font in fonts.values():
                font_file = font['/FontDescriptor']['/FontFile2']
                # The context manager closes the buffer even if the font cannot be parsed
                with io.BytesIO(decompress(font_file._data)) as stream:
                    ttfont = TTFont(stream)
                    font_upm = ttfont['head'].unitsPerEm
                    glyphs = ttfont.getGlyphSet()._hmtx.metrics
                    glyph_widths = [
                        NumberObject(round(1000.0 * values[0] / font_upm))
                        for key, values in glyphs.items()
                        if key[:5] == 'glyph'
                    ]

                    font[NameObject('/W')] = ArrayObject([NumberObject(1), ArrayObject(glyph_widths)])
        else:
            _logger.warning('The fonttools package is not installed. Generated PDF may not be PDF/A compliant.')

        # Documents without outlines have no /Outlines entry; skip them instead of raising KeyError
        if '/Outlines' in self._root_object:
            outlines = self._root_object['/Outlines'].getObject()
            outlines[NameObject('/Count')] = NumberObject(1)

        # Set odoo as producer
        self.addMetadata({
            '/Creator': "Odoo",
            '/Producer': "Odoo",
        })
        self.is_pdfa = True

Example #29

0

Show file

    def zugferd_update_metadata_add_attachment(self, pdf_filestream, fname,
                                               fdata):
        '''Embed ``fdata`` as attachment ``fname`` and update the PDF root.

        This method is inspired from the code of the addAttachment()
        method of the PyPDF2 lib.

        :param pdf_filestream: PDF writer exposing ``_addObject``,
            ``_root_object`` and ``addMetadata``
        :param fname: file name used for the ``/F`` and ``/UF`` entries of
            the Filespec and as key in the embedded-files name array
        :param fdata: content of the embedded file
        '''
        # The entry for the file
        moddate = DictionaryObject()
        moddate.update({
            NameObject('/ModDate'):
            createStringObject(self._get_pdf_timestamp())
        })
        file_entry = DecodedStreamObject()
        file_entry.setData(fdata)
        file_entry.update({
            NameObject("/Type"):
            NameObject("/EmbeddedFile"),
            NameObject("/Params"):
            moddate,
            # 2F is '/' in hexadecimal
            NameObject("/Subtype"):
            NameObject("/text#2Fxml"),
        })
        file_entry_obj = pdf_filestream._addObject(file_entry)
        # The Filespec entry
        efEntry = DictionaryObject()
        efEntry.update({
            NameObject("/F"): file_entry_obj,
            NameObject('/UF'): file_entry_obj,
        })

        fname_obj = createStringObject(fname)
        filespec = DictionaryObject()
        filespec.update({
            NameObject("/AFRelationship"):
            NameObject("/Alternative"),
            NameObject("/Desc"):
            createStringObject("ZUGFeRD Invoice"),
            NameObject("/Type"):
            NameObject("/Filespec"),
            NameObject("/F"):
            fname_obj,
            NameObject("/EF"):
            efEntry,
            NameObject("/UF"):
            fname_obj,
        })
        # Register the Filespec exactly once so that the name array and /AF
        # reference the same object instead of two duplicate copies.
        filespec_obj = pdf_filestream._addObject(filespec)
        embeddedFilesNamesDictionary = DictionaryObject()
        embeddedFilesNamesDictionary.update({
            NameObject("/Names"):
            ArrayObject([fname_obj, filespec_obj])
        })
        # Then create the entry for the root, as it needs a
        # reference to the Filespec
        embeddedFilesDictionary = DictionaryObject()
        embeddedFilesDictionary.update(
            {NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary})
        # Update the root
        metadata_xml_str = self._prepare_pdf_metadata()
        metadata_file_entry = DecodedStreamObject()
        metadata_file_entry.setData(metadata_xml_str)
        # Mark the stream as an XML metadata stream
        metadata_file_entry.update({
            NameObject('/Subtype'): NameObject('/XML'),
            NameObject('/Type'): NameObject('/Metadata'),
        })
        metadata_value = pdf_filestream._addObject(metadata_file_entry)
        af_value = pdf_filestream._addObject(ArrayObject([filespec_obj]))
        pdf_filestream._root_object.update({
            NameObject("/AF"):
            af_value,
            NameObject("/Metadata"):
            metadata_value,
            NameObject("/Names"):
            embeddedFilesDictionary,
        })
        info_dict = self._prepare_pdf_info()
        pdf_filestream.addMetadata(info_dict)

Example #30

0

Show file

def test_array_object_exception():
    """Reading an array from a stream holding b"False" raises PdfReadError."""
    not_an_array = BytesIO(b"False")
    with pytest.raises(PdfReadError) as exc_info:
        ArrayObject.read_from_stream(not_an_array, None)
    message = exc_info.value.args[0]
    assert message == "Could not read array"