Python TextStringObject Beispiele, PyPDF2.generic.TextStringObject Python Beispiele

Beispiel #1

0

Datei anzeigen

    def createHighlight(self, x1, y1, x2, y2, meta, color=[1, 0, 0]):
        """Build a ``/Highlight`` annotation dictionary for the given box.

        ``meta["author"]`` is stored under ``/T`` and ``meta["contents"]``
        under ``/Contents``; every component of ``color`` goes into ``/C``.
        ``/Rect`` holds (x1, y1, x2, y2) and ``/QuadPoints`` holds the four
        corners in the order (x1, y2), (x2, y2), (x1, y1), (x2, y1).

        The dictionary is only returned; attaching it to a page is left to
        the caller.
        """
        rect_coords = (x1, y1, x2, y2)
        quad_coords = (x1, y2, x2, y2, x1, y1, x2, y1)

        annotation = DictionaryObject()
        annotation.update({
            NameObject("/F"): NumberObject(4),
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Highlight"),
            NameObject("/T"): TextStringObject(meta["author"]),
            NameObject("/Contents"): TextStringObject(meta["contents"]),
            NameObject("/C"): ArrayObject(
                [FloatObject(component) for component in color]),
            NameObject("/Rect"): ArrayObject(
                [FloatObject(value) for value in rect_coords]),
            NameObject("/QuadPoints"): ArrayObject(
                [FloatObject(value) for value in quad_coords]),
        })
        return annotation

Beispiel #2

0

Datei anzeigen

Datei: splitter_tmp.py Projekt: chebrolu/Question-Bank

def create_annot_box(x1, y1, x2, y2, meta, color=[1, 0, 0]):
    """Return a ``/Square`` annotation dictionary for the box (x1, y1, x2, y2).

    ``meta`` must provide the keys "author" (written to ``/T``) and
    "contents" (written to ``/Contents``).  Each component of ``color`` is
    converted to a ``FloatObject`` for ``/C``.  No ``/P`` entry is written.
    """
    corners = (x1, y1, x2, y2)

    annotation = DictionaryObject()
    annotation.update({
        NameObject("/F"): NumberObject(4),
        NameObject("/Type"): NameObject("/Annot"),
        NameObject("/Subtype"): NameObject("/Square"),
        NameObject("/T"): TextStringObject(meta["author"]),
        NameObject("/Contents"): TextStringObject(meta["contents"]),
        NameObject("/C"): ArrayObject(
            [FloatObject(component) for component in color]),
        NameObject("/Rect"): ArrayObject(
            [FloatObject(value) for value in corners]),
    })
    return annotation

Beispiel #3

0

Datei anzeigen

def createHighlight(bbox=(0, 0, 1, 1),
                    contents="",
                    color=[1, 1, 0],
                    author="iwasakishuto(@cabernet_rock)"):
    """Build the dictionary describing a highlight annotation.

    Args:
        bbox (tuple)   : four values ``(x1, y1, x2, y2)`` locating the highlight.
        contents (str) : comment text stored with the highlight.
        color (list)   : colour components of the highlight. Defaults to ``[1,1,0]``. (yellow)
        author (str)   : name stored as the annotation's author. Defaults to ``"iwasakishuto(@cabernet_rock)"`` .

    Returns:
        DictionaryObject: the highlight annotation; it is not attached to any page here.

    Examples:
        >>> from gummy.utils import createHighlight, addHighlightToPage
        >>> from PyPDF2 import PdfFileWriter, PdfFileReader
        >>> page_no = 0
        >>> pdfOutput = PdfFileWriter()
        >>> with open("input.pdf", mode="rb") as inPdf:
        ...     pdfInput = PdfFileReader(inPdf)
        ...     page = pdfInput.getPage(page_no)
        ...     highlight = createHighlight(bbox=(10,10,90,90), contents="COMMENT", color=(1,1,0))
        ...     addHighlightToPage(highlight, page, pdfOutput)
        ...     pdfOutput.addPage(page)
        ...     with open("output.pdf", mode="wb") as outPdf:
        ...         pdfOutput.write(outPdf)
    """
    from PyPDF2.generic import (DictionaryObject, NumberObject, FloatObject,
                                NameObject, TextStringObject, ArrayObject)
    left, bottom, right, top = bbox
    # Corner order: (left, top), (right, top), (left, bottom), (right, bottom).
    quad = (left, top, right, top, left, bottom, right, bottom)

    annotation = DictionaryObject()
    annotation.update({
        NameObject("/F"): NumberObject(4),
        NameObject("/Type"): NameObject("/Annot"),
        NameObject("/Subtype"): NameObject("/Highlight"),
        NameObject("/T"): TextStringObject(author),
        NameObject("/Contents"): TextStringObject(contents),
        NameObject("/C"): ArrayObject(list(map(FloatObject, color))),
        NameObject("/Rect"): ArrayObject(list(map(FloatObject, bbox))),
        NameObject("/QuadPoints"): ArrayObject(list(map(FloatObject, quad))),
    })
    return annotation

Beispiel #4

0

Datei anzeigen

Datei: COISearchEngine.py Projekt: Berni1557/COISearchEngine

    def create_highlight(self, x1, y1, x2, y2, meta, color=[0, 1, 0]):
        """
        Build the annotation dictionary for a PDF highlight.

        Parameters
        ----------
        x1, y1 : float
            bottom left corner
        x2, y2 : float
            top right corner
        meta : dict
            must contain the keys "author" and "contents"
        color : iterable
            Three elements, (r,g,b)

        Returns
        -------
        DictionaryObject
            the ``/Highlight`` annotation; it is not added to a page here
        """
        rect = (x1, y1, x2, y2)
        # Corners in the order top-left, top-right, bottom-left, bottom-right.
        quad = (x1, y2, x2, y2, x1, y1, x2, y1)

        annotation = DictionaryObject()
        annotation.update({
            NameObject("/F"): NumberObject(4),
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Highlight"),
            NameObject("/T"): TextStringObject(meta["author"]),
            NameObject("/Contents"): TextStringObject(meta["contents"]),
            NameObject("/C"): ArrayObject(
                [FloatObject(component) for component in color]),
            NameObject("/Rect"): ArrayObject(
                [FloatObject(value) for value in rect]),
            NameObject("/QuadPoints"): ArrayObject(
                [FloatObject(value) for value in quad]),
        })
        return annotation

Beispiel #5

0

Datei anzeigen

 def _update_page_form_checkbox_values(self, page, fields):
     """Write checkbox values into the annotations of one page.

     For every annotation whose ``/T`` equals a key of ``fields``: when the
     assigned value is one of ``self.YES`` both ``/V`` and ``/AS`` are set
     to that value as a name; otherwise only ``/V`` is set, as a text
     string.

     Parameters:
         page (PyPDF2.pdf.PageObject): Page object from a PDF file.
         fields (dict): Dictionary containing the key -> value assignments.

     (From/inspired by: https://github.com/mstamy2/PyPDF2/issues/355)
     """
     for annot_ref in page['/Annots']:
         writer_annot = annot_ref.getObject()
         for field in fields:
             if writer_annot.get('/T') != field:
                 continue
             if fields[field] in self.YES:
                 changes = {
                     NameObject("/V"): NameObject(fields[field]),
                     NameObject("/AS"): NameObject(fields[field]),
                 }
             else:
                 changes = {
                     NameObject("/V"): TextStringObject(fields[field]),
                 }
             writer_annot.update(changes)

Beispiel #6

0

Datei anzeigen

    def fill(self, input_dict: Dict[str, Any]) -> None:
        """Fill the form fields of every input page and add the page to ``self.pdf``.

        Pages of ``self.input_pdf`` are paired with the entries of
        ``self.mapping``; each entry maps an annotation's ``/T`` to a key of
        ``input_dict``.  Every annotation gets ``/Ff`` set to 1.  ``bool``
        values set ``/V`` and ``/AS`` to ``/1`` when true, or drop ``/V`` and
        set ``/AS`` to ``/Off`` when false; any other value is written as its
        ``str()`` to ``/V`` and ``/AP``.
        """
        for p, m in zip(self.input_pdf.pages, self.mapping):
            if "/Annots" in p:
                for annot_ref in p["/Annots"]:
                    writer_annot = annot_ref.getObject()
                    writer_annot.update({NameObject("/Ff"): NumberObject(1)})  # make ReadOnly

                    for mk, mv in m.items():
                        if writer_annot.get("/T") != mk:
                            continue
                        input_value = input_dict[mv]
                        if not isinstance(input_value, bool):
                            text = str(input_value)
                            writer_annot.update({
                                NameObject("/V"): TextStringObject(text),
                                NameObject("/AP"): TextStringObject(text),
                            })
                        elif input_value:
                            writer_annot.update({
                                NameObject("/V"): NameObject("/1"),
                                NameObject("/AS"): NameObject("/1"),
                            })
                        else:
                            if "/V" in writer_annot:
                                del writer_annot["/V"]
                            writer_annot.update({NameObject("/AS"): NameObject("/Off")})

            # Pages without annotations are added unchanged.
            self.pdf.addPage(p)

Beispiel #7

0

Datei anzeigen

Datei: document.py Projekt: glins97/PPA

    def _create_highlight(self,
                          x0,
                          y0,
                          width,
                          height,
                          comment,
                          author='',
                          color=[0, 0, 0, 0]):
        """Draw a rectangle and return a matching ``/Highlight`` annotation.

        ``self.add_rect(x0, y0, width, height)`` is called first; the returned
        dictionary then describes the same box, spanning ``x0 .. x0 + width``
        horizontally and ``y0 .. y0 + height`` vertically.

        ``comment`` is stored under ``/Contents``, ``author`` under ``/T`` and
        each component of ``color`` under ``/C``.  The dictionary is only
        returned, not attached to a page.
        """
        self.add_rect(x0, y0, width, height)

        x1 = x0 + width
        # The vertical extent must use ``height``; using ``width`` here made
        # the annotation square regardless of the rectangle that was drawn.
        y1 = y0 + height

        highlight = DictionaryObject()
        highlight.update({
            NameObject("/F"):
            NumberObject(4),
            NameObject("/Type"):
            NameObject("/Annot"),
            NameObject("/Subtype"):
            NameObject("/Highlight"),
            NameObject("/T"):
            TextStringObject(author),
            NameObject("/Contents"):
            TextStringObject(comment),
            NameObject("/C"):
            ArrayObject([FloatObject(c) for c in color]),
            NameObject("/Rect"):
            ArrayObject([
                FloatObject(x0),
                FloatObject(y0),
                FloatObject(x1),
                FloatObject(y1)
            ]),
            # Corner order: top-left, top-right, bottom-left, bottom-right.
            NameObject("/QuadPoints"):
            ArrayObject([
                FloatObject(x0),
                FloatObject(y1),
                FloatObject(x1),
                FloatObject(y1),
                FloatObject(x0),
                FloatObject(y0),
                FloatObject(x1),
                FloatObject(y0)
            ]),
        })

        return highlight

Beispiel #8

0

Datei anzeigen

def add_comment(output, page, text, rectangle):
    """Attach a ``/FreeText`` annotation showing ``text`` to ``page``.

    The annotation dictionary is registered with ``output`` through
    ``output._addObject`` and the resulting reference is appended to the
    page's ``/Annots`` array.  ``rectangle`` is passed to
    ``RectangleObject`` for ``/Rect``; ``/C`` is fixed to (1, 1, 1) and
    ``/DA`` to Helvetica 10.

    A page that has no ``/Annots`` entry yet gets a new array instead of
    raising ``KeyError``.
    """
    obj = output._addObject(
        DictionaryObject({
            NameObject('/DA'):
            TextStringObject(' /Helv 10 Tf'),
            NameObject('/Subtype'):
            NameObject('/FreeText'),
            NameObject('/Rect'):
            RectangleObject(rectangle),
            NameObject('/Type'):
            NameObject('/Annot'),
            NameObject('/Contents'):
            TextStringObject(text),
            NameObject('/C'):
            ArrayObject([FloatObject(1),
                         FloatObject(1),
                         FloatObject(1)]),
        }))
    if '/Annots' in page:
        page['/Annots'].append(obj)
    else:
        page[NameObject('/Annots')] = ArrayObject([obj])

Beispiel #9

0

Datei anzeigen

Datei: utils.py Projekt: esantus/ConceptClassification

def createHighlight(x1, y1, x2, y2, meta, color=[1, 0, 0]):
    """Build a highlight annotation for a box on a PDF page.

    Coordinates start in the bottom left of the page.  ``meta["author"]``
    and ``meta["contents"]`` are stored as ``/T`` and ``/Contents``; the
    components of ``color`` fill ``/C``.  The annotation dictionary is
    returned without being attached to a page.
    """
    rect = (x1, y1, x2, y2)
    # Corners in the order (x1, y2), (x2, y2), (x1, y1), (x2, y1).
    quad = (x1, y2, x2, y2, x1, y1, x2, y1)

    annotation = DictionaryObject()
    annotation.update({
        NameObject("/F"): NumberObject(4),
        NameObject("/Type"): NameObject("/Annot"),
        NameObject("/Subtype"): NameObject("/Highlight"),
        NameObject("/T"): TextStringObject(meta["author"]),
        NameObject("/Contents"): TextStringObject(meta["contents"]),
        NameObject("/C"): ArrayObject(
            [FloatObject(component) for component in color]),
        NameObject("/Rect"): ArrayObject(
            [FloatObject(value) for value in rect]),
        NameObject("/QuadPoints"): ArrayObject(
            [FloatObject(value) for value in quad]),
    })
    return annotation

Beispiel #10

0

Datei anzeigen

 def process_content(self):
     """Blank matching text operands on every page and add the page to the writer.

     For each ``TJ``/``Tj`` operation whose first operand matches
     ``self.remove_list`` (as decided by ``any_match``), the operand is
     printed and replaced with an empty ``TextStringObject``.  The edited
     content stream is stored back as the page's ``/Contents``.
     """
     for page_num in range(self.reader.getNumPages()):
         page = self.reader.getPage(page_num)
         content = ContentStream(page["/Contents"].getObject(), self.reader)
         for operands, operator in content.operations:
             if operator not in (b_("TJ"), b_("Tj")):
                 continue
             text = operands[0]
             if any_match(text, self.remove_list):
                 print(text)
                 operands[0] = TextStringObject('')
         page[NameObject('/Contents')] = content
         self.writer.addPage(page)

Beispiel #11

0

Datei anzeigen

Datei: Remover.py Projekt: Thowdy/wtm-remover

 def remove_text_from_normal_page(self, pg, pdf):
     """Blank text operands that start with ``self.wmtext`` and return the page.

     Every ``TJ``/``Tj`` operation whose first operand is a
     ``TextStringObject`` starting with ``self.wmtext`` has that operand
     replaced by an empty ``TextStringObject``.  The edited stream becomes
     the page's ``/Contents``.  If nothing was replaced and ``self.form``
     is truthy, the page is handed to ``remove_form_from_normal_page``.
     """
     content = ContentStream(pg["/Contents"].getObject(), pdf)
     replaced = False
     for operands, operator in content.operations:
         if operator not in [b_('TJ'), b_('Tj')]:
             continue
         first = operands[0]
         # NOTE(review): this is an exact-type check, so list subclasses do
         # not take this branch, and the joined result is a plain str that
         # fails the TextStringObject test below -- confirm this is intended.
         if type(first) is list:
             text = ''.join(
                 piece if isinstance(piece, TextStringObject) else ''
                 for piece in first)
         else:
             text = first
         if not isinstance(text, TextStringObject):
             continue
         if text.startswith(self.wmtext):
             operands[0] = TextStringObject('')
             replaced = True
     pg[NameObject('/Contents')] = content
     if not replaced and self.form:
         pg = self.remove_form_from_normal_page(pg)
     return pg

Beispiel #12

0

Datei anzeigen

def pdf_flatten(filename, number):
    """Write a copy of ``filename`` with its "number" field filled and locked.

    Every page of the input PDF is copied to a new writer.  Annotations
    whose ``/T`` is exactly ``"number"`` (and which ``getFields()`` reports
    as a form field) get ``/V`` set to ``number`` and ``/Ff`` set to 1
    (read-only).  ``/NeedAppearances`` is switched on in the reader's and
    the writer's ``/AcroForm`` when one exists.

    Args:
        filename: path of the PDF to read.
        number: value written into the field; passed to ``TextStringObject``.

    Returns:
        The output path, ``filename + "-flatten.pdf"``.
    """
    # A one-element tuple: with a bare string, ``in`` would do a substring
    # match and also hit fields named e.g. "num" or "n".
    flatten_fields = ("number",)
    flatten_form = filename + "-flatten.pdf"

    # The reader pulls objects from the stream lazily, so the input must stay
    # open until the writer has finished writing.
    with open(filename, "rb") as input_stream:
        pdf_reader = PyPDF2.PdfFileReader(input_stream, strict=False)
        if "/AcroForm" in pdf_reader.trailer["/Root"]:
            pdf_reader.trailer["/Root"]["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer = PyPDF2.PdfFileWriter()
        set_need_appearances_writer(pdf_writer)
        if "/AcroForm" in pdf_writer._root_object:
            # Acro form is form field, set needs appearances to fix printing issues
            pdf_writer._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        # getFields() returns None for a document without form fields.
        data_dict = pdf_reader.getFields() or {}

        for page_index in range(pdf_reader.numPages):
            pdf_writer.addPage(pdf_reader.getPage(page_index))
            page = pdf_writer.getPage(page_index)
            if "/Annots" not in page:
                continue  # nothing to fill on a page without annotations
            for annot_ref in page["/Annots"]:
                writer_annot = annot_ref.getObject()
                field_name = writer_annot.get("/T")
                if field_name in flatten_fields and field_name in data_dict:
                    writer_annot.update({
                        NameObject("/Ff"): NumberObject(1),  # make ReadOnly
                        NameObject("/V"): TextStringObject(number),  # update the value
                    })

        with open(flatten_form, "wb") as output_stream:
            pdf_writer.write(output_stream)

    return flatten_form

Beispiel #13

0

Datei anzeigen

    def removeWordStyle(self, ignoreByteStringObject=False):
        """
        Strip Word-imported styling from every page of this output.

        Operands of rgb/cmyk colour operators are swapped for the black
        (text) or white (rectangle) entries of ``self.colors_operands``, and
        ``re`` rectangles whose size falls inside the highlight/underline
        range are dropped from the content stream.

        :param bool ignoreByteStringObject: when true, string operands of the
            text-showing operators that are not ``TextStringObject``
            instances are replaced with an empty ``TextStringObject``.
        """
        for kid in self.getObject(self._pages)['/Kids']:
            pageRef = self.getObject(kid)
            content = pageRef['/Contents'].getObject()
            if not isinstance(content, ContentStream):
                content = ContentStream(content, pageRef)

            kept = []
            font_size = 0

            for position, (operands, operator) in enumerate(content.operations):

                # Remember the size set by the most recent /F... font selection.
                if operator == b_('Tf') and operands[0][:2] == '/F':
                    font_size = operands[1].as_numeric()

                # Tj and ' carry their string in operand 0, " in operand 2.
                slot = None
                if operator in (b_('Tj'), b_("'")):
                    slot = 0
                elif operator == b_('"'):
                    slot = 2

                if slot is not None:
                    text = operands[slot]
                    if ignoreByteStringObject and not isinstance(
                            text, TextStringObject):
                        operands[slot] = TextStringObject()
                elif operator == b_("TJ"):
                    pieces = operands[0]
                    for i, piece in enumerate(pieces):
                        if ignoreByteStringObject and not isinstance(
                                piece, TextStringObject):
                            pieces[i] = TextStringObject()

                # Grayscale operators are deliberately left alone: tests showed
                # that black underlines, borders and tables are defined with
                # grayscale rather than rgb/cmyk colours.
                operator_type = self._getOperatorType(operator)
                if operator_type in ('rgb', 'cmyk'):
                    target = self._getColorTargetOperationType(
                        position, content.operations)

                    # Text becomes black and rectangles white; removing the
                    # colour entirely would paint rectangles black.
                    if target == 'text':
                        operands = self.colors_operands[operator_type]['black']
                    elif target == 'rectangle':
                        operands = self.colors_operands[operator_type]['white']

                # 're' is the path-construction operator for a rectangle;
                # presumably this is how Word embeds highlights, lines, etc.
                if operator == b_('re'):
                    rect_width = operands[-2].as_numeric()
                    rect_height = operands[-1].as_numeric()

                    # (length of X letters at the current size)
                    min_width = self.getMinimumRectangleWidth(font_size, 1)
                    max_height = font_size + 6  # catches really big highlights
                    min_height = 1.5  # so that thin lines are not removed

                    # Drop the rectangle only when it is wider than the minimum
                    # and its height lies in (min_height, max_height].
                    if rect_width > min_width and min_height < rect_height <= max_height:
                        continue

                kept.append((operands, operator))

            content.operations = kept
            pageRef[NameObject('/Contents')] = content

Beispiel #14

0

Datei anzeigen

def write_form(lecturer, employee_dict, radio_dict, choice_dict):
    """Fill the data-summary form for ``lecturer`` and write it to disk.

    The template and the output file name are chosen from
    ``lecturer.job_code[0]`` (1630/1632 select the "Pre6" form).  On the
    first two pages the text fields are filled from ``employee_dict``,
    drop-downs from ``choice_dict`` (``(index, text)`` per field) and
    checkboxes/radio buttons from ``radio_dict``.  Extra free-text comments
    are added through ``add_comment`` when the lecturer has a second set of
    dates or salaries.

    Both the template and the output file are closed before returning.
    """
    if lecturer.job_code[0] in (1630, 1632):
        template_path = "Pre6_Data_Summary__Redacted.pdf"
        filename = lecturer.last_name + "." + lecturer.first_name + "_form.pdf"
    else:
        template_path = "CL_Data_Summary_form_Redacted.pdf"
        filename = lecturer.last_name + "." + lecturer.first_name + "_Cont_form.pdf"

    # The reader loads objects lazily, so the template has to stay open until
    # the output has been written.
    with open(template_path, 'rb') as template_stream:
        template = PdfFileReader(template_stream)
        output = PdfFileWriter()
        output.cloneReaderDocumentRoot(template)
        output._root_object["/AcroForm"][NameObject(
            "/NeedAppearances")] = BooleanObject(True)

        for i in [0, 1]:
            page = template.getPage(i)
            output.updatePageFormFieldValues(page, employee_dict)

            # Checkboxes and drop downs: adapted from PyPDF2's
            # updatePageFormFieldValues, but writing NameObject values.
            for annot_ref in page['/Annots']:
                writer_annot = annot_ref.getObject()

                # Drop-downs: "/I" is the index of the chosen option (the
                # second option on the list is "1") and "/V" is the text of
                # the field. Both must be updated.
                for field in choice_dict:
                    if writer_annot.get("/T") == field:
                        writer_annot.update({
                            NameObject("/I"):
                            NameObject(choice_dict[field][0]),
                            NameObject("/V"):
                            TextStringObject(choice_dict[field][1])
                        })

                # Checkboxes on the pre6 form are kids of a parent object, so
                # the name is read from the parent; on the cont form they are
                # accessible by "/T" alone.
                for field in radio_dict:
                    if "/Parent" in writer_annot:
                        matches = writer_annot["/Parent"].get("/T") == field
                    else:
                        matches = writer_annot.get("/T") == field
                    if matches:
                        writer_annot.update({
                            NameObject("/V"):
                            NameObject(radio_dict[field]),
                            NameObject("/AS"):
                            NameObject(radio_dict[field])
                        })

            if i == 0:
                # If there are two start dates, add the second set of dates to
                # the proposed dates on the first page.
                # The explicit comparison with True is kept from the original.
                if len(lecturer.start) == 2 and (lecturer.break_service == True):  # noqa: E712
                    start_end_2 = lecturer.start[1] + "-" + lecturer.end[1]
                    add_comment(output, page, start_end_2,
                                [379.051, 405.913, 536.515, 424.313])
                # If they are eligible for a raise in the middle of the year,
                # add a line for a second monthly/annual (pg1 and pg2).
                if len(lecturer.annual) == 2:
                    add_comment(output, page,
                                lecturer.start[1] + ": " + lecturer.annual[1],
                                [457.783, 465.165, 582.78, 483.565])
            if i == 1:
                if len(lecturer.monthly) == 2:
                    add_comment(output, page,
                                lecturer.start[1] + ": " + lecturer.monthly[1],
                                [440.738, 679.446, 548.738, 697.846])

        with open(filename, "wb") as outputStream:
            output.write(outputStream)

Beispiel #15

0

Datei anzeigen

Datei: first.py Projekt: SethArchambault/pdf-generation-for-civilla

    class Scope:
        """Namespace for one export run; the class body executes immediately.

        Reads ``output/data.csv``, and for every row writes a PDF built from
        ``template_reader`` with the row's values filled into matching form
        fields.  Column names that never matched a template field (and are
        neither logic-only nor hidden) are counted and printed at the end.
        """
        print("---------------------- begin export ----------------------")

        print(time.strftime('%Y-%m-%d %H:%M:%S'))
        # Load all rows up front so the CSV file is closed before any PDF work.
        with open("output/data.csv") as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter=',')
            imported_value_array = list(csv_reader)

        print("rows %d" % len(imported_value_array))
        # :logic
        # Columns that drive the export logic and never appear in the template.
        logic_fields = [
            "program_raw", "program_count", "errors", "page_count", "form",
            "row_id", "pdf_filename", "page_6_hide", "page_7_hide",
            "page_8_hide", "page_9_hide", "page_10_hide", "program_fap",
            "program_medicaid", "program_medicare", "program_cdc",
            "program_cash", "interview_date", "interview_time", "street",
            "apartment_lot", "city", "zipcode", "state", "mailing_street",
            "mailing_apartment_lot", "mailing_city", "mailing_zipcode"
        ]
        for i in range(1, 9):
            logic_fields.append("member_%d_taxes" % i)
        missing_template_fields = {}
        for ndx, values in enumerate(imported_value_array):
            # Unless all files are requested, stop after the first 15 rows.
            if not config['generate_all_files']:
                if ndx == 15:
                    break
            print("writing %s" % values['row_id'])
            writer = PdfFileWriter()
            fields_filled = []
            # scan pages for annot fields
            for pageNum in range(template_reader.numPages):
                # Skip pages based on program
                # :pages
                # Pages 6-10 each have a "page_N_hide" column in the CSV.
                if 6 <= pageNum <= 10 and values["page_%d_hide" % pageNum] == 'True':
                    continue
                page = template_reader.getPage(pageNum)
                writer.addPage(page)
                if "/Annots" not in page:
                    continue
                for annot_ref in page['/Annots']:
                    annot_child = annot_ref.getObject()
                    # The parent does not depend on the field being tested,
                    # so resolve it once per annotation.
                    parent_ref = annot_child.get('/Parent')
                    annot_child_parent = parent_ref.getObject() if parent_ref else None
                    for field in values:
                        if annot_child_parent is not None:
                            if annot_child_parent.get('/T') == field:
                                annot_child_parent.update({
                                    NameObject("/V"):
                                    TextStringObject(values[field]),
                                })
                                fields_filled.append(field)
                        if annot_child.get('/T') == field:
                            annot_child.update({
                                NameObject("/V"):
                                TextStringObject(values[field])
                            })
                            fields_filled.append(field)

            # check to see if everything was filled
            # some values are never in the pdf (logic only)
            print("%s" % values['pdf_filename'])
            # other values are sometimes not available to be filled
            fields_not_visible = []
            # @Todo: Testing - set some invalid data and see if it picks up on it.
            # CSV values are strings, so the flag is compared with 'True' just
            # like in the page-skipping test above; plain truthiness would
            # treat the string 'False' as set.
            if values["page_8_hide"] == 'True':
                fields_not_visible.append("tax_check_yes")
                fields_not_visible.append("tax_check_no")
                fields_not_visible.append("tax_name")
            else:
                # NOTE(review): still a truthiness test on a CSV string --
                # confirm which values "tax_check_no" can take.
                if values["tax_check_no"]:
                    fields_not_visible.append("tax_name")

            for k in values:
                if k in fields_filled or k in logic_fields or k in fields_not_visible:
                    continue
                missing_template_fields[k] = missing_template_fields.get(k, 0) + 1

            print("pdf_filename '%s'" % values['pdf_filename'])
            # Close the output as soon as it is written so the data is flushed.
            with open("output/%s" % values['pdf_filename'], "wb") as pdf_file:
                writer.write(pdf_file)
        if missing_template_fields:
            print("missing template fields:")
        for field in missing_template_fields:
            print(field)

Beispiel #16

0

Datei anzeigen

def test_TextStringObject_autodetect_utf16():
    """With ``autodetect_utf16`` set, "foo" round-trips to BOM-prefixed 2-byte characters."""
    text = TextStringObject("foo")
    text.autodetect_utf16 = True
    expected = b"\xfe\xff\x00f\x00o\x00o"
    assert text.get_original_bytes() == expected

Beispiel #17

0

Datei anzeigen

def test_TextStringObject_exc():
    """A freshly built string has no encoding flags, so asking for its bytes raises."""
    text = TextStringObject("foo")
    with pytest.raises(Exception) as raised:
        text.get_original_bytes()
    message = raised.value.args[0]
    assert message == "no information about original bytes"

Beispiel #18

0

Datei anzeigen

def annotate(fp_in, annotations):
    """Return the PDF read from ``fp_in`` with ``/Text`` annotations added.

    Each item of ``annotations`` is a mapping with the keys ``x``, ``y`` and
    ``text``, plus optional ``page`` (default 0), ``color`` (an HTML
    "#RRGGBB" string or an integer), ``author`` and ``modified`` (seconds
    since the epoch).  Annotations that name a missing page are reported on
    stderr and skipped.  Only page rotations of 0 and 90 degrees are
    handled; other rotations are reported and the coordinates used as given.

    When no usable colour is supplied the ``/C`` entry is left out instead
    of failing while iterating ``None``.

    NOTE(review): relies on ``basestring`` and ``StringIO`` from the
    surrounding file -- presumably Python 2 or a compatibility alias.

    Returns:
        The contents of the written PDF, as returned by ``StringIO.getvalue``.
    """
    reader = PdfFileReader(fp_in)
    pdf = PdfFileWriter()
    for page in reader.pages:
        pdf.addPage(page)

    for annotation in annotations:
        page = annotation.get('page', 0)
        try:
            pdfpage = pdf.getPage(page)
        except IndexError:
            sys.stderr.write(
                'Page %d not found in pdf, not adding annotations %r\n' % (
                    page, annotation))
            continue

        size = pdfpage.mediaBox
        angle = int(pdfpage.get('/Rotate', 0))
        x = annotation['x']
        y = annotation['y']
        if angle == 0:
            # Flip the vertical axis: measure y from the top of the media box.
            x = float(x)
            y = size[3] - float(y) - 20
        elif angle == 90:
            x, y = float(y) - 2, float(x) - 15
        else:
            x = float(x)
            y = float(y)
            sys.stderr.write(
                'Page rotated by %d degrees not implemented yet\n' % (angle))

        color = annotation.get('color', None)
        if isinstance(color, basestring):
            if color[:1] != '#':
                sys.stderr.write('Unsupported color format: %s\n' % (color))
                color = None
            else:
                # Assume HTML color with format "#RRGGBB".
                try:
                    color = int(color[1:], 16)
                except ValueError as e:
                    sys.stderr.write(
                        'Unsupported color format: %s (%s)\n' % (color, e))
                    color = None

        if color is not None:
            # Split the packed integer into components and scale each one.
            r, g, b = color >> 16, (color >> 8) & 0xff, color & 0xff
            color = (r * BYTE_TO_COLOR, g * BYTE_TO_COLOR, b * BYTE_TO_COLOR)

        pages = pdf.getObject(pdf._pages)
        pageref = pages["/Kids"][page]

        anno = DictionaryObject()
        anno[NameObject('/Type')] = NameObject('/Annot')
        anno[NameObject('/Subtype')] = NameObject('/Text')
        anno[NameObject('/P')] = pageref
        anno[NameObject('/Rect')] = RectangleObject([x, y, x + 18, y + 20])
        anno[NameObject('/Contents')] = TextStringObject(annotation['text'])
        if color is not None:
            anno[NameObject('/C')] = ArrayObject(
                [FloatObject(component) for component in color])
        anno[NameObject('/Open')] = BooleanObject(True)

        author = annotation.get('author', None)
        if author:
            anno[NameObject('/T')] = TextStringObject(author)
        modified = annotation.get('modified', None)
        if modified:
            modified = time.strftime('%Y%m%d%H%M%SZ', time.gmtime(modified))
            anno[NameObject('/M')] = TextStringObject(modified)

        annoRef = pdf._addObject(anno)
        annots = pdfpage.get('/Annots', None)
        if annots is None:
            annots = pdfpage[NameObject('/Annots')] = ArrayObject([annoRef])
        else:
            annots.append(annoRef)

    fp_out = StringIO()
    pdf.write(fp_out)
    return fp_out.getvalue()

Beispiel #19

0

Datei anzeigen

Datei: handlePDF.py Projekt: cadnant/oomap

def add_geospatial_pdf_header(m, f, f2, map_bounds, poly, epsg=None, wkt=None):
    """
    Copy the PDF read from ``f`` to ``f2``, adding a geospatial viewport to
    every page.

    Follows the Adobe® Supplement to the ISO 32000 PDF specification
    (BaseVersion 1.7, ExtensionLevel 3, June 2008).

    Args:
        m, poly, map_bounds: Forwarded unchanged to ``get_pdf_measure``.
        f: Input handed to ``PdfFileReader``.
        f2: Output target handed to ``PdfFileWriter.write``.
        epsg: EPSG code of the projection; stored as an integer under
            ``/EPSG`` when truthy.
        wkt: WKT text of the projection; stored under ``/WKT`` when truthy.

    Returns:
        ``f2``, after the modified document has been written to it.

    Raises:
        RuntimeError: If PyPDF2 is not available, or if neither ``epsg`` nor
            ``wkt`` is provided.

    Note:
        Must be called *after* the page has had ``.finish()`` called.
    """
    if not HAS_PYPDF2:
        raise RuntimeError(
            "PyPDF2 not available; PyPDF2 required to add geospatial header to PDF"
        )

    if not (epsg or wkt):
        raise RuntimeError(
            "EPSG or WKT required to add geospatial header to PDF")

    reader = PdfFileReader(f)
    writer = PdfFileWriter()

    # Preserve OCProperties at the document root if the source has one.
    # The lookup key is a NameObject so the membership test also works on
    # Python 3.
    oc_key = NameObject('/OCProperties')
    source_root = reader.trailer['/Root']
    if oc_key in source_root:
        writer._root_object[oc_key] = source_root.getObject()[oc_key]

    for page in reader.pages:
        # Projected coordinate system dictionary describing the projection.
        projection = DictionaryObject()
        projection[NameObject('/Type')] = NameObject('/PROJCS')
        if epsg:
            projection[NameObject('/EPSG')] = NumberObject(int(epsg))
        if wkt:
            projection[NameObject('/WKT')] = TextStringObject(wkt)

        measure = get_pdf_measure(m, projection, poly, map_bounds)

        # The page's VP entry is an array of viewport dictionaries. A viewport
        # is basically a rectangular region on the PDF page; its only required
        # entry is the BBox, which locates the viewport on the page.
        media_box = page.mediaBox
        corners = (0, int(media_box[3]), int(media_box[2]), 0)  # in pts
        bbox = ArrayObject([FloatObject(str(value)) for value in corners])

        viewport = DictionaryObject()
        viewport[NameObject('/Type')] = NameObject('/Viewport')
        viewport[NameObject('/BBox')] = bbox
        viewport[NameObject('/Measure')] = measure

        page[NameObject('/VP')] = ArrayObject([viewport])
        writer.addPage(page)

    writer.write(f2)
    return f2

Beispiel #20

0

Datei anzeigen

Datei: pdf_helper.py Projekt: shashwatjay/chesspdftofen

def create_annotation(x, y, meta):
    """
    Build a link annotation and a companion text (comment) annotation.

    Table and section numbers in the comments below refer to PDF 32000-1:2008:
    https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf

    Args:
        x: Left coordinate of both annotation rectangles.
        y: Bottom coordinate of both annotation rectangles.
        meta: Mapping providing ``"contents"`` (used as the link URI and as
            the comment text) and ``"author"`` (used as the comment title).

    Returns:
        A ``(linkAnnotation, commentAnnotation)`` tuple of
        ``DictionaryObject`` instances. The link rectangle is 20 x 20 units
        and the comment rectangle is 5 x 5 units, both anchored at ``(x, y)``.

    Raises:
        KeyError: If ``meta`` lacks ``"contents"`` or ``"author"``.
    """
    color = [255.0 / 255.0, 209 / 255.0, 0]

    link_annotation = DictionaryObject()
    link_annotation.update({
        # Table 165 annotation flags: value 4 is the Print flag
        NameObject("/F"):
        NumberObject(4),
        NameObject("/Type"):
        NameObject("/Annot"),
        NameObject("/Subtype"):
        NameObject("/Link"),

        # Table 164 color, annotation rectangle
        NameObject("/C"):
        ArrayObject([FloatObject(c) for c in color]),
        NameObject("/Rect"):
        ArrayObject([
            FloatObject(x),
            FloatObject(y),
            FloatObject(x + 20),
            FloatObject(y + 20)
        ]),

        # Table 173 link annotation
        NameObject('/A'):
        DictionaryObject({
            # Table 206 uri
            NameObject('/S'): NameObject('/URI'),
            NameObject('/URI'): TextStringObject(meta["contents"])
        }),
        # Table 173 invert rect when mouse
        NameObject('/H'):
        NameObject('/I'),
        # Table 164: horizontal corner radius, vertical corner radius,
        # border width (dash array: table 56). The entries must be PDF
        # numbers; a name object without a leading "/" is not a valid name.
        NameObject('/Border'):
        ArrayObject([
            NumberObject(0),
            NumberObject(0),
            NumberObject(5),
        ]),
    })

    comment_annotation = DictionaryObject()
    comment_annotation.update({
        # Table 165 annotation flags: value 4 is the Print flag
        NameObject("/F"):
        NumberObject(4),
        NameObject("/Type"):
        NameObject("/Annot"),
        NameObject("/Subtype"):
        NameObject("/Text"),

        # Table 170 titlebar
        NameObject("/T"):
        TextStringObject(meta["author"]),
        NameObject("/Contents"):
        TextStringObject(meta["contents"]),

        # Table 164 color, annotation rectangle
        NameObject("/C"):
        ArrayObject([FloatObject(c) for c in color]),
        NameObject("/Rect"):
        ArrayObject([
            FloatObject(x),
            FloatObject(y),
            FloatObject(x + 5),
            FloatObject(y + 5)
        ]),

        # 12.5.6.4 text annotation: start closed, drawn with the Comment icon
        NameObject('/Open'):
        BooleanObject(False),
        NameObject('/Name'):
        NameObject('/Comment'),
    })

    return link_annotation, comment_annotation

Beispiel #21

0

Datei anzeigen

Datei: pdf_highlighter.py Projekt: dmitrySorokin/pdf_highlighter

def create_free_text(x1, y1, x2, y2, text, color=(1, 0, 0)):
    """
    Create a '/FreeText' annotation carrying ``text`` as its contents.

    The coordinates and ``color`` are forwarded to ``_create_annotation``;
    the object it returns gets ``text`` stored under '/Contents' and is then
    returned to the caller.
    """
    free_text = _create_annotation(x1, y1, x2, y2, color, '/FreeText')
    contents_key = NameObject('/Contents')
    free_text[contents_key] = TextStringObject(text)
    return free_text