예제 #1
0
파일: pdf.py 프로젝트: filips123/pdf2docx
def _is_device_cs(xref, doc: fitz.Document):
    '''Check whether object xref is a device based color space.

    For now only /ICCBased color spaces are inspected: such a color space is
    treated as device based if /Device[Gray|RGB|CMYK] is declared as
    /Alternate in the referenced ICC profile object. Every other definition,
    including malformed or nested ones, is reported as not device based
    instead of raising.

    Args:
        xref (int): xref of the color space object to check.
        doc (fitz.Document): document used to look up object definitions.

    Returns:
        bool: True if the object is an /ICCBased color space whose profile
        has a device color space as /Alternate, False otherwise.
    '''
    # local import: this block cannot rely on a module-level `import re`
    import re

    # cs definition, e.g.
    #
    # [ /ICCBased 15 0 R ]
    obj_contents = doc.xrefObject(xref)

    # The array must *start* with /ICCBased followed by an indirect reference.
    # Anchoring at the beginning keeps e.g. an /Indexed color space with an
    # /ICCBased base from being mistaken for an ICC based one, and avoids
    # converting a non-numeric token with int().
    icc_ref = re.match(r'\[\s*/ICCBased\s+(\d+)\s+\d+\s+R\b', obj_contents.strip())
    if icc_ref is None:
        # ignore all other color spaces, may include if facing associated cases
        return False

    # ICC profile object, e.g.
    #
    # <<
    #   /Alternate /DeviceRGB
    #   /Filter /FlateDecode
    #   /Length 2597
    #   /N 3
    # >>
    ICC_contents = doc.xrefObject(int(icc_ref.group(1)))

    # whitespace between the key and the name is optional in PDF syntax
    alternate = re.search(r'/Alternate\s*/Device(?:Gray|RGB|CMYK)\b', ICC_contents)
    return alternate is not None
예제 #2
0
파일: pdf.py 프로젝트: phearun008/pdf2docx
def _check_device_cs(doc:fitz.Document, page:fitz.Page):
    '''Get all color space name used in current page and check if they're device based color space.

    The page object definition is scanned for its /ColorSpace resources. Only
    entries given as indirect references (e.g. "/Cs6 14 0 R") are evaluated,
    each with `_is_device_cs`; entries in any other form are skipped and are
    therefore absent from the result.

    Args:
        doc (fitz.Document): document used to look up object definitions.
        page (fitz.Page): page whose color space resources are collected.

    Returns:
        dict: color space name -> bool, True if the color space is device
        based. Always contains /DeviceGray, /DeviceRGB and /DeviceCMYK.
    '''
    # local import: this block cannot rely on a module-level `import re`
    import re

    # default device based cs
    cs = {
        '/DeviceGray': True,
        '/DeviceRGB' : True,
        '/DeviceCMYK': True
    }

    # "/Name <object number> <generation number> R"
    ref_entry = re.compile(r'(/\S+)\s+(\d+)\s+\d+\s+R')

    def register(text):
        '''Record the color space if text is an entry like "/Cs6 14 0 R".'''
        matched = ref_entry.fullmatch(text.strip())
        if matched:
            cs[matched.group(1)] = _is_device_cs(int(matched.group(2)), doc)

    # content of page object, e.g.
    # <<
    # ...
    # /Resources <<
    #     ...
    #     /ColorSpace <<
    #     /Cs6 14 0 R
    #     >>
    # >>
    # /Rotate 0
    # /Type /Page
    # >>
    obj_contents = doc.xrefObject(page.xref)

    cs_found = False
    for line_ in obj_contents.splitlines():
        line = line_.strip()

        # look for the start of the color space block
        if not cs_found:
            if not line.startswith('/ColorSpace'):
                continue

            # "/ColorSpace 12 0 R": the dictionary lives in its own object, so
            # read the entries from there rather than from the following lines
            # of the page object, which belong to other resources
            indirect = ref_entry.fullmatch(line)
            if indirect:
                for entry in doc.xrefObject(int(indirect.group(2))).splitlines():
                    register(entry)
                break

            cs_found = True
            continue

        # end of color space block
        if line == '>>':
            break

        # now within cs block, e.g. /Cs6 14 0 R
        register(line)

    return cs